virtio_net.c

  1. // SPDX-License-Identifier: GPL-2.0-or-later
  2. /* A network driver using virtio.
  3. *
  4. * Copyright 2007 Rusty Russell <rusty@rustcorp.com.au> IBM Corporation
  5. */
  6. //#define DEBUG
  7. #include <linux/netdevice.h>
  8. #include <linux/etherdevice.h>
  9. #include <linux/ethtool.h>
  10. #include <linux/module.h>
  11. #include <linux/virtio.h>
  12. #include <linux/virtio_net.h>
  13. #include <linux/bpf.h>
  14. #include <linux/bpf_trace.h>
  15. #include <linux/scatterlist.h>
  16. #include <linux/if_vlan.h>
  17. #include <linux/slab.h>
  18. #include <linux/cpu.h>
  19. #include <linux/average.h>
  20. #include <linux/filter.h>
  21. #include <linux/kernel.h>
  22. #include <linux/dim.h>
  23. #include <net/route.h>
  24. #include <net/xdp.h>
  25. #include <net/net_failover.h>
  26. #include <net/netdev_rx_queue.h>
  27. #include <net/netdev_queues.h>
  28. #include <net/xdp_sock_drv.h>
  29. static int napi_weight = NAPI_POLL_WEIGHT;
  30. module_param(napi_weight, int, 0444);
  31. static bool csum = true, gso = true, napi_tx = true;
  32. module_param(csum, bool, 0444);
  33. module_param(gso, bool, 0444);
  34. module_param(napi_tx, bool, 0644);
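/* Example (values are illustrative): these knobs are module parameters, e.g.
 *   modprobe virtio_net napi_tx=0 gso=0
 * csum, gso and napi_weight are read-only after load (mode 0444), while
 * napi_tx (mode 0644) can also be toggled at runtime via
 * /sys/module/virtio_net/parameters/napi_tx.
 */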
  35. /* FIXME: MTU in config. */
  36. #define GOOD_PACKET_LEN (ETH_HLEN + VLAN_HLEN + ETH_DATA_LEN)
  37. #define GOOD_COPY_LEN 128
  38. #define VIRTNET_RX_PAD (NET_IP_ALIGN + NET_SKB_PAD)
  39. /* Separating two types of XDP xmit */
  40. #define VIRTIO_XDP_TX BIT(0)
  41. #define VIRTIO_XDP_REDIR BIT(1)
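/* Low bits used to tag pointers returned by the TX virtqueue, so completed
 * buffers can be told apart: XDP frames vs. skbs, and orphaned skbs vs. skbs
 * still accounted to NAPI (see is_xdp_frame()/is_orphan_skb() below).
 */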
  42. #define VIRTIO_XDP_FLAG BIT(0)
  43. #define VIRTIO_ORPHAN_FLAG BIT(1)
  44. /* RX packet size EWMA. The average packet size is used to determine the packet
  45. * buffer size when refilling RX rings. As the entire RX ring may be refilled
  46. * at once, the weight is chosen so that the EWMA will be insensitive to short-
  47. * term, transient changes in packet size.
  48. */
  49. DECLARE_EWMA(pkt_len, 0, 64)
  50. #define VIRTNET_DRIVER_VERSION "1.0.0"
  51. static const unsigned long guest_offloads[] = {
  52. VIRTIO_NET_F_GUEST_TSO4,
  53. VIRTIO_NET_F_GUEST_TSO6,
  54. VIRTIO_NET_F_GUEST_ECN,
  55. VIRTIO_NET_F_GUEST_UFO,
  56. VIRTIO_NET_F_GUEST_CSUM,
  57. VIRTIO_NET_F_GUEST_USO4,
  58. VIRTIO_NET_F_GUEST_USO6,
  59. VIRTIO_NET_F_GUEST_HDRLEN
  60. };
  61. #define GUEST_OFFLOAD_GRO_HW_MASK ((1ULL << VIRTIO_NET_F_GUEST_TSO4) | \
  62. (1ULL << VIRTIO_NET_F_GUEST_TSO6) | \
  63. (1ULL << VIRTIO_NET_F_GUEST_ECN) | \
  64. (1ULL << VIRTIO_NET_F_GUEST_UFO) | \
  65. (1ULL << VIRTIO_NET_F_GUEST_USO4) | \
  66. (1ULL << VIRTIO_NET_F_GUEST_USO6))
  67. struct virtnet_stat_desc {
  68. char desc[ETH_GSTRING_LEN];
  69. size_t offset;
  70. size_t qstat_offset;
  71. };
  72. struct virtnet_sq_free_stats {
  73. u64 packets;
  74. u64 bytes;
  75. u64 napi_packets;
  76. u64 napi_bytes;
  77. };
  78. struct virtnet_sq_stats {
  79. struct u64_stats_sync syncp;
  80. u64_stats_t packets;
  81. u64_stats_t bytes;
  82. u64_stats_t xdp_tx;
  83. u64_stats_t xdp_tx_drops;
  84. u64_stats_t kicks;
  85. u64_stats_t tx_timeouts;
  86. u64_stats_t stop;
  87. u64_stats_t wake;
  88. };
  89. struct virtnet_rq_stats {
  90. struct u64_stats_sync syncp;
  91. u64_stats_t packets;
  92. u64_stats_t bytes;
  93. u64_stats_t drops;
  94. u64_stats_t xdp_packets;
  95. u64_stats_t xdp_tx;
  96. u64_stats_t xdp_redirects;
  97. u64_stats_t xdp_drops;
  98. u64_stats_t kicks;
  99. };
  100. #define VIRTNET_SQ_STAT(name, m) {name, offsetof(struct virtnet_sq_stats, m), -1}
  101. #define VIRTNET_RQ_STAT(name, m) {name, offsetof(struct virtnet_rq_stats, m), -1}
  102. #define VIRTNET_SQ_STAT_QSTAT(name, m) \
  103. { \
  104. name, \
  105. offsetof(struct virtnet_sq_stats, m), \
  106. offsetof(struct netdev_queue_stats_tx, m), \
  107. }
  108. #define VIRTNET_RQ_STAT_QSTAT(name, m) \
  109. { \
  110. name, \
  111. offsetof(struct virtnet_rq_stats, m), \
  112. offsetof(struct netdev_queue_stats_rx, m), \
  113. }
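/* A qstat_offset of -1 marks counters that have no corresponding field in the
 * netdev per-queue stats (struct netdev_queue_stats_rx/tx).
 */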
  114. static const struct virtnet_stat_desc virtnet_sq_stats_desc[] = {
  115. VIRTNET_SQ_STAT("xdp_tx", xdp_tx),
  116. VIRTNET_SQ_STAT("xdp_tx_drops", xdp_tx_drops),
  117. VIRTNET_SQ_STAT("kicks", kicks),
  118. VIRTNET_SQ_STAT("tx_timeouts", tx_timeouts),
  119. };
  120. static const struct virtnet_stat_desc virtnet_rq_stats_desc[] = {
  121. VIRTNET_RQ_STAT("drops", drops),
  122. VIRTNET_RQ_STAT("xdp_packets", xdp_packets),
  123. VIRTNET_RQ_STAT("xdp_tx", xdp_tx),
  124. VIRTNET_RQ_STAT("xdp_redirects", xdp_redirects),
  125. VIRTNET_RQ_STAT("xdp_drops", xdp_drops),
  126. VIRTNET_RQ_STAT("kicks", kicks),
  127. };
  128. static const struct virtnet_stat_desc virtnet_sq_stats_desc_qstat[] = {
  129. VIRTNET_SQ_STAT_QSTAT("packets", packets),
  130. VIRTNET_SQ_STAT_QSTAT("bytes", bytes),
  131. VIRTNET_SQ_STAT_QSTAT("stop", stop),
  132. VIRTNET_SQ_STAT_QSTAT("wake", wake),
  133. };
  134. static const struct virtnet_stat_desc virtnet_rq_stats_desc_qstat[] = {
  135. VIRTNET_RQ_STAT_QSTAT("packets", packets),
  136. VIRTNET_RQ_STAT_QSTAT("bytes", bytes),
  137. };
  138. #define VIRTNET_STATS_DESC_CQ(name) \
  139. {#name, offsetof(struct virtio_net_stats_cvq, name), -1}
  140. #define VIRTNET_STATS_DESC_RX(class, name) \
  141. {#name, offsetof(struct virtio_net_stats_rx_ ## class, rx_ ## name), -1}
  142. #define VIRTNET_STATS_DESC_TX(class, name) \
  143. {#name, offsetof(struct virtio_net_stats_tx_ ## class, tx_ ## name), -1}
  144. static const struct virtnet_stat_desc virtnet_stats_cvq_desc[] = {
  145. VIRTNET_STATS_DESC_CQ(command_num),
  146. VIRTNET_STATS_DESC_CQ(ok_num),
  147. };
  148. static const struct virtnet_stat_desc virtnet_stats_rx_basic_desc[] = {
  149. VIRTNET_STATS_DESC_RX(basic, packets),
  150. VIRTNET_STATS_DESC_RX(basic, bytes),
  151. VIRTNET_STATS_DESC_RX(basic, notifications),
  152. VIRTNET_STATS_DESC_RX(basic, interrupts),
  153. };
  154. static const struct virtnet_stat_desc virtnet_stats_tx_basic_desc[] = {
  155. VIRTNET_STATS_DESC_TX(basic, packets),
  156. VIRTNET_STATS_DESC_TX(basic, bytes),
  157. VIRTNET_STATS_DESC_TX(basic, notifications),
  158. VIRTNET_STATS_DESC_TX(basic, interrupts),
  159. };
  160. static const struct virtnet_stat_desc virtnet_stats_rx_csum_desc[] = {
  161. VIRTNET_STATS_DESC_RX(csum, needs_csum),
  162. };
  163. static const struct virtnet_stat_desc virtnet_stats_tx_gso_desc[] = {
  164. VIRTNET_STATS_DESC_TX(gso, gso_packets_noseg),
  165. VIRTNET_STATS_DESC_TX(gso, gso_bytes_noseg),
  166. };
  167. static const struct virtnet_stat_desc virtnet_stats_rx_speed_desc[] = {
  168. VIRTNET_STATS_DESC_RX(speed, ratelimit_bytes),
  169. };
  170. static const struct virtnet_stat_desc virtnet_stats_tx_speed_desc[] = {
  171. VIRTNET_STATS_DESC_TX(speed, ratelimit_bytes),
  172. };
  173. #define VIRTNET_STATS_DESC_RX_QSTAT(class, name, qstat_field) \
  174. { \
  175. #name, \
  176. offsetof(struct virtio_net_stats_rx_ ## class, rx_ ## name), \
  177. offsetof(struct netdev_queue_stats_rx, qstat_field), \
  178. }
  179. #define VIRTNET_STATS_DESC_TX_QSTAT(class, name, qstat_field) \
  180. { \
  181. #name, \
  182. offsetof(struct virtio_net_stats_tx_ ## class, tx_ ## name), \
  183. offsetof(struct netdev_queue_stats_tx, qstat_field), \
  184. }
  185. static const struct virtnet_stat_desc virtnet_stats_rx_basic_desc_qstat[] = {
  186. VIRTNET_STATS_DESC_RX_QSTAT(basic, drops, hw_drops),
  187. VIRTNET_STATS_DESC_RX_QSTAT(basic, drop_overruns, hw_drop_overruns),
  188. };
  189. static const struct virtnet_stat_desc virtnet_stats_tx_basic_desc_qstat[] = {
  190. VIRTNET_STATS_DESC_TX_QSTAT(basic, drops, hw_drops),
  191. VIRTNET_STATS_DESC_TX_QSTAT(basic, drop_malformed, hw_drop_errors),
  192. };
  193. static const struct virtnet_stat_desc virtnet_stats_rx_csum_desc_qstat[] = {
  194. VIRTNET_STATS_DESC_RX_QSTAT(csum, csum_valid, csum_unnecessary),
  195. VIRTNET_STATS_DESC_RX_QSTAT(csum, csum_none, csum_none),
  196. VIRTNET_STATS_DESC_RX_QSTAT(csum, csum_bad, csum_bad),
  197. };
  198. static const struct virtnet_stat_desc virtnet_stats_tx_csum_desc_qstat[] = {
  199. VIRTNET_STATS_DESC_TX_QSTAT(csum, csum_none, csum_none),
  200. VIRTNET_STATS_DESC_TX_QSTAT(csum, needs_csum, needs_csum),
  201. };
  202. static const struct virtnet_stat_desc virtnet_stats_rx_gso_desc_qstat[] = {
  203. VIRTNET_STATS_DESC_RX_QSTAT(gso, gso_packets, hw_gro_packets),
  204. VIRTNET_STATS_DESC_RX_QSTAT(gso, gso_bytes, hw_gro_bytes),
  205. VIRTNET_STATS_DESC_RX_QSTAT(gso, gso_packets_coalesced, hw_gro_wire_packets),
  206. VIRTNET_STATS_DESC_RX_QSTAT(gso, gso_bytes_coalesced, hw_gro_wire_bytes),
  207. };
  208. static const struct virtnet_stat_desc virtnet_stats_tx_gso_desc_qstat[] = {
  209. VIRTNET_STATS_DESC_TX_QSTAT(gso, gso_packets, hw_gso_packets),
  210. VIRTNET_STATS_DESC_TX_QSTAT(gso, gso_bytes, hw_gso_bytes),
  211. VIRTNET_STATS_DESC_TX_QSTAT(gso, gso_segments, hw_gso_wire_packets),
  212. VIRTNET_STATS_DESC_TX_QSTAT(gso, gso_segments_bytes, hw_gso_wire_bytes),
  213. };
  214. static const struct virtnet_stat_desc virtnet_stats_rx_speed_desc_qstat[] = {
  215. VIRTNET_STATS_DESC_RX_QSTAT(speed, ratelimit_packets, hw_drop_ratelimits),
  216. };
  217. static const struct virtnet_stat_desc virtnet_stats_tx_speed_desc_qstat[] = {
  218. VIRTNET_STATS_DESC_TX_QSTAT(speed, ratelimit_packets, hw_drop_ratelimits),
  219. };
  220. #define VIRTNET_Q_TYPE_RX 0
  221. #define VIRTNET_Q_TYPE_TX 1
  222. #define VIRTNET_Q_TYPE_CQ 2
  223. struct virtnet_interrupt_coalesce {
  224. u32 max_packets;
  225. u32 max_usecs;
  226. };
  227. /* The DMA information for the pages allocated at one time. */
  228. struct virtnet_rq_dma {
  229. dma_addr_t addr;
  230. u32 ref;
  231. u16 len;
  232. u16 need_sync;
  233. };
  234. /* Internal representation of a send virtqueue */
  235. struct send_queue {
  236. /* Virtqueue associated with this send_queue */
  237. struct virtqueue *vq;
  238. /* TX: fragments + linear part + virtio header */
  239. struct scatterlist sg[MAX_SKB_FRAGS + 2];
  240. /* Name of the send queue: output.$index */
  241. char name[16];
  242. struct virtnet_sq_stats stats;
  243. struct virtnet_interrupt_coalesce intr_coal;
  244. struct napi_struct napi;
  245. /* Record whether sq is in reset state. */
  246. bool reset;
  247. struct xsk_buff_pool *xsk_pool;
  248. dma_addr_t xsk_hdr_dma_addr;
  249. };
  250. /* Internal representation of a receive virtqueue */
  251. struct receive_queue {
  252. /* Virtqueue associated with this receive_queue */
  253. struct virtqueue *vq;
  254. struct napi_struct napi;
  255. struct bpf_prog __rcu *xdp_prog;
  256. struct virtnet_rq_stats stats;
  257. /* The number of rx notifications */
  258. u16 calls;
  259. /* Is dynamic interrupt moderation enabled? */
  260. bool dim_enabled;
  261. /* Used to protect dim_enabled and inter_coal */
  262. struct mutex dim_lock;
  263. /* Dynamic Interrupt Moderation */
  264. struct dim dim;
  265. u32 packets_in_napi;
  266. struct virtnet_interrupt_coalesce intr_coal;
  267. /* Chain pages by the private ptr. */
  268. struct page *pages;
  269. /* Average packet length for mergeable receive buffers. */
  270. struct ewma_pkt_len mrg_avg_pkt_len;
  271. /* Page frag for packet buffer allocation. */
  272. struct page_frag alloc_frag;
  273. /* RX: fragments + linear part + virtio header */
  274. struct scatterlist sg[MAX_SKB_FRAGS + 2];
  275. /* Min single buffer size for mergeable buffers case. */
  276. unsigned int min_buf_len;
  277. /* Name of this receive queue: input.$index */
  278. char name[16];
  279. struct xdp_rxq_info xdp_rxq;
  280. /* Record the last dma info so it can be freed after a new page is allocated. */
  281. struct virtnet_rq_dma *last_dma;
  282. struct xsk_buff_pool *xsk_pool;
  283. /* xdp rxq used by xsk */
  284. struct xdp_rxq_info xsk_rxq_info;
  285. struct xdp_buff **xsk_buffs;
  286. /* Do DMA mapping ourselves */
  287. bool do_dma;
  288. };
  289. /* This structure can hold an RSS message with maximum settings for the indirection table and key size.
  290. * Note that the default structure describing the RSS configuration, virtio_net_rss_config,
  291. * contains the same information but cannot carry the table values.
  292. * In any case, the structure is passed to the virtio device through an sg_buf split into parts,
  293. * because table sizes may differ according to the device configuration.
  294. */
  295. #define VIRTIO_NET_RSS_MAX_KEY_SIZE 40
  296. struct virtio_net_ctrl_rss {
  297. u32 hash_types;
  298. u16 indirection_table_mask;
  299. u16 unclassified_queue;
  300. u16 hash_cfg_reserved; /* for HASH_CONFIG (see virtio_net_hash_config for details) */
  301. u16 max_tx_vq;
  302. u8 hash_key_length;
  303. u8 key[VIRTIO_NET_RSS_MAX_KEY_SIZE];
  304. u16 *indirection_table;
  305. };
  306. /* Control VQ buffers: protected by the rtnl lock */
  307. struct control_buf {
  308. struct virtio_net_ctrl_hdr hdr;
  309. virtio_net_ctrl_ack status;
  310. };
  311. struct virtnet_info {
  312. struct virtio_device *vdev;
  313. struct virtqueue *cvq;
  314. struct net_device *dev;
  315. struct send_queue *sq;
  316. struct receive_queue *rq;
  317. unsigned int status;
  318. /* Max # of queue pairs supported by the device */
  319. u16 max_queue_pairs;
  320. /* # of queue pairs currently used by the driver */
  321. u16 curr_queue_pairs;
  322. /* # of XDP queue pairs currently used by the driver */
  323. u16 xdp_queue_pairs;
  324. /* xdp_queue_pairs may be 0 while an XDP program is loaded, so track XDP state explicitly. */
  325. bool xdp_enabled;
  326. /* I like... big packets and I cannot lie! */
  327. bool big_packets;
  328. /* number of sg entries allocated for big packets */
  329. unsigned int big_packets_num_skbfrags;
  330. /* Host will merge rx buffers for big packets (shake it! shake it!) */
  331. bool mergeable_rx_bufs;
  332. /* Host supports rss and/or hash report */
  333. bool has_rss;
  334. bool has_rss_hash_report;
  335. u8 rss_key_size;
  336. u16 rss_indir_table_size;
  337. u32 rss_hash_types_supported;
  338. u32 rss_hash_types_saved;
  339. struct virtio_net_ctrl_rss rss;
  340. /* Has control virtqueue */
  341. bool has_cvq;
  342. /* Lock to protect the control VQ */
  343. struct mutex cvq_lock;
  344. /* Host can handle any s/g split between our header and packet data */
  345. bool any_header_sg;
  346. /* Packet virtio header size */
  347. u8 hdr_len;
  348. /* Work struct for delayed refilling if we run low on memory. */
  349. struct delayed_work refill;
  350. /* Is delayed refill enabled? */
  351. bool refill_enabled;
  353. /* Lock to synchronize access to refill_enabled */
  353. spinlock_t refill_lock;
  354. /* Work struct for config space updates */
  355. struct work_struct config_work;
  356. /* Work struct for setting rx mode */
  357. struct work_struct rx_mode_work;
  358. /* OK to queue work setting RX mode? */
  359. bool rx_mode_work_enabled;
  361. /* Is the affinity hint set for the virtqueues? */
  361. bool affinity_hint_set;
  362. /* CPU hotplug instances for online & dead */
  363. struct hlist_node node;
  364. struct hlist_node node_dead;
  365. struct control_buf *ctrl;
  366. /* Ethtool settings */
  367. u8 duplex;
  368. u32 speed;
  369. /* Is rx dynamic interrupt moderation enabled? */
  370. bool rx_dim_enabled;
  371. /* Interrupt coalescing settings */
  372. struct virtnet_interrupt_coalesce intr_coal_tx;
  373. struct virtnet_interrupt_coalesce intr_coal_rx;
  374. unsigned long guest_offloads;
  375. unsigned long guest_offloads_capable;
  376. /* failover when STANDBY feature enabled */
  377. struct failover *failover;
  378. u64 device_stats_cap;
  379. };
  380. struct padded_vnet_hdr {
  381. struct virtio_net_hdr_v1_hash hdr;
  382. /*
  383. * hdr is in a separate sg buffer, and the data sg buffer shares the same page
  384. * with this header sg. This padding makes the next sg 16-byte aligned
  385. * after the header.
  386. */
  387. char padding[12];
  388. };
  389. struct virtio_net_common_hdr {
  390. union {
  391. struct virtio_net_hdr hdr;
  392. struct virtio_net_hdr_mrg_rxbuf mrg_hdr;
  393. struct virtio_net_hdr_v1_hash hash_v1_hdr;
  394. };
  395. };
  396. static struct virtio_net_common_hdr xsk_hdr;
  397. static void virtnet_sq_free_unused_buf(struct virtqueue *vq, void *buf);
  398. static void virtnet_sq_free_unused_buf_done(struct virtqueue *vq);
  399. static int virtnet_xdp_handler(struct bpf_prog *xdp_prog, struct xdp_buff *xdp,
  400. struct net_device *dev,
  401. unsigned int *xdp_xmit,
  402. struct virtnet_rq_stats *stats);
  403. static void virtnet_receive_done(struct virtnet_info *vi, struct receive_queue *rq,
  404. struct sk_buff *skb, u8 flags);
  405. static struct sk_buff *virtnet_skb_append_frag(struct sk_buff *head_skb,
  406. struct sk_buff *curr_skb,
  407. struct page *page, void *buf,
  408. int len, int truesize);
  409. static int rss_indirection_table_alloc(struct virtio_net_ctrl_rss *rss, u16 indir_table_size)
  410. {
  411. if (!indir_table_size) {
  412. rss->indirection_table = NULL;
  413. return 0;
  414. }
  415. rss->indirection_table = kmalloc_array(indir_table_size, sizeof(u16), GFP_KERNEL);
  416. if (!rss->indirection_table)
  417. return -ENOMEM;
  418. return 0;
  419. }
  420. static void rss_indirection_table_free(struct virtio_net_ctrl_rss *rss)
  421. {
  422. kfree(rss->indirection_table);
  423. }
  424. static bool is_xdp_frame(void *ptr)
  425. {
  426. return (unsigned long)ptr & VIRTIO_XDP_FLAG;
  427. }
  428. static void *xdp_to_ptr(struct xdp_frame *ptr)
  429. {
  430. return (void *)((unsigned long)ptr | VIRTIO_XDP_FLAG);
  431. }
  432. static struct xdp_frame *ptr_to_xdp(void *ptr)
  433. {
  434. return (struct xdp_frame *)((unsigned long)ptr & ~VIRTIO_XDP_FLAG);
  435. }
  436. static bool is_orphan_skb(void *ptr)
  437. {
  438. return (unsigned long)ptr & VIRTIO_ORPHAN_FLAG;
  439. }
  440. static void *skb_to_ptr(struct sk_buff *skb, bool orphan)
  441. {
  442. return (void *)((unsigned long)skb | (orphan ? VIRTIO_ORPHAN_FLAG : 0));
  443. }
  444. static struct sk_buff *ptr_to_skb(void *ptr)
  445. {
  446. return (struct sk_buff *)((unsigned long)ptr & ~VIRTIO_ORPHAN_FLAG);
  447. }
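/* Walk the used ring of a send queue and release completed buffers: skbs are
 * consumed (counted against NAPI unless they were orphaned), XDP frames are
 * returned, and the NAPI packet/byte totals are reported to BQL via
 * netdev_tx_completed_queue().
 */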
  448. static void __free_old_xmit(struct send_queue *sq, struct netdev_queue *txq,
  449. bool in_napi, struct virtnet_sq_free_stats *stats)
  450. {
  451. unsigned int len;
  452. void *ptr;
  453. while ((ptr = virtqueue_get_buf(sq->vq, &len)) != NULL) {
  454. if (!is_xdp_frame(ptr)) {
  455. struct sk_buff *skb = ptr_to_skb(ptr);
  456. pr_debug("Sent skb %p\n", skb);
  457. if (is_orphan_skb(ptr)) {
  458. stats->packets++;
  459. stats->bytes += skb->len;
  460. } else {
  461. stats->napi_packets++;
  462. stats->napi_bytes += skb->len;
  463. }
  464. napi_consume_skb(skb, in_napi);
  465. } else {
  466. struct xdp_frame *frame = ptr_to_xdp(ptr);
  467. stats->packets++;
  468. stats->bytes += xdp_get_frame_len(frame);
  469. xdp_return_frame(frame);
  470. }
  471. }
  472. netdev_tx_completed_queue(txq, stats->napi_packets, stats->napi_bytes);
  473. }
  474. /* Converting between virtqueue no. and kernel tx/rx queue no.
  475. * 0:rx0 1:tx0 2:rx1 3:tx1 ... 2N:rxN 2N+1:txN 2N+2:cvq
  476. */
  477. static int vq2txq(struct virtqueue *vq)
  478. {
  479. return (vq->index - 1) / 2;
  480. }
  481. static int txq2vq(int txq)
  482. {
  483. return txq * 2 + 1;
  484. }
  485. static int vq2rxq(struct virtqueue *vq)
  486. {
  487. return vq->index / 2;
  488. }
  489. static int rxq2vq(int rxq)
  490. {
  491. return rxq * 2;
  492. }
  493. static int vq_type(struct virtnet_info *vi, int qid)
  494. {
  495. if (qid == vi->max_queue_pairs * 2)
  496. return VIRTNET_Q_TYPE_CQ;
  497. if (qid % 2)
  498. return VIRTNET_Q_TYPE_TX;
  499. return VIRTNET_Q_TYPE_RX;
  500. }
  501. static inline struct virtio_net_common_hdr *
  502. skb_vnet_common_hdr(struct sk_buff *skb)
  503. {
  504. return (struct virtio_net_common_hdr *)skb->cb;
  505. }
  506. /*
  507. * private is used to chain pages for big packets; put the whole
  508. * most recently used list at the beginning for reuse
  509. */
  510. static void give_pages(struct receive_queue *rq, struct page *page)
  511. {
  512. struct page *end;
  513. /* Find end of list, sew whole thing into vi->rq.pages. */
  514. for (end = page; end->private; end = (struct page *)end->private);
  515. end->private = (unsigned long)rq->pages;
  516. rq->pages = page;
  517. }
  518. static struct page *get_a_page(struct receive_queue *rq, gfp_t gfp_mask)
  519. {
  520. struct page *p = rq->pages;
  521. if (p) {
  522. rq->pages = (struct page *)p->private;
  523. /* clear private here, it is used to chain pages */
  524. p->private = 0;
  525. } else
  526. p = alloc_page(gfp_mask);
  527. return p;
  528. }
  529. static void virtnet_rq_free_buf(struct virtnet_info *vi,
  530. struct receive_queue *rq, void *buf)
  531. {
  532. if (vi->mergeable_rx_bufs)
  533. put_page(virt_to_head_page(buf));
  534. else if (vi->big_packets)
  535. give_pages(rq, buf);
  536. else
  537. put_page(virt_to_head_page(buf));
  538. }
  539. static void enable_delayed_refill(struct virtnet_info *vi)
  540. {
  541. spin_lock_bh(&vi->refill_lock);
  542. vi->refill_enabled = true;
  543. spin_unlock_bh(&vi->refill_lock);
  544. }
  545. static void disable_delayed_refill(struct virtnet_info *vi)
  546. {
  547. spin_lock_bh(&vi->refill_lock);
  548. vi->refill_enabled = false;
  549. spin_unlock_bh(&vi->refill_lock);
  550. }
  551. static void enable_rx_mode_work(struct virtnet_info *vi)
  552. {
  553. rtnl_lock();
  554. vi->rx_mode_work_enabled = true;
  555. rtnl_unlock();
  556. }
  557. static void disable_rx_mode_work(struct virtnet_info *vi)
  558. {
  559. rtnl_lock();
  560. vi->rx_mode_work_enabled = false;
  561. rtnl_unlock();
  562. }
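/* NAPI scheduling helpers: callbacks (interrupts) on the virtqueue are
 * disabled while NAPI is scheduled and re-enabled on completion; if more
 * buffers became available in the meantime, polling is rescheduled.
 */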
  563. static void virtqueue_napi_schedule(struct napi_struct *napi,
  564. struct virtqueue *vq)
  565. {
  566. if (napi_schedule_prep(napi)) {
  567. virtqueue_disable_cb(vq);
  568. __napi_schedule(napi);
  569. }
  570. }
  571. static bool virtqueue_napi_complete(struct napi_struct *napi,
  572. struct virtqueue *vq, int processed)
  573. {
  574. int opaque;
  575. opaque = virtqueue_enable_cb_prepare(vq);
  576. if (napi_complete_done(napi, processed)) {
  577. if (unlikely(virtqueue_poll(vq, opaque)))
  578. virtqueue_napi_schedule(napi, vq);
  579. else
  580. return true;
  581. } else {
  582. virtqueue_disable_cb(vq);
  583. }
  584. return false;
  585. }
  586. static void skb_xmit_done(struct virtqueue *vq)
  587. {
  588. struct virtnet_info *vi = vq->vdev->priv;
  589. struct napi_struct *napi = &vi->sq[vq2txq(vq)].napi;
  590. /* Suppress further interrupts. */
  591. virtqueue_disable_cb(vq);
  592. if (napi->weight)
  593. virtqueue_napi_schedule(napi, vq);
  594. else
  595. /* We were probably waiting for more output buffers. */
  596. netif_wake_subqueue(vi->dev, vq2txq(vq));
  597. }
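/* For mergeable receive buffers the per-buffer context is a plain integer
 * that packs headroom in the upper bits and truesize in the lower 22 bits:
 *   ctx = (headroom << MRG_CTX_HEADER_SHIFT) | truesize
 * e.g. truesize 1536 with 256 bytes of headroom gives (256 << 22) | 1536.
 */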
  598. #define MRG_CTX_HEADER_SHIFT 22
  599. static void *mergeable_len_to_ctx(unsigned int truesize,
  600. unsigned int headroom)
  601. {
  602. return (void *)(unsigned long)((headroom << MRG_CTX_HEADER_SHIFT) | truesize);
  603. }
  604. static unsigned int mergeable_ctx_to_headroom(void *mrg_ctx)
  605. {
  606. return (unsigned long)mrg_ctx >> MRG_CTX_HEADER_SHIFT;
  607. }
  608. static unsigned int mergeable_ctx_to_truesize(void *mrg_ctx)
  609. {
  610. return (unsigned long)mrg_ctx & ((1 << MRG_CTX_HEADER_SHIFT) - 1);
  611. }
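/* Sanity-check the length reported by the device against the buffer's
 * truesize minus head- and tailroom; a violation indicates a broken device
 * and is counted as an rx_length_error.
 */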
  612. static int check_mergeable_len(struct net_device *dev, void *mrg_ctx,
  613. unsigned int len)
  614. {
  615. unsigned int headroom, tailroom, room, truesize;
  616. truesize = mergeable_ctx_to_truesize(mrg_ctx);
  617. headroom = mergeable_ctx_to_headroom(mrg_ctx);
  618. tailroom = headroom ? sizeof(struct skb_shared_info) : 0;
  619. room = SKB_DATA_ALIGN(headroom + tailroom);
  620. if (len > truesize - room) {
  621. pr_debug("%s: rx error: len %u exceeds truesize %lu\n",
  622. dev->name, len, (unsigned long)(truesize - room));
  623. DEV_STATS_INC(dev, rx_length_errors);
  624. return -1;
  625. }
  626. return 0;
  627. }
  628. static struct sk_buff *virtnet_build_skb(void *buf, unsigned int buflen,
  629. unsigned int headroom,
  630. unsigned int len)
  631. {
  632. struct sk_buff *skb;
  633. skb = build_skb(buf, buflen);
  634. if (unlikely(!skb))
  635. return NULL;
  636. skb_reserve(skb, headroom);
  637. skb_put(skb, len);
  638. return skb;
  639. }
  640. /* Called from bottom half context */
  641. static struct sk_buff *page_to_skb(struct virtnet_info *vi,
  642. struct receive_queue *rq,
  643. struct page *page, unsigned int offset,
  644. unsigned int len, unsigned int truesize,
  645. unsigned int headroom)
  646. {
  647. struct sk_buff *skb;
  648. struct virtio_net_common_hdr *hdr;
  649. unsigned int copy, hdr_len, hdr_padded_len;
  650. struct page *page_to_free = NULL;
  651. int tailroom, shinfo_size;
  652. char *p, *hdr_p, *buf;
  653. p = page_address(page) + offset;
  654. hdr_p = p;
  655. hdr_len = vi->hdr_len;
  656. if (vi->mergeable_rx_bufs)
  657. hdr_padded_len = hdr_len;
  658. else
  659. hdr_padded_len = sizeof(struct padded_vnet_hdr);
  660. buf = p - headroom;
  661. len -= hdr_len;
  662. offset += hdr_padded_len;
  663. p += hdr_padded_len;
  664. tailroom = truesize - headroom - hdr_padded_len - len;
  665. shinfo_size = SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
  666. if (!NET_IP_ALIGN && len > GOOD_COPY_LEN && tailroom >= shinfo_size) {
  667. skb = virtnet_build_skb(buf, truesize, p - buf, len);
  668. if (unlikely(!skb))
  669. return NULL;
  670. page = (struct page *)page->private;
  671. if (page)
  672. give_pages(rq, page);
  673. goto ok;
  674. }
  675. /* copy small packet so we can reuse these pages for small data */
  676. skb = napi_alloc_skb(&rq->napi, GOOD_COPY_LEN);
  677. if (unlikely(!skb))
  678. return NULL;
  679. /* Copy the whole frame if it fits in skb->head; otherwise
  680. * let virtio_net_hdr_to_skb() and GRO pull headers as needed.
  681. */
  682. if (len <= skb_tailroom(skb))
  683. copy = len;
  684. else
  685. copy = ETH_HLEN;
  686. skb_put_data(skb, p, copy);
  687. len -= copy;
  688. offset += copy;
  689. if (vi->mergeable_rx_bufs) {
  690. if (len)
  691. skb_add_rx_frag(skb, 0, page, offset, len, truesize);
  692. else
  693. page_to_free = page;
  694. goto ok;
  695. }
  696. /*
  697. * Verify that we can indeed put this data into a skb.
  698. * This is here to handle cases when the device erroneously
  699. * tries to receive more than is possible. This is usually
  700. * the case of a broken device.
  701. */
  702. if (unlikely(len > MAX_SKB_FRAGS * PAGE_SIZE)) {
  703. net_dbg_ratelimited("%s: too much data\n", skb->dev->name);
  704. dev_kfree_skb(skb);
  705. return NULL;
  706. }
  707. BUG_ON(offset >= PAGE_SIZE);
  708. while (len) {
  709. unsigned int frag_size = min((unsigned)PAGE_SIZE - offset, len);
  710. skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags, page, offset,
  711. frag_size, truesize);
  712. len -= frag_size;
  713. page = (struct page *)page->private;
  714. offset = 0;
  715. }
  716. if (page)
  717. give_pages(rq, page);
  718. ok:
  719. hdr = skb_vnet_common_hdr(skb);
  720. memcpy(hdr, hdr_p, hdr_len);
  721. if (page_to_free)
  722. put_page(page_to_free);
  723. return skb;
  724. }
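/* Drop one reference on the premapped DMA region that backs @buf; sync the
 * range for the CPU if required, and unmap the region and release its page
 * once the last reference is gone.
 */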
  725. static void virtnet_rq_unmap(struct receive_queue *rq, void *buf, u32 len)
  726. {
  727. struct page *page = virt_to_head_page(buf);
  728. struct virtnet_rq_dma *dma;
  729. void *head;
  730. int offset;
  731. head = page_address(page);
  732. dma = head;
  733. --dma->ref;
  734. if (dma->need_sync && len) {
  735. offset = buf - (head + sizeof(*dma));
  736. virtqueue_dma_sync_single_range_for_cpu(rq->vq, dma->addr,
  737. offset, len,
  738. DMA_FROM_DEVICE);
  739. }
  740. if (dma->ref)
  741. return;
  742. virtqueue_dma_unmap_single_attrs(rq->vq, dma->addr, dma->len,
  743. DMA_FROM_DEVICE, DMA_ATTR_SKIP_CPU_SYNC);
  744. put_page(page);
  745. }
  746. static void *virtnet_rq_get_buf(struct receive_queue *rq, u32 *len, void **ctx)
  747. {
  748. void *buf;
  749. buf = virtqueue_get_buf_ctx(rq->vq, len, ctx);
  750. if (buf && rq->do_dma)
  751. virtnet_rq_unmap(rq, buf, *len);
  752. return buf;
  753. }
  754. static void virtnet_rq_init_one_sg(struct receive_queue *rq, void *buf, u32 len)
  755. {
  756. struct virtnet_rq_dma *dma;
  757. dma_addr_t addr;
  758. u32 offset;
  759. void *head;
  760. if (!rq->do_dma) {
  761. sg_init_one(rq->sg, buf, len);
  762. return;
  763. }
  764. head = page_address(rq->alloc_frag.page);
  765. offset = buf - head;
  766. dma = head;
  767. addr = dma->addr - sizeof(*dma) + offset;
  768. sg_init_table(rq->sg, 1);
  769. rq->sg[0].dma_address = addr;
  770. rq->sg[0].length = len;
  771. }
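/* Carve an rx buffer out of the queue's page_frag. When the queue premaps its
 * own DMA (do_dma), the first bytes of each frag page hold a struct
 * virtnet_rq_dma describing the mapping; its refcount tracks the buffers
 * still outstanding from that page.
 */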
  772. static void *virtnet_rq_alloc(struct receive_queue *rq, u32 size, gfp_t gfp)
  773. {
  774. struct page_frag *alloc_frag = &rq->alloc_frag;
  775. struct virtnet_rq_dma *dma;
  776. void *buf, *head;
  777. dma_addr_t addr;
  778. head = page_address(alloc_frag->page);
  779. if (rq->do_dma) {
  780. dma = head;
  781. /* new pages */
  782. if (!alloc_frag->offset) {
  783. if (rq->last_dma) {
  784. /* Now that a new page has been allocated, the previous
  785. * dma mapping will no longer be used, so it can be unmapped
  786. * once its refcount drops to 0.
  787. */
  788. virtnet_rq_unmap(rq, rq->last_dma, 0);
  789. rq->last_dma = NULL;
  790. }
  791. dma->len = alloc_frag->size - sizeof(*dma);
  792. addr = virtqueue_dma_map_single_attrs(rq->vq, dma + 1,
  793. dma->len, DMA_FROM_DEVICE, 0);
  794. if (virtqueue_dma_mapping_error(rq->vq, addr))
  795. return NULL;
  796. dma->addr = addr;
  797. dma->need_sync = virtqueue_dma_need_sync(rq->vq, addr);
  798. /* Take a reference on dma to prevent the mapping from
  799. * being released during error handling. The reference is
  800. * dropped once the pages are no longer used.
  801. */
  802. get_page(alloc_frag->page);
  803. dma->ref = 1;
  804. alloc_frag->offset = sizeof(*dma);
  805. rq->last_dma = dma;
  806. }
  807. ++dma->ref;
  808. }
  809. buf = head + alloc_frag->offset;
  810. get_page(alloc_frag->page);
  811. alloc_frag->offset += size;
  812. return buf;
  813. }
  814. static void virtnet_rq_unmap_free_buf(struct virtqueue *vq, void *buf)
  815. {
  816. struct virtnet_info *vi = vq->vdev->priv;
  817. struct receive_queue *rq;
  818. int i = vq2rxq(vq);
  819. rq = &vi->rq[i];
  820. if (rq->xsk_pool) {
  821. xsk_buff_free((struct xdp_buff *)buf);
  822. return;
  823. }
  824. if (rq->do_dma)
  825. virtnet_rq_unmap(rq, buf, 0);
  826. virtnet_rq_free_buf(vi, rq, buf);
  827. }
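/* Reclaim completed transmit buffers and fold the freed byte/packet counts
 * into the send queue statistics.
 */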
  828. static void free_old_xmit(struct send_queue *sq, struct netdev_queue *txq,
  829. bool in_napi)
  830. {
  831. struct virtnet_sq_free_stats stats = {0};
  832. __free_old_xmit(sq, txq, in_napi, &stats);
	/* Avoid the stats-update overhead when no packets have been
	 * processed, which happens when we are called speculatively from
	 * start_xmit.
	 */
  836. if (!stats.packets && !stats.napi_packets)
  837. return;
  838. u64_stats_update_begin(&sq->stats.syncp);
  839. u64_stats_add(&sq->stats.bytes, stats.bytes + stats.napi_bytes);
  840. u64_stats_add(&sq->stats.packets, stats.packets + stats.napi_packets);
  841. u64_stats_update_end(&sq->stats.syncp);
  842. }
  843. static bool is_xdp_raw_buffer_queue(struct virtnet_info *vi, int q)
  844. {
  845. if (q < (vi->curr_queue_pairs - vi->xdp_queue_pairs))
  846. return false;
  847. else if (q < vi->curr_queue_pairs)
  848. return true;
  849. else
  850. return false;
  851. }
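/* Stop the transmit queue when fewer than 2 + MAX_SKB_FRAGS descriptors are
 * left, and either arm a delayed completion callback or, in the non-NAPI
 * case, try to free completed buffers and restart the queue right away.
 */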
  852. static void check_sq_full_and_disable(struct virtnet_info *vi,
  853. struct net_device *dev,
  854. struct send_queue *sq)
  855. {
  856. bool use_napi = sq->napi.weight;
  857. int qnum;
  858. qnum = sq - vi->sq;
/* If running out of space, stop queue to avoid getting packets that we
 * are then unable to transmit.
 * An alternative would be to force queuing layer to requeue the skb by
 * returning NETDEV_TX_BUSY. However, NETDEV_TX_BUSY should not be
 * returned in a normal path of operation: it means that driver is not
 * maintaining the TX queue stop/start state properly, and causes
 * the stack to do a non-trivial amount of useless work.
 * Since most packets only take 1 or 2 ring slots, stopping the queue
 * early means 16 slots are typically wasted.
 */
  869. if (sq->vq->num_free < 2+MAX_SKB_FRAGS) {
  870. struct netdev_queue *txq = netdev_get_tx_queue(dev, qnum);
  871. netif_tx_stop_queue(txq);
  872. u64_stats_update_begin(&sq->stats.syncp);
  873. u64_stats_inc(&sq->stats.stop);
  874. u64_stats_update_end(&sq->stats.syncp);
  875. if (use_napi) {
  876. if (unlikely(!virtqueue_enable_cb_delayed(sq->vq)))
  877. virtqueue_napi_schedule(&sq->napi, sq->vq);
  878. } else if (unlikely(!virtqueue_enable_cb_delayed(sq->vq))) {
  879. /* More just got used, free them then recheck. */
  880. free_old_xmit(sq, txq, false);
  881. if (sq->vq->num_free >= 2+MAX_SKB_FRAGS) {
  882. netif_start_subqueue(dev, qnum);
  883. u64_stats_update_begin(&sq->stats.syncp);
  884. u64_stats_inc(&sq->stats.wake);
  885. u64_stats_update_end(&sq->stats.syncp);
  886. virtqueue_disable_cb(sq->vq);
  887. }
  888. }
  889. }
  890. }
  891. static void sg_fill_dma(struct scatterlist *sg, dma_addr_t addr, u32 len)
  892. {
  893. sg->dma_address = addr;
  894. sg->length = len;
  895. }
  896. /* Note that @len is the length of received data without virtio header */
  897. static struct xdp_buff *buf_to_xdp(struct virtnet_info *vi,
  898. struct receive_queue *rq, void *buf,
  899. u32 len, bool first_buf)
  900. {
  901. struct xdp_buff *xdp;
  902. u32 bufsize;
  903. xdp = (struct xdp_buff *)buf;
	/* In virtnet_add_recvbuf_xsk() we use part of XDP_PACKET_HEADROOM for
	 * the virtio header and ask the vhost to fill data starting from
	 *     hard_start + XDP_PACKET_HEADROOM - vi->hdr_len
	 * The first buffer carries the virtio header, so the region left for
	 * frame data is
	 *     xsk_pool_get_rx_frame_size()
	 * Buffers after the first one do not have a virtio header, so their
	 * maximum frame data length is
	 *     xsk_pool_get_rx_frame_size() + vi->hdr_len
	 */
  914. bufsize = xsk_pool_get_rx_frame_size(rq->xsk_pool);
  915. if (!first_buf)
  916. bufsize += vi->hdr_len;
  917. if (unlikely(len > bufsize)) {
  918. pr_debug("%s: rx error: len %u exceeds truesize %u\n",
  919. vi->dev->name, len, bufsize);
  920. DEV_STATS_INC(vi->dev, rx_length_errors);
  921. xsk_buff_free(xdp);
  922. return NULL;
  923. }
  924. xsk_buff_set_size(xdp, len);
  925. xsk_buff_dma_sync_for_cpu(xdp);
  926. return xdp;
  927. }
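/* Copy an XSK buffer (including its metadata area) into a freshly allocated
 * skb and release the xdp_buff back to the pool.
 */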
  928. static struct sk_buff *xsk_construct_skb(struct receive_queue *rq,
  929. struct xdp_buff *xdp)
  930. {
  931. unsigned int metasize = xdp->data - xdp->data_meta;
  932. struct sk_buff *skb;
  933. unsigned int size;
  934. size = xdp->data_end - xdp->data_hard_start;
  935. skb = napi_alloc_skb(&rq->napi, size);
  936. if (unlikely(!skb)) {
  937. xsk_buff_free(xdp);
  938. return NULL;
  939. }
  940. skb_reserve(skb, xdp->data_meta - xdp->data_hard_start);
  941. size = xdp->data_end - xdp->data_meta;
  942. memcpy(__skb_put(skb, size), xdp->data_meta, size);
  943. if (metasize) {
  944. __skb_pull(skb, metasize);
  945. skb_metadata_set(skb, metasize);
  946. }
  947. xsk_buff_free(xdp);
  948. return skb;
  949. }
  950. static struct sk_buff *virtnet_receive_xsk_small(struct net_device *dev, struct virtnet_info *vi,
  951. struct receive_queue *rq, struct xdp_buff *xdp,
  952. unsigned int *xdp_xmit,
  953. struct virtnet_rq_stats *stats)
  954. {
  955. struct bpf_prog *prog;
  956. u32 ret;
  957. ret = XDP_PASS;
  958. rcu_read_lock();
  959. prog = rcu_dereference(rq->xdp_prog);
  960. if (prog)
  961. ret = virtnet_xdp_handler(prog, xdp, dev, xdp_xmit, stats);
  962. rcu_read_unlock();
  963. switch (ret) {
  964. case XDP_PASS:
  965. return xsk_construct_skb(rq, xdp);
  966. case XDP_TX:
  967. case XDP_REDIRECT:
  968. return NULL;
  969. default:
  970. /* drop packet */
  971. xsk_buff_free(xdp);
  972. u64_stats_inc(&stats->drops);
  973. return NULL;
  974. }
  975. }
  976. static void xsk_drop_follow_bufs(struct net_device *dev,
  977. struct receive_queue *rq,
  978. u32 num_buf,
  979. struct virtnet_rq_stats *stats)
  980. {
  981. struct xdp_buff *xdp;
  982. u32 len;
  983. while (num_buf-- > 1) {
  984. xdp = virtqueue_get_buf(rq->vq, &len);
  985. if (unlikely(!xdp)) {
  986. pr_debug("%s: rx error: %d buffers missing\n",
  987. dev->name, num_buf);
  988. DEV_STATS_INC(dev, rx_length_errors);
  989. break;
  990. }
  991. u64_stats_add(&stats->bytes, len);
  992. xsk_buff_free(xdp);
  993. }
  994. }
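/* For a merged XSK packet, pull each follow-up buffer out of the virtqueue,
 * copy it into a page fragment and append it to @head_skb. On failure the
 * remaining buffers of the packet are dropped.
 */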
  995. static int xsk_append_merge_buffer(struct virtnet_info *vi,
  996. struct receive_queue *rq,
  997. struct sk_buff *head_skb,
  998. u32 num_buf,
  999. struct virtio_net_hdr_mrg_rxbuf *hdr,
  1000. struct virtnet_rq_stats *stats)
  1001. {
  1002. struct sk_buff *curr_skb;
  1003. struct xdp_buff *xdp;
  1004. u32 len, truesize;
  1005. struct page *page;
  1006. void *buf;
  1007. curr_skb = head_skb;
  1008. while (--num_buf) {
  1009. buf = virtqueue_get_buf(rq->vq, &len);
  1010. if (unlikely(!buf)) {
  1011. pr_debug("%s: rx error: %d buffers out of %d missing\n",
  1012. vi->dev->name, num_buf,
  1013. virtio16_to_cpu(vi->vdev,
  1014. hdr->num_buffers));
  1015. DEV_STATS_INC(vi->dev, rx_length_errors);
  1016. return -EINVAL;
  1017. }
  1018. u64_stats_add(&stats->bytes, len);
  1019. xdp = buf_to_xdp(vi, rq, buf, len, false);
  1020. if (!xdp)
  1021. goto err;
  1022. buf = napi_alloc_frag(len);
  1023. if (!buf) {
  1024. xsk_buff_free(xdp);
  1025. goto err;
  1026. }
  1027. memcpy(buf, xdp->data - vi->hdr_len, len);
  1028. xsk_buff_free(xdp);
  1029. page = virt_to_page(buf);
  1030. truesize = len;
  1031. curr_skb = virtnet_skb_append_frag(head_skb, curr_skb, page,
  1032. buf, len, truesize);
  1033. if (!curr_skb) {
  1034. put_page(page);
  1035. goto err;
  1036. }
  1037. }
  1038. return 0;
  1039. err:
  1040. xsk_drop_follow_bufs(vi->dev, rq, num_buf, stats);
  1041. return -EINVAL;
  1042. }
  1043. static struct sk_buff *virtnet_receive_xsk_merge(struct net_device *dev, struct virtnet_info *vi,
  1044. struct receive_queue *rq, struct xdp_buff *xdp,
  1045. unsigned int *xdp_xmit,
  1046. struct virtnet_rq_stats *stats)
  1047. {
  1048. struct virtio_net_hdr_mrg_rxbuf *hdr;
  1049. struct bpf_prog *prog;
  1050. struct sk_buff *skb;
  1051. u32 ret, num_buf;
  1052. hdr = xdp->data - vi->hdr_len;
  1053. num_buf = virtio16_to_cpu(vi->vdev, hdr->num_buffers);
  1054. ret = XDP_PASS;
  1055. rcu_read_lock();
  1056. prog = rcu_dereference(rq->xdp_prog);
  1057. /* TODO: support multi buffer. */
  1058. if (prog && num_buf == 1)
  1059. ret = virtnet_xdp_handler(prog, xdp, dev, xdp_xmit, stats);
  1060. rcu_read_unlock();
  1061. switch (ret) {
  1062. case XDP_PASS:
  1063. skb = xsk_construct_skb(rq, xdp);
  1064. if (!skb)
  1065. goto drop_bufs;
  1066. if (xsk_append_merge_buffer(vi, rq, skb, num_buf, hdr, stats)) {
  1067. dev_kfree_skb(skb);
  1068. goto drop;
  1069. }
  1070. return skb;
  1071. case XDP_TX:
  1072. case XDP_REDIRECT:
  1073. return NULL;
  1074. default:
  1075. /* drop packet */
  1076. xsk_buff_free(xdp);
  1077. }
  1078. drop_bufs:
  1079. xsk_drop_follow_bufs(dev, rq, num_buf, stats);
  1080. drop:
  1081. u64_stats_inc(&stats->drops);
  1082. return NULL;
  1083. }
  1084. static void virtnet_receive_xsk_buf(struct virtnet_info *vi, struct receive_queue *rq,
  1085. void *buf, u32 len,
  1086. unsigned int *xdp_xmit,
  1087. struct virtnet_rq_stats *stats)
  1088. {
  1089. struct net_device *dev = vi->dev;
  1090. struct sk_buff *skb = NULL;
  1091. struct xdp_buff *xdp;
  1092. u8 flags;
  1093. len -= vi->hdr_len;
  1094. u64_stats_add(&stats->bytes, len);
  1095. xdp = buf_to_xdp(vi, rq, buf, len, true);
  1096. if (!xdp)
  1097. return;
  1098. if (unlikely(len < ETH_HLEN)) {
  1099. pr_debug("%s: short packet %i\n", dev->name, len);
  1100. DEV_STATS_INC(dev, rx_length_errors);
  1101. xsk_buff_free(xdp);
  1102. return;
  1103. }
  1104. flags = ((struct virtio_net_common_hdr *)(xdp->data - vi->hdr_len))->hdr.flags;
  1105. if (!vi->mergeable_rx_bufs)
  1106. skb = virtnet_receive_xsk_small(dev, vi, rq, xdp, xdp_xmit, stats);
  1107. else
  1108. skb = virtnet_receive_xsk_merge(dev, vi, rq, xdp, xdp_xmit, stats);
  1109. if (skb)
  1110. virtnet_receive_done(vi, rq, skb, flags);
  1111. }
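/* Refill the receive queue with buffers taken from the XSK pool, reserving
 * vi->hdr_len of the XDP headroom in front of each frame for the virtio
 * header.
 */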
  1112. static int virtnet_add_recvbuf_xsk(struct virtnet_info *vi, struct receive_queue *rq,
  1113. struct xsk_buff_pool *pool, gfp_t gfp)
  1114. {
  1115. struct xdp_buff **xsk_buffs;
  1116. dma_addr_t addr;
  1117. int err = 0;
  1118. u32 len, i;
  1119. int num;
  1120. xsk_buffs = rq->xsk_buffs;
  1121. num = xsk_buff_alloc_batch(pool, xsk_buffs, rq->vq->num_free);
  1122. if (!num)
  1123. return -ENOMEM;
  1124. len = xsk_pool_get_rx_frame_size(pool) + vi->hdr_len;
  1125. for (i = 0; i < num; ++i) {
  1126. /* Use the part of XDP_PACKET_HEADROOM as the virtnet hdr space.
  1127. * We assume XDP_PACKET_HEADROOM is larger than hdr->len.
  1128. * (see function virtnet_xsk_pool_enable)
  1129. */
  1130. addr = xsk_buff_xdp_get_dma(xsk_buffs[i]) - vi->hdr_len;
  1131. sg_init_table(rq->sg, 1);
  1132. sg_fill_dma(rq->sg, addr, len);
  1133. err = virtqueue_add_inbuf(rq->vq, rq->sg, 1, xsk_buffs[i], gfp);
  1134. if (err)
  1135. goto err;
  1136. }
  1137. return num;
  1138. err:
  1139. for (; i < num; ++i)
  1140. xsk_buff_free(xsk_buffs[i]);
  1141. return err;
  1142. }
  1143. static int virtnet_xsk_wakeup(struct net_device *dev, u32 qid, u32 flag)
  1144. {
  1145. struct virtnet_info *vi = netdev_priv(dev);
  1146. struct send_queue *sq;
  1147. if (!netif_running(dev))
  1148. return -ENETDOWN;
  1149. if (qid >= vi->curr_queue_pairs)
  1150. return -EINVAL;
  1151. sq = &vi->sq[qid];
  1152. if (napi_if_scheduled_mark_missed(&sq->napi))
  1153. return 0;
  1154. local_bh_disable();
  1155. virtqueue_napi_schedule(&sq->napi, sq->vq);
  1156. local_bh_enable();
  1157. return 0;
  1158. }
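/* Queue a single xdp_frame (plus any frags) on the send virtqueue, prepending
 * a zeroed virtio header inside the frame's headroom.
 */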
  1159. static int __virtnet_xdp_xmit_one(struct virtnet_info *vi,
  1160. struct send_queue *sq,
  1161. struct xdp_frame *xdpf)
  1162. {
  1163. struct virtio_net_hdr_mrg_rxbuf *hdr;
  1164. struct skb_shared_info *shinfo;
  1165. u8 nr_frags = 0;
  1166. int err, i;
  1167. if (unlikely(xdpf->headroom < vi->hdr_len))
  1168. return -EOVERFLOW;
  1169. if (unlikely(xdp_frame_has_frags(xdpf))) {
  1170. shinfo = xdp_get_shared_info_from_frame(xdpf);
  1171. nr_frags = shinfo->nr_frags;
  1172. }
	/* The wrapping function virtnet_xdp_xmit() frees up the pending old
	 * buffers, and for that xdp_get_frame_len() and xdp_return_frame()
	 * must locate skb_shared_info via xdpf->data and xdpf->headroom.
	 * Therefore we have to update the headroom here, before handing the
	 * frame to the virtqueue.
	 */
  1180. xdpf->headroom -= vi->hdr_len;
  1181. xdpf->data -= vi->hdr_len;
  1182. /* Zero header and leave csum up to XDP layers */
  1183. hdr = xdpf->data;
  1184. memset(hdr, 0, vi->hdr_len);
  1185. xdpf->len += vi->hdr_len;
  1186. sg_init_table(sq->sg, nr_frags + 1);
  1187. sg_set_buf(sq->sg, xdpf->data, xdpf->len);
  1188. for (i = 0; i < nr_frags; i++) {
  1189. skb_frag_t *frag = &shinfo->frags[i];
  1190. sg_set_page(&sq->sg[i + 1], skb_frag_page(frag),
  1191. skb_frag_size(frag), skb_frag_off(frag));
  1192. }
  1193. err = virtqueue_add_outbuf(sq->vq, sq->sg, nr_frags + 1,
  1194. xdp_to_ptr(xdpf), GFP_ATOMIC);
  1195. if (unlikely(err))
  1196. return -ENOSPC; /* Caller handle free/refcnt */
  1197. return 0;
  1198. }
/* When vi->curr_queue_pairs > nr_cpu_ids, the txq/sq is only used for XDP TX
 * on the current CPU, so it does not need to be locked.
 *
 * A macro is used instead of inline functions because we have to deal with
 * three issues at the same time: 1. choosing the sq, 2. deciding whether to
 * take the txq lock and doing so, and 3. keeping sparse happy. It is hard for
 * a pair of inline functions to solve all three cleanly.
 */
  1207. #define virtnet_xdp_get_sq(vi) ({ \
  1208. int cpu = smp_processor_id(); \
  1209. struct netdev_queue *txq; \
  1210. typeof(vi) v = (vi); \
  1211. unsigned int qp; \
  1212. \
  1213. if (v->curr_queue_pairs > nr_cpu_ids) { \
  1214. qp = v->curr_queue_pairs - v->xdp_queue_pairs; \
  1215. qp += cpu; \
  1216. txq = netdev_get_tx_queue(v->dev, qp); \
  1217. __netif_tx_acquire(txq); \
  1218. } else { \
  1219. qp = cpu % v->curr_queue_pairs; \
  1220. txq = netdev_get_tx_queue(v->dev, qp); \
  1221. __netif_tx_lock(txq, cpu); \
  1222. } \
  1223. v->sq + qp; \
  1224. })
  1225. #define virtnet_xdp_put_sq(vi, q) { \
  1226. struct netdev_queue *txq; \
  1227. typeof(vi) v = (vi); \
  1228. \
  1229. txq = netdev_get_tx_queue(v->dev, (q) - v->sq); \
  1230. if (v->curr_queue_pairs > nr_cpu_ids) \
  1231. __netif_tx_release(txq); \
  1232. else \
  1233. __netif_tx_unlock(txq); \
  1234. }
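/* ndo_xdp_xmit handler: transmit a batch of XDP frames on the per-CPU (or
 * locked) XDP send queue, freeing completed buffers first and kicking the
 * device when XDP_XMIT_FLUSH is set.
 */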
  1235. static int virtnet_xdp_xmit(struct net_device *dev,
  1236. int n, struct xdp_frame **frames, u32 flags)
  1237. {
  1238. struct virtnet_info *vi = netdev_priv(dev);
  1239. struct virtnet_sq_free_stats stats = {0};
  1240. struct receive_queue *rq = vi->rq;
  1241. struct bpf_prog *xdp_prog;
  1242. struct send_queue *sq;
  1243. int nxmit = 0;
  1244. int kicks = 0;
  1245. int ret;
  1246. int i;
	/* Only allow ndo_xdp_xmit if XDP is loaded on dev, as this
	 * indicates that the XDP resources have been successfully allocated.
	 */
  1250. xdp_prog = rcu_access_pointer(rq->xdp_prog);
  1251. if (!xdp_prog)
  1252. return -ENXIO;
  1253. sq = virtnet_xdp_get_sq(vi);
  1254. if (unlikely(flags & ~XDP_XMIT_FLAGS_MASK)) {
  1255. ret = -EINVAL;
  1256. goto out;
  1257. }
  1258. /* Free up any pending old buffers before queueing new ones. */
  1259. __free_old_xmit(sq, netdev_get_tx_queue(dev, sq - vi->sq),
  1260. false, &stats);
  1261. for (i = 0; i < n; i++) {
  1262. struct xdp_frame *xdpf = frames[i];
  1263. if (__virtnet_xdp_xmit_one(vi, sq, xdpf))
  1264. break;
  1265. nxmit++;
  1266. }
  1267. ret = nxmit;
  1268. if (!is_xdp_raw_buffer_queue(vi, sq - vi->sq))
  1269. check_sq_full_and_disable(vi, dev, sq);
  1270. if (flags & XDP_XMIT_FLUSH) {
  1271. if (virtqueue_kick_prepare(sq->vq) && virtqueue_notify(sq->vq))
  1272. kicks = 1;
  1273. }
  1274. out:
  1275. u64_stats_update_begin(&sq->stats.syncp);
  1276. u64_stats_add(&sq->stats.bytes, stats.bytes);
  1277. u64_stats_add(&sq->stats.packets, stats.packets);
  1278. u64_stats_add(&sq->stats.xdp_tx, n);
  1279. u64_stats_add(&sq->stats.xdp_tx_drops, n - nxmit);
  1280. u64_stats_add(&sq->stats.kicks, kicks);
  1281. u64_stats_update_end(&sq->stats.syncp);
  1282. virtnet_xdp_put_sq(vi, sq);
  1283. return ret;
  1284. }
  1285. static void put_xdp_frags(struct xdp_buff *xdp)
  1286. {
  1287. struct skb_shared_info *shinfo;
  1288. struct page *xdp_page;
  1289. int i;
  1290. if (xdp_buff_has_frags(xdp)) {
  1291. shinfo = xdp_get_shared_info_from_buff(xdp);
  1292. for (i = 0; i < shinfo->nr_frags; i++) {
  1293. xdp_page = skb_frag_page(&shinfo->frags[i]);
  1294. put_page(xdp_page);
  1295. }
  1296. }
  1297. }
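/* Run the XDP program on @xdp and carry out the resulting action (pass, local
 * transmit or redirect). Returns the action for the caller to act on, with
 * failed TX/redirect attempts reported as XDP_DROP.
 */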
  1298. static int virtnet_xdp_handler(struct bpf_prog *xdp_prog, struct xdp_buff *xdp,
  1299. struct net_device *dev,
  1300. unsigned int *xdp_xmit,
  1301. struct virtnet_rq_stats *stats)
  1302. {
  1303. struct xdp_frame *xdpf;
  1304. int err;
  1305. u32 act;
  1306. act = bpf_prog_run_xdp(xdp_prog, xdp);
  1307. u64_stats_inc(&stats->xdp_packets);
  1308. switch (act) {
  1309. case XDP_PASS:
  1310. return act;
  1311. case XDP_TX:
  1312. u64_stats_inc(&stats->xdp_tx);
  1313. xdpf = xdp_convert_buff_to_frame(xdp);
  1314. if (unlikely(!xdpf)) {
  1315. netdev_dbg(dev, "convert buff to frame failed for xdp\n");
  1316. return XDP_DROP;
  1317. }
  1318. err = virtnet_xdp_xmit(dev, 1, &xdpf, 0);
  1319. if (unlikely(!err)) {
  1320. xdp_return_frame_rx_napi(xdpf);
  1321. } else if (unlikely(err < 0)) {
  1322. trace_xdp_exception(dev, xdp_prog, act);
  1323. return XDP_DROP;
  1324. }
  1325. *xdp_xmit |= VIRTIO_XDP_TX;
  1326. return act;
  1327. case XDP_REDIRECT:
  1328. u64_stats_inc(&stats->xdp_redirects);
  1329. err = xdp_do_redirect(dev, xdp, xdp_prog);
  1330. if (err)
  1331. return XDP_DROP;
  1332. *xdp_xmit |= VIRTIO_XDP_REDIR;
  1333. return act;
  1334. default:
  1335. bpf_warn_invalid_xdp_action(dev, xdp_prog, act);
  1336. fallthrough;
  1337. case XDP_ABORTED:
  1338. trace_xdp_exception(dev, xdp_prog, act);
  1339. fallthrough;
  1340. case XDP_DROP:
  1341. return XDP_DROP;
  1342. }
  1343. }
  1344. static unsigned int virtnet_get_headroom(struct virtnet_info *vi)
  1345. {
  1346. return vi->xdp_enabled ? XDP_PACKET_HEADROOM : 0;
  1347. }
/* We copy the packet for XDP in the following cases:
 *
 * 1) The packet is scattered across multiple rx buffers.
 * 2) The headroom space is insufficient.
 *
 * This is inefficient, but it is a temporary condition that we only hit right
 * after XDP is enabled and until the queue is refilled with large buffers that
 * have sufficient headroom - so it should affect at most queue-size packets.
 * Afterwards, the conditions that allow XDP to be enabled preclude the
 * underlying device from scattering packets across multiple buffers
 * (num_buf > 1), and we make sure the buffers have enough headroom.
 */
  1362. static struct page *xdp_linearize_page(struct net_device *dev,
  1363. struct receive_queue *rq,
  1364. int *num_buf,
  1365. struct page *p,
  1366. int offset,
  1367. int page_off,
  1368. unsigned int *len)
  1369. {
  1370. int tailroom = SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
  1371. struct page *page;
  1372. if (page_off + *len + tailroom > PAGE_SIZE)
  1373. return NULL;
  1374. page = alloc_page(GFP_ATOMIC);
  1375. if (!page)
  1376. return NULL;
  1377. memcpy(page_address(page) + page_off, page_address(p) + offset, *len);
  1378. page_off += *len;
	/* Only mergeable mode can enter this loop: in small mode
	 * *num_buf == 1, so the loop body is never executed.
	 */
  1382. while (--*num_buf) {
  1383. unsigned int buflen;
  1384. void *buf;
  1385. void *ctx;
  1386. int off;
  1387. buf = virtnet_rq_get_buf(rq, &buflen, &ctx);
  1388. if (unlikely(!buf))
  1389. goto err_buf;
  1390. p = virt_to_head_page(buf);
  1391. off = buf - page_address(p);
  1392. if (check_mergeable_len(dev, ctx, buflen)) {
  1393. put_page(p);
  1394. goto err_buf;
  1395. }
		/* Guard against a misconfigured or uncooperative backend that
		 * is sending packets larger than the MTU.
		 */
  1399. if ((page_off + buflen + tailroom) > PAGE_SIZE) {
  1400. put_page(p);
  1401. goto err_buf;
  1402. }
  1403. memcpy(page_address(page) + page_off,
  1404. page_address(p) + off, buflen);
  1405. page_off += buflen;
  1406. put_page(p);
  1407. }
  1408. /* Headroom does not contribute to packet length */
  1409. *len = page_off - XDP_PACKET_HEADROOM;
  1410. return page;
  1411. err_buf:
  1412. __free_pages(page, 0);
  1413. return NULL;
  1414. }
  1415. static struct sk_buff *receive_small_build_skb(struct virtnet_info *vi,
  1416. unsigned int xdp_headroom,
  1417. void *buf,
  1418. unsigned int len)
  1419. {
  1420. unsigned int header_offset;
  1421. unsigned int headroom;
  1422. unsigned int buflen;
  1423. struct sk_buff *skb;
  1424. header_offset = VIRTNET_RX_PAD + xdp_headroom;
  1425. headroom = vi->hdr_len + header_offset;
  1426. buflen = SKB_DATA_ALIGN(GOOD_PACKET_LEN + headroom) +
  1427. SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
  1428. skb = virtnet_build_skb(buf, buflen, headroom, len);
  1429. if (unlikely(!skb))
  1430. return NULL;
  1431. buf += header_offset;
  1432. memcpy(skb_vnet_common_hdr(skb), buf, vi->hdr_len);
  1433. return skb;
  1434. }
  1435. static struct sk_buff *receive_small_xdp(struct net_device *dev,
  1436. struct virtnet_info *vi,
  1437. struct receive_queue *rq,
  1438. struct bpf_prog *xdp_prog,
  1439. void *buf,
  1440. unsigned int xdp_headroom,
  1441. unsigned int len,
  1442. unsigned int *xdp_xmit,
  1443. struct virtnet_rq_stats *stats)
  1444. {
  1445. unsigned int header_offset = VIRTNET_RX_PAD + xdp_headroom;
  1446. unsigned int headroom = vi->hdr_len + header_offset;
  1447. struct virtio_net_hdr_mrg_rxbuf *hdr = buf + header_offset;
  1448. struct page *page = virt_to_head_page(buf);
  1449. struct page *xdp_page;
  1450. unsigned int buflen;
  1451. struct xdp_buff xdp;
  1452. struct sk_buff *skb;
  1453. unsigned int metasize = 0;
  1454. u32 act;
  1455. if (unlikely(hdr->hdr.gso_type))
  1456. goto err_xdp;
  1457. /* Partially checksummed packets must be dropped. */
  1458. if (unlikely(hdr->hdr.flags & VIRTIO_NET_HDR_F_NEEDS_CSUM))
  1459. goto err_xdp;
  1460. buflen = SKB_DATA_ALIGN(GOOD_PACKET_LEN + headroom) +
  1461. SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
  1462. if (unlikely(xdp_headroom < virtnet_get_headroom(vi))) {
  1463. int offset = buf - page_address(page) + header_offset;
  1464. unsigned int tlen = len + vi->hdr_len;
  1465. int num_buf = 1;
  1466. xdp_headroom = virtnet_get_headroom(vi);
  1467. header_offset = VIRTNET_RX_PAD + xdp_headroom;
  1468. headroom = vi->hdr_len + header_offset;
  1469. buflen = SKB_DATA_ALIGN(GOOD_PACKET_LEN + headroom) +
  1470. SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
  1471. xdp_page = xdp_linearize_page(dev, rq, &num_buf, page,
  1472. offset, header_offset,
  1473. &tlen);
  1474. if (!xdp_page)
  1475. goto err_xdp;
  1476. buf = page_address(xdp_page);
  1477. put_page(page);
  1478. page = xdp_page;
  1479. }
  1480. xdp_init_buff(&xdp, buflen, &rq->xdp_rxq);
  1481. xdp_prepare_buff(&xdp, buf + VIRTNET_RX_PAD + vi->hdr_len,
  1482. xdp_headroom, len, true);
  1483. act = virtnet_xdp_handler(xdp_prog, &xdp, dev, xdp_xmit, stats);
  1484. switch (act) {
  1485. case XDP_PASS:
  1486. /* Recalculate length in case bpf program changed it */
  1487. len = xdp.data_end - xdp.data;
  1488. metasize = xdp.data - xdp.data_meta;
  1489. break;
  1490. case XDP_TX:
  1491. case XDP_REDIRECT:
  1492. goto xdp_xmit;
  1493. default:
  1494. goto err_xdp;
  1495. }
  1496. skb = virtnet_build_skb(buf, buflen, xdp.data - buf, len);
  1497. if (unlikely(!skb))
  1498. goto err;
  1499. if (metasize)
  1500. skb_metadata_set(skb, metasize);
  1501. return skb;
  1502. err_xdp:
  1503. u64_stats_inc(&stats->xdp_drops);
  1504. err:
  1505. u64_stats_inc(&stats->drops);
  1506. put_page(page);
  1507. xdp_xmit:
  1508. return NULL;
  1509. }
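/* Receive path for small (single-buffer) mode: validate the length, hand the
 * buffer to XDP when a program is attached, otherwise build an skb directly
 * around the receive buffer.
 */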
  1510. static struct sk_buff *receive_small(struct net_device *dev,
  1511. struct virtnet_info *vi,
  1512. struct receive_queue *rq,
  1513. void *buf, void *ctx,
  1514. unsigned int len,
  1515. unsigned int *xdp_xmit,
  1516. struct virtnet_rq_stats *stats)
  1517. {
  1518. unsigned int xdp_headroom = (unsigned long)ctx;
  1519. struct page *page = virt_to_head_page(buf);
  1520. struct sk_buff *skb;
  1521. /* We passed the address of virtnet header to virtio-core,
  1522. * so truncate the padding.
  1523. */
  1524. buf -= VIRTNET_RX_PAD + xdp_headroom;
  1525. len -= vi->hdr_len;
  1526. u64_stats_add(&stats->bytes, len);
  1527. if (unlikely(len > GOOD_PACKET_LEN)) {
  1528. pr_debug("%s: rx error: len %u exceeds max size %d\n",
  1529. dev->name, len, GOOD_PACKET_LEN);
  1530. DEV_STATS_INC(dev, rx_length_errors);
  1531. goto err;
  1532. }
  1533. if (unlikely(vi->xdp_enabled)) {
  1534. struct bpf_prog *xdp_prog;
  1535. rcu_read_lock();
  1536. xdp_prog = rcu_dereference(rq->xdp_prog);
  1537. if (xdp_prog) {
  1538. skb = receive_small_xdp(dev, vi, rq, xdp_prog, buf,
  1539. xdp_headroom, len, xdp_xmit,
  1540. stats);
  1541. rcu_read_unlock();
  1542. return skb;
  1543. }
  1544. rcu_read_unlock();
  1545. }
  1546. skb = receive_small_build_skb(vi, xdp_headroom, buf, len);
  1547. if (likely(skb))
  1548. return skb;
  1549. err:
  1550. u64_stats_inc(&stats->drops);
  1551. put_page(page);
  1552. return NULL;
  1553. }
  1554. static struct sk_buff *receive_big(struct net_device *dev,
  1555. struct virtnet_info *vi,
  1556. struct receive_queue *rq,
  1557. void *buf,
  1558. unsigned int len,
  1559. struct virtnet_rq_stats *stats)
  1560. {
  1561. struct page *page = buf;
  1562. struct sk_buff *skb =
  1563. page_to_skb(vi, rq, page, 0, len, PAGE_SIZE, 0);
  1564. u64_stats_add(&stats->bytes, len - vi->hdr_len);
  1565. if (unlikely(!skb))
  1566. goto err;
  1567. return skb;
  1568. err:
  1569. u64_stats_inc(&stats->drops);
  1570. give_pages(rq, page);
  1571. return NULL;
  1572. }
  1573. static void mergeable_buf_free(struct receive_queue *rq, int num_buf,
  1574. struct net_device *dev,
  1575. struct virtnet_rq_stats *stats)
  1576. {
  1577. struct page *page;
  1578. void *buf;
  1579. int len;
  1580. while (num_buf-- > 1) {
  1581. buf = virtnet_rq_get_buf(rq, &len, NULL);
  1582. if (unlikely(!buf)) {
  1583. pr_debug("%s: rx error: %d buffers missing\n",
  1584. dev->name, num_buf);
  1585. DEV_STATS_INC(dev, rx_length_errors);
  1586. break;
  1587. }
  1588. u64_stats_add(&stats->bytes, len);
  1589. page = virt_to_head_page(buf);
  1590. put_page(page);
  1591. }
  1592. }
/* Why not use xdp_build_skb_from_frame()?
 * The XDP core assumes that xdp frags are PAGE_SIZE in length, and virtio-net
 * does not match its requirements on two points:
 * 1. The size of the prefilled buffer is not fixed before XDP is set.
 * 2. xdp_build_skb_from_frame() does more work than we need, such as calling
 *    eth_type_trans() (which virtio-net already does in receive_buf()).
 */
  1600. static struct sk_buff *build_skb_from_xdp_buff(struct net_device *dev,
  1601. struct virtnet_info *vi,
  1602. struct xdp_buff *xdp,
  1603. unsigned int xdp_frags_truesz)
  1604. {
  1605. struct skb_shared_info *sinfo = xdp_get_shared_info_from_buff(xdp);
  1606. unsigned int headroom, data_len;
  1607. struct sk_buff *skb;
  1608. int metasize;
  1609. u8 nr_frags;
  1610. if (unlikely(xdp->data_end > xdp_data_hard_end(xdp))) {
  1611. pr_debug("Error building skb as missing reserved tailroom for xdp");
  1612. return NULL;
  1613. }
  1614. if (unlikely(xdp_buff_has_frags(xdp)))
  1615. nr_frags = sinfo->nr_frags;
  1616. skb = build_skb(xdp->data_hard_start, xdp->frame_sz);
  1617. if (unlikely(!skb))
  1618. return NULL;
  1619. headroom = xdp->data - xdp->data_hard_start;
  1620. data_len = xdp->data_end - xdp->data;
  1621. skb_reserve(skb, headroom);
  1622. __skb_put(skb, data_len);
  1623. metasize = xdp->data - xdp->data_meta;
  1624. metasize = metasize > 0 ? metasize : 0;
  1625. if (metasize)
  1626. skb_metadata_set(skb, metasize);
  1627. if (unlikely(xdp_buff_has_frags(xdp)))
  1628. xdp_update_skb_shared_info(skb, nr_frags,
  1629. sinfo->xdp_frags_size,
  1630. xdp_frags_truesz,
  1631. xdp_buff_is_frag_pfmemalloc(xdp));
  1632. return skb;
  1633. }
  1634. /* TODO: build xdp in big mode */
  1635. static int virtnet_build_xdp_buff_mrg(struct net_device *dev,
  1636. struct virtnet_info *vi,
  1637. struct receive_queue *rq,
  1638. struct xdp_buff *xdp,
  1639. void *buf,
  1640. unsigned int len,
  1641. unsigned int frame_sz,
  1642. int *num_buf,
  1643. unsigned int *xdp_frags_truesize,
  1644. struct virtnet_rq_stats *stats)
  1645. {
  1646. struct virtio_net_hdr_mrg_rxbuf *hdr = buf;
  1647. unsigned int headroom, tailroom, room;
  1648. unsigned int truesize, cur_frag_size;
  1649. struct skb_shared_info *shinfo;
  1650. unsigned int xdp_frags_truesz = 0;
  1651. struct page *page;
  1652. skb_frag_t *frag;
  1653. int offset;
  1654. void *ctx;
  1655. xdp_init_buff(xdp, frame_sz, &rq->xdp_rxq);
  1656. xdp_prepare_buff(xdp, buf - XDP_PACKET_HEADROOM,
  1657. XDP_PACKET_HEADROOM + vi->hdr_len, len - vi->hdr_len, true);
  1658. if (!*num_buf)
  1659. return 0;
  1660. if (*num_buf > 1) {
  1661. /* If we want to build multi-buffer xdp, we need
  1662. * to specify that the flags of xdp_buff have the
  1663. * XDP_FLAGS_HAS_FRAG bit.
  1664. */
  1665. if (!xdp_buff_has_frags(xdp))
  1666. xdp_buff_set_frags_flag(xdp);
  1667. shinfo = xdp_get_shared_info_from_buff(xdp);
  1668. shinfo->nr_frags = 0;
  1669. shinfo->xdp_frags_size = 0;
  1670. }
  1671. if (*num_buf > MAX_SKB_FRAGS + 1)
  1672. return -EINVAL;
  1673. while (--*num_buf > 0) {
  1674. buf = virtnet_rq_get_buf(rq, &len, &ctx);
  1675. if (unlikely(!buf)) {
  1676. pr_debug("%s: rx error: %d buffers out of %d missing\n",
  1677. dev->name, *num_buf,
  1678. virtio16_to_cpu(vi->vdev, hdr->num_buffers));
  1679. DEV_STATS_INC(dev, rx_length_errors);
  1680. goto err;
  1681. }
  1682. u64_stats_add(&stats->bytes, len);
  1683. page = virt_to_head_page(buf);
  1684. offset = buf - page_address(page);
  1685. truesize = mergeable_ctx_to_truesize(ctx);
  1686. headroom = mergeable_ctx_to_headroom(ctx);
  1687. tailroom = headroom ? sizeof(struct skb_shared_info) : 0;
  1688. room = SKB_DATA_ALIGN(headroom + tailroom);
  1689. cur_frag_size = truesize;
  1690. xdp_frags_truesz += cur_frag_size;
  1691. if (unlikely(len > truesize - room || cur_frag_size > PAGE_SIZE)) {
  1692. put_page(page);
  1693. pr_debug("%s: rx error: len %u exceeds truesize %lu\n",
  1694. dev->name, len, (unsigned long)(truesize - room));
  1695. DEV_STATS_INC(dev, rx_length_errors);
  1696. goto err;
  1697. }
  1698. frag = &shinfo->frags[shinfo->nr_frags++];
  1699. skb_frag_fill_page_desc(frag, page, offset, len);
  1700. if (page_is_pfmemalloc(page))
  1701. xdp_buff_set_frag_pfmemalloc(xdp);
  1702. shinfo->xdp_frags_size += len;
  1703. }
  1704. *xdp_frags_truesize = xdp_frags_truesz;
  1705. return 0;
  1706. err:
  1707. put_xdp_frags(xdp);
  1708. return -EINVAL;
  1709. }
  1710. static void *mergeable_xdp_get_buf(struct virtnet_info *vi,
  1711. struct receive_queue *rq,
  1712. struct bpf_prog *xdp_prog,
  1713. void *ctx,
  1714. unsigned int *frame_sz,
  1715. int *num_buf,
  1716. struct page **page,
  1717. int offset,
  1718. unsigned int *len,
  1719. struct virtio_net_hdr_mrg_rxbuf *hdr)
  1720. {
  1721. unsigned int truesize = mergeable_ctx_to_truesize(ctx);
  1722. unsigned int headroom = mergeable_ctx_to_headroom(ctx);
  1723. struct page *xdp_page;
  1724. unsigned int xdp_room;
	/* Transient failure which in theory could occur if
	 * in-flight packets from before XDP was enabled reach
	 * the receive path after XDP is loaded.
	 */
  1729. if (unlikely(hdr->hdr.gso_type))
  1730. return NULL;
  1731. /* Partially checksummed packets must be dropped. */
  1732. if (unlikely(hdr->hdr.flags & VIRTIO_NET_HDR_F_NEEDS_CSUM))
  1733. return NULL;
	/* The XDP core currently assumes the frag size is PAGE_SIZE, but
	 * buffers with headroom may add a hole to the truesize, which makes
	 * their length exceed PAGE_SIZE. So the hole mechanism is disabled
	 * for XDP; see add_recvbuf_mergeable().
	 */
  1739. *frame_sz = truesize;
  1740. if (likely(headroom >= virtnet_get_headroom(vi) &&
  1741. (*num_buf == 1 || xdp_prog->aux->xdp_has_frags))) {
  1742. return page_address(*page) + offset;
  1743. }
	/* This happens when the headroom is not enough because the buffer
	 * was prefilled before XDP was set, and should only occur for the
	 * first several packets. A vq reset could be used here to clean up
	 * the prefilled buffers, but many existing devices do not support
	 * it, and we don't want to bother users who are using XDP normally.
	 */
  1752. if (!xdp_prog->aux->xdp_has_frags) {
  1753. /* linearize data for XDP */
  1754. xdp_page = xdp_linearize_page(vi->dev, rq, num_buf,
  1755. *page, offset,
  1756. XDP_PACKET_HEADROOM,
  1757. len);
  1758. if (!xdp_page)
  1759. return NULL;
  1760. } else {
  1761. xdp_room = SKB_DATA_ALIGN(XDP_PACKET_HEADROOM +
  1762. sizeof(struct skb_shared_info));
  1763. if (*len + xdp_room > PAGE_SIZE)
  1764. return NULL;
  1765. xdp_page = alloc_page(GFP_ATOMIC);
  1766. if (!xdp_page)
  1767. return NULL;
  1768. memcpy(page_address(xdp_page) + XDP_PACKET_HEADROOM,
  1769. page_address(*page) + offset, *len);
  1770. }
  1771. *frame_sz = PAGE_SIZE;
  1772. put_page(*page);
  1773. *page = xdp_page;
  1774. return page_address(*page) + XDP_PACKET_HEADROOM;
  1775. }
  1776. static struct sk_buff *receive_mergeable_xdp(struct net_device *dev,
  1777. struct virtnet_info *vi,
  1778. struct receive_queue *rq,
  1779. struct bpf_prog *xdp_prog,
  1780. void *buf,
  1781. void *ctx,
  1782. unsigned int len,
  1783. unsigned int *xdp_xmit,
  1784. struct virtnet_rq_stats *stats)
  1785. {
  1786. struct virtio_net_hdr_mrg_rxbuf *hdr = buf;
  1787. int num_buf = virtio16_to_cpu(vi->vdev, hdr->num_buffers);
  1788. struct page *page = virt_to_head_page(buf);
  1789. int offset = buf - page_address(page);
  1790. unsigned int xdp_frags_truesz = 0;
  1791. struct sk_buff *head_skb;
  1792. unsigned int frame_sz;
  1793. struct xdp_buff xdp;
  1794. void *data;
  1795. u32 act;
  1796. int err;
  1797. data = mergeable_xdp_get_buf(vi, rq, xdp_prog, ctx, &frame_sz, &num_buf, &page,
  1798. offset, &len, hdr);
  1799. if (unlikely(!data))
  1800. goto err_xdp;
  1801. err = virtnet_build_xdp_buff_mrg(dev, vi, rq, &xdp, data, len, frame_sz,
  1802. &num_buf, &xdp_frags_truesz, stats);
  1803. if (unlikely(err))
  1804. goto err_xdp;
  1805. act = virtnet_xdp_handler(xdp_prog, &xdp, dev, xdp_xmit, stats);
  1806. switch (act) {
  1807. case XDP_PASS:
  1808. head_skb = build_skb_from_xdp_buff(dev, vi, &xdp, xdp_frags_truesz);
  1809. if (unlikely(!head_skb))
  1810. break;
  1811. return head_skb;
  1812. case XDP_TX:
  1813. case XDP_REDIRECT:
  1814. return NULL;
  1815. default:
  1816. break;
  1817. }
  1818. put_xdp_frags(&xdp);
  1819. err_xdp:
  1820. put_page(page);
  1821. mergeable_buf_free(rq, num_buf, dev, stats);
  1822. u64_stats_inc(&stats->xdp_drops);
  1823. u64_stats_inc(&stats->drops);
  1824. return NULL;
  1825. }
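/* Append one receive buffer to the skb being assembled, starting a new skb on
 * the frag_list once MAX_SKB_FRAGS fragments are in use, and coalescing with
 * the previous fragment when the pages allow it.
 */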
  1826. static struct sk_buff *virtnet_skb_append_frag(struct sk_buff *head_skb,
  1827. struct sk_buff *curr_skb,
  1828. struct page *page, void *buf,
  1829. int len, int truesize)
  1830. {
  1831. int num_skb_frags;
  1832. int offset;
  1833. num_skb_frags = skb_shinfo(curr_skb)->nr_frags;
  1834. if (unlikely(num_skb_frags == MAX_SKB_FRAGS)) {
  1835. struct sk_buff *nskb = alloc_skb(0, GFP_ATOMIC);
  1836. if (unlikely(!nskb))
  1837. return NULL;
  1838. if (curr_skb == head_skb)
  1839. skb_shinfo(curr_skb)->frag_list = nskb;
  1840. else
  1841. curr_skb->next = nskb;
  1842. curr_skb = nskb;
  1843. head_skb->truesize += nskb->truesize;
  1844. num_skb_frags = 0;
  1845. }
  1846. if (curr_skb != head_skb) {
  1847. head_skb->data_len += len;
  1848. head_skb->len += len;
  1849. head_skb->truesize += truesize;
  1850. }
  1851. offset = buf - page_address(page);
  1852. if (skb_can_coalesce(curr_skb, num_skb_frags, page, offset)) {
  1853. put_page(page);
  1854. skb_coalesce_rx_frag(curr_skb, num_skb_frags - 1,
  1855. len, truesize);
  1856. } else {
  1857. skb_add_rx_frag(curr_skb, num_skb_frags, page,
  1858. offset, len, truesize);
  1859. }
  1860. return curr_skb;
  1861. }
  1862. static struct sk_buff *receive_mergeable(struct net_device *dev,
  1863. struct virtnet_info *vi,
  1864. struct receive_queue *rq,
  1865. void *buf,
  1866. void *ctx,
  1867. unsigned int len,
  1868. unsigned int *xdp_xmit,
  1869. struct virtnet_rq_stats *stats)
  1870. {
  1871. struct virtio_net_hdr_mrg_rxbuf *hdr = buf;
  1872. int num_buf = virtio16_to_cpu(vi->vdev, hdr->num_buffers);
  1873. struct page *page = virt_to_head_page(buf);
  1874. int offset = buf - page_address(page);
  1875. struct sk_buff *head_skb, *curr_skb;
  1876. unsigned int truesize = mergeable_ctx_to_truesize(ctx);
  1877. unsigned int headroom = mergeable_ctx_to_headroom(ctx);
  1878. unsigned int tailroom = headroom ? sizeof(struct skb_shared_info) : 0;
  1879. unsigned int room = SKB_DATA_ALIGN(headroom + tailroom);
  1880. head_skb = NULL;
  1881. u64_stats_add(&stats->bytes, len - vi->hdr_len);
  1882. if (unlikely(len > truesize - room)) {
  1883. pr_debug("%s: rx error: len %u exceeds truesize %lu\n",
  1884. dev->name, len, (unsigned long)(truesize - room));
  1885. DEV_STATS_INC(dev, rx_length_errors);
  1886. goto err_skb;
  1887. }
  1888. if (unlikely(vi->xdp_enabled)) {
  1889. struct bpf_prog *xdp_prog;
  1890. rcu_read_lock();
  1891. xdp_prog = rcu_dereference(rq->xdp_prog);
  1892. if (xdp_prog) {
  1893. head_skb = receive_mergeable_xdp(dev, vi, rq, xdp_prog, buf, ctx,
  1894. len, xdp_xmit, stats);
  1895. rcu_read_unlock();
  1896. return head_skb;
  1897. }
  1898. rcu_read_unlock();
  1899. }
  1900. head_skb = page_to_skb(vi, rq, page, offset, len, truesize, headroom);
  1901. curr_skb = head_skb;
  1902. if (unlikely(!curr_skb))
  1903. goto err_skb;
  1904. while (--num_buf) {
  1905. buf = virtnet_rq_get_buf(rq, &len, &ctx);
  1906. if (unlikely(!buf)) {
  1907. pr_debug("%s: rx error: %d buffers out of %d missing\n",
  1908. dev->name, num_buf,
  1909. virtio16_to_cpu(vi->vdev,
  1910. hdr->num_buffers));
  1911. DEV_STATS_INC(dev, rx_length_errors);
  1912. goto err_buf;
  1913. }
  1914. u64_stats_add(&stats->bytes, len);
  1915. page = virt_to_head_page(buf);
  1916. truesize = mergeable_ctx_to_truesize(ctx);
  1917. headroom = mergeable_ctx_to_headroom(ctx);
  1918. tailroom = headroom ? sizeof(struct skb_shared_info) : 0;
  1919. room = SKB_DATA_ALIGN(headroom + tailroom);
  1920. if (unlikely(len > truesize - room)) {
  1921. pr_debug("%s: rx error: len %u exceeds truesize %lu\n",
  1922. dev->name, len, (unsigned long)(truesize - room));
  1923. DEV_STATS_INC(dev, rx_length_errors);
  1924. goto err_skb;
  1925. }
  1926. curr_skb = virtnet_skb_append_frag(head_skb, curr_skb, page,
  1927. buf, len, truesize);
  1928. if (!curr_skb)
  1929. goto err_skb;
  1930. }
  1931. ewma_pkt_len_add(&rq->mrg_avg_pkt_len, head_skb->len);
  1932. return head_skb;
  1933. err_skb:
  1934. put_page(page);
  1935. mergeable_buf_free(rq, num_buf, dev, stats);
  1936. err_buf:
  1937. u64_stats_inc(&stats->drops);
  1938. dev_kfree_skb(head_skb);
  1939. return NULL;
  1940. }
  1941. static void virtio_skb_set_hash(const struct virtio_net_hdr_v1_hash *hdr_hash,
  1942. struct sk_buff *skb)
  1943. {
  1944. enum pkt_hash_types rss_hash_type;
  1945. if (!hdr_hash || !skb)
  1946. return;
  1947. switch (__le16_to_cpu(hdr_hash->hash_report)) {
  1948. case VIRTIO_NET_HASH_REPORT_TCPv4:
  1949. case VIRTIO_NET_HASH_REPORT_UDPv4:
  1950. case VIRTIO_NET_HASH_REPORT_TCPv6:
  1951. case VIRTIO_NET_HASH_REPORT_UDPv6:
  1952. case VIRTIO_NET_HASH_REPORT_TCPv6_EX:
  1953. case VIRTIO_NET_HASH_REPORT_UDPv6_EX:
  1954. rss_hash_type = PKT_HASH_TYPE_L4;
  1955. break;
  1956. case VIRTIO_NET_HASH_REPORT_IPv4:
  1957. case VIRTIO_NET_HASH_REPORT_IPv6:
  1958. case VIRTIO_NET_HASH_REPORT_IPv6_EX:
  1959. rss_hash_type = PKT_HASH_TYPE_L3;
  1960. break;
  1961. case VIRTIO_NET_HASH_REPORT_NONE:
  1962. default:
  1963. rss_hash_type = PKT_HASH_TYPE_NONE;
  1964. }
  1965. skb_set_hash(skb, __le32_to_cpu(hdr_hash->hash_value), rss_hash_type);
  1966. }
  1967. static void virtnet_receive_done(struct virtnet_info *vi, struct receive_queue *rq,
  1968. struct sk_buff *skb, u8 flags)
  1969. {
  1970. struct virtio_net_common_hdr *hdr;
  1971. struct net_device *dev = vi->dev;
  1972. hdr = skb_vnet_common_hdr(skb);
  1973. if (dev->features & NETIF_F_RXHASH && vi->has_rss_hash_report)
  1974. virtio_skb_set_hash(&hdr->hash_v1_hdr, skb);
  1975. if (flags & VIRTIO_NET_HDR_F_DATA_VALID)
  1976. skb->ip_summed = CHECKSUM_UNNECESSARY;
  1977. if (virtio_net_hdr_to_skb(skb, &hdr->hdr,
  1978. virtio_is_little_endian(vi->vdev))) {
  1979. net_warn_ratelimited("%s: bad gso: type: %u, size: %u\n",
  1980. dev->name, hdr->hdr.gso_type,
  1981. hdr->hdr.gso_size);
  1982. goto frame_err;
  1983. }
  1984. skb_record_rx_queue(skb, vq2rxq(rq->vq));
  1985. skb->protocol = eth_type_trans(skb, dev);
  1986. pr_debug("Receiving skb proto 0x%04x len %i type %i\n",
  1987. ntohs(skb->protocol), skb->len, skb->pkt_type);
  1988. napi_gro_receive(&rq->napi, skb);
  1989. return;
  1990. frame_err:
  1991. DEV_STATS_INC(dev, rx_frame_errors);
  1992. dev_kfree_skb(skb);
  1993. }
  1994. static void receive_buf(struct virtnet_info *vi, struct receive_queue *rq,
  1995. void *buf, unsigned int len, void **ctx,
  1996. unsigned int *xdp_xmit,
  1997. struct virtnet_rq_stats *stats)
  1998. {
  1999. struct net_device *dev = vi->dev;
  2000. struct sk_buff *skb;
  2001. u8 flags;
  2002. if (unlikely(len < vi->hdr_len + ETH_HLEN)) {
  2003. pr_debug("%s: short packet %i\n", dev->name, len);
  2004. DEV_STATS_INC(dev, rx_length_errors);
  2005. virtnet_rq_free_buf(vi, rq, buf);
  2006. return;
  2007. }
/* 1. Save the flags early, as the XDP program might overwrite them.
 *    These flags ensure packets marked as VIRTIO_NET_HDR_F_DATA_VALID
 *    stay valid after XDP processing.
 * 2. XDP doesn't work with partially checksummed packets (refer to
 *    virtnet_xdp_set()), so packets marked as
 *    VIRTIO_NET_HDR_F_NEEDS_CSUM get dropped during XDP processing.
 */
  2015. flags = ((struct virtio_net_common_hdr *)buf)->hdr.flags;
  2016. if (vi->mergeable_rx_bufs)
  2017. skb = receive_mergeable(dev, vi, rq, buf, ctx, len, xdp_xmit,
  2018. stats);
  2019. else if (vi->big_packets)
  2020. skb = receive_big(dev, vi, rq, buf, len, stats);
  2021. else
  2022. skb = receive_small(dev, vi, rq, buf, ctx, len, xdp_xmit, stats);
  2023. if (unlikely(!skb))
  2024. return;
  2025. virtnet_receive_done(vi, rq, skb, flags);
  2026. }
  2027. /* Unlike mergeable buffers, all buffers are allocated to the
  2028. * same size, except for the headroom. For this reason we do
  2029. * not need to use mergeable_len_to_ctx here - it is enough
  2030. * to store the headroom as the context ignoring the truesize.
  2031. */
  2032. static int add_recvbuf_small(struct virtnet_info *vi, struct receive_queue *rq,
  2033. gfp_t gfp)
  2034. {
  2035. char *buf;
  2036. unsigned int xdp_headroom = virtnet_get_headroom(vi);
  2037. void *ctx = (void *)(unsigned long)xdp_headroom;
  2038. int len = vi->hdr_len + VIRTNET_RX_PAD + GOOD_PACKET_LEN + xdp_headroom;
  2039. int err;
  2040. len = SKB_DATA_ALIGN(len) +
  2041. SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
  2042. if (unlikely(!skb_page_frag_refill(len, &rq->alloc_frag, gfp)))
  2043. return -ENOMEM;
  2044. buf = virtnet_rq_alloc(rq, len, gfp);
  2045. if (unlikely(!buf))
  2046. return -ENOMEM;
  2047. buf += VIRTNET_RX_PAD + xdp_headroom;
  2048. virtnet_rq_init_one_sg(rq, buf, vi->hdr_len + GOOD_PACKET_LEN);
  2049. err = virtqueue_add_inbuf_ctx(rq->vq, rq->sg, 1, buf, ctx, gfp);
  2050. if (err < 0) {
  2051. if (rq->do_dma)
  2052. virtnet_rq_unmap(rq, buf, 0);
  2053. put_page(virt_to_head_page(buf));
  2054. }
  2055. return err;
  2056. }
  2057. static int add_recvbuf_big(struct virtnet_info *vi, struct receive_queue *rq,
  2058. gfp_t gfp)
  2059. {
  2060. struct page *first, *list = NULL;
  2061. char *p;
  2062. int i, err, offset;
  2063. sg_init_table(rq->sg, vi->big_packets_num_skbfrags + 2);
  2064. /* page in rq->sg[vi->big_packets_num_skbfrags + 1] is list tail */
  2065. for (i = vi->big_packets_num_skbfrags + 1; i > 1; --i) {
  2066. first = get_a_page(rq, gfp);
  2067. if (!first) {
  2068. if (list)
  2069. give_pages(rq, list);
  2070. return -ENOMEM;
  2071. }
  2072. sg_set_buf(&rq->sg[i], page_address(first), PAGE_SIZE);
  2073. /* chain new page in list head to match sg */
  2074. first->private = (unsigned long)list;
  2075. list = first;
  2076. }
  2077. first = get_a_page(rq, gfp);
  2078. if (!first) {
  2079. give_pages(rq, list);
  2080. return -ENOMEM;
  2081. }
  2082. p = page_address(first);
	/* rq->sg[0] and rq->sg[1] share the same page */
	/* a separate rq->sg[0] for the header - required in case !any_header_sg */
  2085. sg_set_buf(&rq->sg[0], p, vi->hdr_len);
  2086. /* rq->sg[1] for data packet, from offset */
  2087. offset = sizeof(struct padded_vnet_hdr);
  2088. sg_set_buf(&rq->sg[1], p + offset, PAGE_SIZE - offset);
  2089. /* chain first in list head */
  2090. first->private = (unsigned long)list;
  2091. err = virtqueue_add_inbuf(rq->vq, rq->sg, vi->big_packets_num_skbfrags + 2,
  2092. first, gfp);
  2093. if (err < 0)
  2094. give_pages(rq, first);
  2095. return err;
  2096. }
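/* Pick the size of the next mergeable receive buffer from the EWMA of recent
 * packet lengths, clamped between the minimum buffer length and PAGE_SIZE;
 * when extra room is reserved for XDP (@room != 0), use PAGE_SIZE minus that
 * room instead.
 */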
  2097. static unsigned int get_mergeable_buf_len(struct receive_queue *rq,
  2098. struct ewma_pkt_len *avg_pkt_len,
  2099. unsigned int room)
  2100. {
  2101. struct virtnet_info *vi = rq->vq->vdev->priv;
  2102. const size_t hdr_len = vi->hdr_len;
  2103. unsigned int len;
  2104. if (room)
  2105. return PAGE_SIZE - room;
  2106. len = hdr_len + clamp_t(unsigned int, ewma_pkt_len_read(avg_pkt_len),
  2107. rq->min_buf_len, PAGE_SIZE - hdr_len);
  2108. return ALIGN(len, L1_CACHE_BYTES);
  2109. }
  2110. static int add_recvbuf_mergeable(struct virtnet_info *vi,
  2111. struct receive_queue *rq, gfp_t gfp)
  2112. {
  2113. struct page_frag *alloc_frag = &rq->alloc_frag;
  2114. unsigned int headroom = virtnet_get_headroom(vi);
  2115. unsigned int tailroom = headroom ? sizeof(struct skb_shared_info) : 0;
  2116. unsigned int room = SKB_DATA_ALIGN(headroom + tailroom);
  2117. unsigned int len, hole;
  2118. void *ctx;
  2119. char *buf;
  2120. int err;
	/* Extra tailroom is needed to satisfy XDP's assumption. This means
	 * rx frag coalescing won't work, but since we've disabled GSO for
	 * XDP, it won't be a big issue.
	 */
  2125. len = get_mergeable_buf_len(rq, &rq->mrg_avg_pkt_len, room);
  2126. if (unlikely(!skb_page_frag_refill(len + room, alloc_frag, gfp)))
  2127. return -ENOMEM;
  2128. if (!alloc_frag->offset && len + room + sizeof(struct virtnet_rq_dma) > alloc_frag->size)
  2129. len -= sizeof(struct virtnet_rq_dma);
  2130. buf = virtnet_rq_alloc(rq, len + room, gfp);
  2131. if (unlikely(!buf))
  2132. return -ENOMEM;
  2133. buf += headroom; /* advance address leaving hole at front of pkt */
  2134. hole = alloc_frag->size - alloc_frag->offset;
  2135. if (hole < len + room) {
  2136. /* To avoid internal fragmentation, if there is very likely not
  2137. * enough space for another buffer, add the remaining space to
  2138. * the current buffer.
  2139. * XDP core assumes that frame_size of xdp_buff and the length
  2140. * of the frag are PAGE_SIZE, so we disable the hole mechanism.
  2141. */
  2142. if (!headroom)
  2143. len += hole;
  2144. alloc_frag->offset += hole;
  2145. }
  2146. virtnet_rq_init_one_sg(rq, buf, len);
  2147. ctx = mergeable_len_to_ctx(len + room, headroom);
  2148. err = virtqueue_add_inbuf_ctx(rq->vq, rq->sg, 1, buf, ctx, gfp);
  2149. if (err < 0) {
  2150. if (rq->do_dma)
  2151. virtnet_rq_unmap(rq, buf, 0);
  2152. put_page(virt_to_head_page(buf));
  2153. }
  2154. return err;
  2155. }
/*
 * Returns false if we couldn't fill entirely (OOM).
 *
 * Normally run in the receive path, but can also be run from ndo_open
 * before we're receiving packets, or from refill_work which is
 * careful to disable receiving (using napi_disable).
 */
  2163. static bool try_fill_recv(struct virtnet_info *vi, struct receive_queue *rq,
  2164. gfp_t gfp)
  2165. {
  2166. int err;
  2167. if (rq->xsk_pool) {
  2168. err = virtnet_add_recvbuf_xsk(vi, rq, rq->xsk_pool, gfp);
  2169. goto kick;
  2170. }
  2171. do {
  2172. if (vi->mergeable_rx_bufs)
  2173. err = add_recvbuf_mergeable(vi, rq, gfp);
  2174. else if (vi->big_packets)
  2175. err = add_recvbuf_big(vi, rq, gfp);
  2176. else
  2177. err = add_recvbuf_small(vi, rq, gfp);
  2178. if (err)
  2179. break;
  2180. } while (rq->vq->num_free);
  2181. kick:
  2182. if (virtqueue_kick_prepare(rq->vq) && virtqueue_notify(rq->vq)) {
  2183. unsigned long flags;
  2184. flags = u64_stats_update_begin_irqsave(&rq->stats.syncp);
  2185. u64_stats_inc(&rq->stats.kicks);
  2186. u64_stats_update_end_irqrestore(&rq->stats.syncp, flags);
  2187. }
  2188. return err != -ENOMEM;
  2189. }
  2190. static void skb_recv_done(struct virtqueue *rvq)
  2191. {
  2192. struct virtnet_info *vi = rvq->vdev->priv;
  2193. struct receive_queue *rq = &vi->rq[vq2rxq(rvq)];
  2194. rq->calls++;
  2195. virtqueue_napi_schedule(&rq->napi, rvq);
  2196. }
  2197. static void virtnet_napi_enable(struct virtqueue *vq, struct napi_struct *napi)
  2198. {
  2199. napi_enable(napi);
  2200. /* If all buffers were filled by other side before we napi_enabled, we
  2201. * won't get another interrupt, so process any outstanding packets now.
  2202. * Call local_bh_enable after to trigger softIRQ processing.
  2203. */
  2204. local_bh_disable();
  2205. virtqueue_napi_schedule(napi, vq);
  2206. local_bh_enable();
  2207. }
  2208. static void virtnet_napi_tx_enable(struct virtnet_info *vi,
  2209. struct virtqueue *vq,
  2210. struct napi_struct *napi)
  2211. {
  2212. if (!napi->weight)
  2213. return;
  2214. /* Tx napi touches cachelines on the cpu handling tx interrupts. Only
  2215. * enable the feature if this is likely affine with the transmit path.
  2216. */
  2217. if (!vi->affinity_hint_set) {
  2218. napi->weight = 0;
  2219. return;
  2220. }
  2221. return virtnet_napi_enable(vq, napi);
  2222. }
  2223. static void virtnet_napi_tx_disable(struct napi_struct *napi)
  2224. {
  2225. if (napi->weight)
  2226. napi_disable(napi);
  2227. }
  2228. static void refill_work(struct work_struct *work)
  2229. {
  2230. struct virtnet_info *vi =
  2231. container_of(work, struct virtnet_info, refill.work);
  2232. bool still_empty;
  2233. int i;
  2234. for (i = 0; i < vi->curr_queue_pairs; i++) {
  2235. struct receive_queue *rq = &vi->rq[i];
  2236. napi_disable(&rq->napi);
  2237. still_empty = !try_fill_recv(vi, rq, GFP_KERNEL);
  2238. virtnet_napi_enable(rq->vq, &rq->napi);
		/* In theory, this can happen: if we don't get any buffers in,
		 * we will *never* try to fill again.
		 */
  2242. if (still_empty)
  2243. schedule_delayed_work(&vi->refill, HZ/2);
  2244. }
  2245. }
  2246. static int virtnet_receive_xsk_bufs(struct virtnet_info *vi,
  2247. struct receive_queue *rq,
  2248. int budget,
  2249. unsigned int *xdp_xmit,
  2250. struct virtnet_rq_stats *stats)
  2251. {
  2252. unsigned int len;
  2253. int packets = 0;
  2254. void *buf;
  2255. while (packets < budget) {
  2256. buf = virtqueue_get_buf(rq->vq, &len);
  2257. if (!buf)
  2258. break;
  2259. virtnet_receive_xsk_buf(vi, rq, buf, len, xdp_xmit, stats);
  2260. packets++;
  2261. }
  2262. return packets;
  2263. }
  2264. static int virtnet_receive_packets(struct virtnet_info *vi,
  2265. struct receive_queue *rq,
  2266. int budget,
  2267. unsigned int *xdp_xmit,
  2268. struct virtnet_rq_stats *stats)
  2269. {
  2270. unsigned int len;
  2271. int packets = 0;
  2272. void *buf;
  2273. if (!vi->big_packets || vi->mergeable_rx_bufs) {
  2274. void *ctx;
  2275. while (packets < budget &&
  2276. (buf = virtnet_rq_get_buf(rq, &len, &ctx))) {
  2277. receive_buf(vi, rq, buf, len, ctx, xdp_xmit, stats);
  2278. packets++;
  2279. }
  2280. } else {
  2281. while (packets < budget &&
  2282. (buf = virtnet_rq_get_buf(rq, &len, NULL)) != NULL) {
  2283. receive_buf(vi, rq, buf, len, NULL, xdp_xmit, stats);
  2284. packets++;
  2285. }
  2286. }
  2287. return packets;
  2288. }
  2289. static int virtnet_receive(struct receive_queue *rq, int budget,
  2290. unsigned int *xdp_xmit)
  2291. {
  2292. struct virtnet_info *vi = rq->vq->vdev->priv;
  2293. struct virtnet_rq_stats stats = {};
  2294. int i, packets;
  2295. if (rq->xsk_pool)
  2296. packets = virtnet_receive_xsk_bufs(vi, rq, budget, xdp_xmit, &stats);
  2297. else
  2298. packets = virtnet_receive_packets(vi, rq, budget, xdp_xmit, &stats);
  2299. if (rq->vq->num_free > min((unsigned int)budget, virtqueue_get_vring_size(rq->vq)) / 2) {
  2300. if (!try_fill_recv(vi, rq, GFP_ATOMIC)) {
  2301. spin_lock(&vi->refill_lock);
  2302. if (vi->refill_enabled)
  2303. schedule_delayed_work(&vi->refill, 0);
  2304. spin_unlock(&vi->refill_lock);
  2305. }
  2306. }
  2307. u64_stats_set(&stats.packets, packets);
  2308. u64_stats_update_begin(&rq->stats.syncp);
  2309. for (i = 0; i < ARRAY_SIZE(virtnet_rq_stats_desc); i++) {
  2310. size_t offset = virtnet_rq_stats_desc[i].offset;
  2311. u64_stats_t *item, *src;
  2312. item = (u64_stats_t *)((u8 *)&rq->stats + offset);
  2313. src = (u64_stats_t *)((u8 *)&stats + offset);
  2314. u64_stats_add(item, u64_stats_read(src));
  2315. }
  2316. u64_stats_add(&rq->stats.packets, u64_stats_read(&stats.packets));
  2317. u64_stats_add(&rq->stats.bytes, u64_stats_read(&stats.bytes));
  2318. u64_stats_update_end(&rq->stats.syncp);
  2319. return packets;
  2320. }
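/* Opportunistically clean the matching transmit queue from the receive NAPI
 * poll, waking the queue if enough descriptors have been freed.
 */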
  2321. static void virtnet_poll_cleantx(struct receive_queue *rq, int budget)
  2322. {
  2323. struct virtnet_info *vi = rq->vq->vdev->priv;
  2324. unsigned int index = vq2rxq(rq->vq);
  2325. struct send_queue *sq = &vi->sq[index];
  2326. struct netdev_queue *txq = netdev_get_tx_queue(vi->dev, index);
  2327. if (!sq->napi.weight || is_xdp_raw_buffer_queue(vi, index))
  2328. return;
  2329. if (__netif_tx_trylock(txq)) {
  2330. if (sq->reset) {
  2331. __netif_tx_unlock(txq);
  2332. return;
  2333. }
  2334. do {
  2335. virtqueue_disable_cb(sq->vq);
  2336. free_old_xmit(sq, txq, !!budget);
  2337. } while (unlikely(!virtqueue_enable_cb_delayed(sq->vq)));
  2338. if (sq->vq->num_free >= 2 + MAX_SKB_FRAGS) {
  2339. if (netif_tx_queue_stopped(txq)) {
  2340. u64_stats_update_begin(&sq->stats.syncp);
  2341. u64_stats_inc(&sq->stats.wake);
  2342. u64_stats_update_end(&sq->stats.syncp);
  2343. }
  2344. netif_tx_wake_queue(txq);
  2345. }
  2346. __netif_tx_unlock(txq);
  2347. }
  2348. }
  2349. static void virtnet_rx_dim_update(struct virtnet_info *vi, struct receive_queue *rq)
  2350. {
  2351. struct dim_sample cur_sample = {};
  2352. if (!rq->packets_in_napi)
  2353. return;
2354. /* No protection is needed when fetching stats, since the fetcher and
2355. * the updater of the stats run in the same context.
2356. */
  2357. dim_update_sample(rq->calls,
  2358. u64_stats_read(&rq->stats.packets),
  2359. u64_stats_read(&rq->stats.bytes),
  2360. &cur_sample);
  2361. net_dim(&rq->dim, cur_sample);
  2362. rq->packets_in_napi = 0;
  2363. }
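/* rx NAPI poll handler: clean the paired tx queue, receive up to the
 * NAPI budget, flush pending XDP redirects, and complete NAPI (updating
 * dynamic interrupt moderation) when the budget was not exhausted. If
 * XDP_TX was used, the XDP send queue is kicked.
 */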
  2364. static int virtnet_poll(struct napi_struct *napi, int budget)
  2365. {
  2366. struct receive_queue *rq =
  2367. container_of(napi, struct receive_queue, napi);
  2368. struct virtnet_info *vi = rq->vq->vdev->priv;
  2369. struct send_queue *sq;
  2370. unsigned int received;
  2371. unsigned int xdp_xmit = 0;
  2372. bool napi_complete;
  2373. virtnet_poll_cleantx(rq, budget);
  2374. received = virtnet_receive(rq, budget, &xdp_xmit);
  2375. rq->packets_in_napi += received;
  2376. if (xdp_xmit & VIRTIO_XDP_REDIR)
  2377. xdp_do_flush();
  2378. /* Out of packets? */
  2379. if (received < budget) {
  2380. napi_complete = virtqueue_napi_complete(napi, rq->vq, received);
  2381. /* Intentionally not taking dim_lock here. This may result in a
  2382. * spurious net_dim call. But if that happens virtnet_rx_dim_work
  2383. * will not act on the scheduled work.
  2384. */
  2385. if (napi_complete && rq->dim_enabled)
  2386. virtnet_rx_dim_update(vi, rq);
  2387. }
  2388. if (xdp_xmit & VIRTIO_XDP_TX) {
  2389. sq = virtnet_xdp_get_sq(vi);
  2390. if (virtqueue_kick_prepare(sq->vq) && virtqueue_notify(sq->vq)) {
  2391. u64_stats_update_begin(&sq->stats.syncp);
  2392. u64_stats_inc(&sq->stats.kicks);
  2393. u64_stats_update_end(&sq->stats.syncp);
  2394. }
  2395. virtnet_xdp_put_sq(vi, sq);
  2396. }
  2397. return received;
  2398. }
  2399. static void virtnet_disable_queue_pair(struct virtnet_info *vi, int qp_index)
  2400. {
  2401. virtnet_napi_tx_disable(&vi->sq[qp_index].napi);
  2402. napi_disable(&vi->rq[qp_index].napi);
  2403. xdp_rxq_info_unreg(&vi->rq[qp_index].xdp_rxq);
  2404. }
  2405. static int virtnet_enable_queue_pair(struct virtnet_info *vi, int qp_index)
  2406. {
  2407. struct net_device *dev = vi->dev;
  2408. int err;
  2409. err = xdp_rxq_info_reg(&vi->rq[qp_index].xdp_rxq, dev, qp_index,
  2410. vi->rq[qp_index].napi.napi_id);
  2411. if (err < 0)
  2412. return err;
  2413. err = xdp_rxq_info_reg_mem_model(&vi->rq[qp_index].xdp_rxq,
  2414. MEM_TYPE_PAGE_SHARED, NULL);
  2415. if (err < 0)
  2416. goto err_xdp_reg_mem_model;
  2417. virtnet_napi_enable(vi->rq[qp_index].vq, &vi->rq[qp_index].napi);
  2418. virtnet_napi_tx_enable(vi, vi->sq[qp_index].vq, &vi->sq[qp_index].napi);
  2419. return 0;
  2420. err_xdp_reg_mem_model:
  2421. xdp_rxq_info_unreg(&vi->rq[qp_index].xdp_rxq);
  2422. return err;
  2423. }
  2424. static void virtnet_cancel_dim(struct virtnet_info *vi, struct dim *dim)
  2425. {
  2426. if (!virtio_has_feature(vi->vdev, VIRTIO_NET_F_VQ_NOTF_COAL))
  2427. return;
  2428. net_dim_work_cancel(dim);
  2429. }
  2430. static void virtnet_update_settings(struct virtnet_info *vi)
  2431. {
  2432. u32 speed;
  2433. u8 duplex;
  2434. if (!virtio_has_feature(vi->vdev, VIRTIO_NET_F_SPEED_DUPLEX))
  2435. return;
  2436. virtio_cread_le(vi->vdev, struct virtio_net_config, speed, &speed);
  2437. if (ethtool_validate_speed(speed))
  2438. vi->speed = speed;
  2439. virtio_cread_le(vi->vdev, struct virtio_net_config, duplex, &duplex);
  2440. if (ethtool_validate_duplex(duplex))
  2441. vi->duplex = duplex;
  2442. }
  2443. static int virtnet_open(struct net_device *dev)
  2444. {
  2445. struct virtnet_info *vi = netdev_priv(dev);
  2446. int i, err;
  2447. enable_delayed_refill(vi);
  2448. for (i = 0; i < vi->max_queue_pairs; i++) {
  2449. if (i < vi->curr_queue_pairs)
2450. /* Make sure we have some buffers; if we hit OOM, fall back to the refill workqueue. */
  2451. if (!try_fill_recv(vi, &vi->rq[i], GFP_KERNEL))
  2452. schedule_delayed_work(&vi->refill, 0);
  2453. err = virtnet_enable_queue_pair(vi, i);
  2454. if (err < 0)
  2455. goto err_enable_qp;
  2456. }
  2457. if (virtio_has_feature(vi->vdev, VIRTIO_NET_F_STATUS)) {
  2458. if (vi->status & VIRTIO_NET_S_LINK_UP)
  2459. netif_carrier_on(vi->dev);
  2460. virtio_config_driver_enable(vi->vdev);
  2461. } else {
  2462. vi->status = VIRTIO_NET_S_LINK_UP;
  2463. netif_carrier_on(dev);
  2464. }
  2465. return 0;
  2466. err_enable_qp:
  2467. disable_delayed_refill(vi);
  2468. cancel_delayed_work_sync(&vi->refill);
  2469. for (i--; i >= 0; i--) {
  2470. virtnet_disable_queue_pair(vi, i);
  2471. virtnet_cancel_dim(vi, &vi->rq[i].dim);
  2472. }
  2473. return err;
  2474. }
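/* tx NAPI poll handler: free completed tx buffers under the tx queue
 * lock, wake the queue when enough space is available, then re-arm the
 * virtqueue callback, rescheduling NAPI if completions raced in.
 */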
  2475. static int virtnet_poll_tx(struct napi_struct *napi, int budget)
  2476. {
  2477. struct send_queue *sq = container_of(napi, struct send_queue, napi);
  2478. struct virtnet_info *vi = sq->vq->vdev->priv;
  2479. unsigned int index = vq2txq(sq->vq);
  2480. struct netdev_queue *txq;
  2481. int opaque;
  2482. bool done;
  2483. if (unlikely(is_xdp_raw_buffer_queue(vi, index))) {
  2484. /* We don't need to enable cb for XDP */
  2485. napi_complete_done(napi, 0);
  2486. return 0;
  2487. }
  2488. txq = netdev_get_tx_queue(vi->dev, index);
  2489. __netif_tx_lock(txq, raw_smp_processor_id());
  2490. virtqueue_disable_cb(sq->vq);
  2491. free_old_xmit(sq, txq, !!budget);
  2492. if (sq->vq->num_free >= 2 + MAX_SKB_FRAGS) {
  2493. if (netif_tx_queue_stopped(txq)) {
  2494. u64_stats_update_begin(&sq->stats.syncp);
  2495. u64_stats_inc(&sq->stats.wake);
  2496. u64_stats_update_end(&sq->stats.syncp);
  2497. }
  2498. netif_tx_wake_queue(txq);
  2499. }
  2500. opaque = virtqueue_enable_cb_prepare(sq->vq);
  2501. done = napi_complete_done(napi, 0);
  2502. if (!done)
  2503. virtqueue_disable_cb(sq->vq);
  2504. __netif_tx_unlock(txq);
  2505. if (done) {
  2506. if (unlikely(virtqueue_poll(sq->vq, opaque))) {
  2507. if (napi_schedule_prep(napi)) {
  2508. __netif_tx_lock(txq, raw_smp_processor_id());
  2509. virtqueue_disable_cb(sq->vq);
  2510. __netif_tx_unlock(txq);
  2511. __napi_schedule(napi);
  2512. }
  2513. }
  2514. }
  2515. return 0;
  2516. }
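/* Build the virtio-net header for the skb and post it to the send
 * virtqueue. When vi->any_header_sg is set and the skb has aligned,
 * uncloned headroom, the header is pushed into the headroom so the
 * header and linear data share one sg entry; otherwise the header is
 * placed in a separate sg entry in front of the packet data.
 */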
  2517. static int xmit_skb(struct send_queue *sq, struct sk_buff *skb, bool orphan)
  2518. {
  2519. struct virtio_net_hdr_mrg_rxbuf *hdr;
  2520. const unsigned char *dest = ((struct ethhdr *)skb->data)->h_dest;
  2521. struct virtnet_info *vi = sq->vq->vdev->priv;
  2522. int num_sg;
  2523. unsigned hdr_len = vi->hdr_len;
  2524. bool can_push;
  2525. pr_debug("%s: xmit %p %pM\n", vi->dev->name, skb, dest);
  2526. can_push = vi->any_header_sg &&
  2527. !((unsigned long)skb->data & (__alignof__(*hdr) - 1)) &&
  2528. !skb_header_cloned(skb) && skb_headroom(skb) >= hdr_len;
  2529. /* Even if we can, don't push here yet as this would skew
  2530. * csum_start offset below. */
  2531. if (can_push)
  2532. hdr = (struct virtio_net_hdr_mrg_rxbuf *)(skb->data - hdr_len);
  2533. else
  2534. hdr = &skb_vnet_common_hdr(skb)->mrg_hdr;
  2535. if (virtio_net_hdr_from_skb(skb, &hdr->hdr,
  2536. virtio_is_little_endian(vi->vdev), false,
  2537. 0))
  2538. return -EPROTO;
  2539. if (vi->mergeable_rx_bufs)
  2540. hdr->num_buffers = 0;
  2541. sg_init_table(sq->sg, skb_shinfo(skb)->nr_frags + (can_push ? 1 : 2));
  2542. if (can_push) {
  2543. __skb_push(skb, hdr_len);
  2544. num_sg = skb_to_sgvec(skb, sq->sg, 0, skb->len);
  2545. if (unlikely(num_sg < 0))
  2546. return num_sg;
  2547. /* Pull header back to avoid skew in tx bytes calculations. */
  2548. __skb_pull(skb, hdr_len);
  2549. } else {
  2550. sg_set_buf(sq->sg, hdr, hdr_len);
  2551. num_sg = skb_to_sgvec(skb, sq->sg + 1, 0, skb->len);
  2552. if (unlikely(num_sg < 0))
  2553. return num_sg;
  2554. num_sg++;
  2555. }
  2556. return virtqueue_add_outbuf(sq->vq, sq->sg, num_sg,
  2557. skb_to_ptr(skb, orphan), GFP_ATOMIC);
  2558. }
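/* .ndo_start_xmit handler: reclaim completed tx buffers, queue the skb
 * via xmit_skb(), orphan the skb when tx NAPI is not in use, stop the
 * subqueue if it is close to full, and kick the device when xmit_more
 * does not indicate that more packets will follow.
 */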
  2559. static netdev_tx_t start_xmit(struct sk_buff *skb, struct net_device *dev)
  2560. {
  2561. struct virtnet_info *vi = netdev_priv(dev);
  2562. int qnum = skb_get_queue_mapping(skb);
  2563. struct send_queue *sq = &vi->sq[qnum];
  2564. int err;
  2565. struct netdev_queue *txq = netdev_get_tx_queue(dev, qnum);
  2566. bool xmit_more = netdev_xmit_more();
  2567. bool use_napi = sq->napi.weight;
  2568. bool kick;
  2569. /* Free up any pending old buffers before queueing new ones. */
  2570. do {
  2571. if (use_napi)
  2572. virtqueue_disable_cb(sq->vq);
  2573. free_old_xmit(sq, txq, false);
  2574. } while (use_napi && !xmit_more &&
  2575. unlikely(!virtqueue_enable_cb_delayed(sq->vq)));
  2576. /* timestamp packet in software */
  2577. skb_tx_timestamp(skb);
  2578. /* Try to transmit */
  2579. err = xmit_skb(sq, skb, !use_napi);
  2580. /* This should not happen! */
  2581. if (unlikely(err)) {
  2582. DEV_STATS_INC(dev, tx_fifo_errors);
  2583. if (net_ratelimit())
  2584. dev_warn(&dev->dev,
  2585. "Unexpected TXQ (%d) queue failure: %d\n",
  2586. qnum, err);
  2587. DEV_STATS_INC(dev, tx_dropped);
  2588. dev_kfree_skb_any(skb);
  2589. return NETDEV_TX_OK;
  2590. }
  2591. /* Don't wait up for transmitted skbs to be freed. */
  2592. if (!use_napi) {
  2593. skb_orphan(skb);
  2594. nf_reset_ct(skb);
  2595. }
  2596. check_sq_full_and_disable(vi, dev, sq);
  2597. kick = use_napi ? __netdev_tx_sent_queue(txq, skb->len, xmit_more) :
  2598. !xmit_more || netif_xmit_stopped(txq);
  2599. if (kick) {
  2600. if (virtqueue_kick_prepare(sq->vq) && virtqueue_notify(sq->vq)) {
  2601. u64_stats_update_begin(&sq->stats.syncp);
  2602. u64_stats_inc(&sq->stats.kicks);
  2603. u64_stats_update_end(&sq->stats.syncp);
  2604. }
  2605. }
  2606. return NETDEV_TX_OK;
  2607. }
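/* The helpers below pause a single rx or tx queue (disabling its NAPI
 * and, for tx, stopping the subqueue under the tx lock), resize the
 * underlying virtqueue, and then resume it, refilling rx buffers or
 * waking the tx queue as needed.
 */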
  2608. static void virtnet_rx_pause(struct virtnet_info *vi, struct receive_queue *rq)
  2609. {
  2610. bool running = netif_running(vi->dev);
  2611. if (running) {
  2612. napi_disable(&rq->napi);
  2613. virtnet_cancel_dim(vi, &rq->dim);
  2614. }
  2615. }
  2616. static void virtnet_rx_resume(struct virtnet_info *vi, struct receive_queue *rq)
  2617. {
  2618. bool running = netif_running(vi->dev);
  2619. if (!try_fill_recv(vi, rq, GFP_KERNEL))
  2620. schedule_delayed_work(&vi->refill, 0);
  2621. if (running)
  2622. virtnet_napi_enable(rq->vq, &rq->napi);
  2623. }
  2624. static int virtnet_rx_resize(struct virtnet_info *vi,
  2625. struct receive_queue *rq, u32 ring_num)
  2626. {
  2627. int err, qindex;
  2628. qindex = rq - vi->rq;
  2629. virtnet_rx_pause(vi, rq);
  2630. err = virtqueue_resize(rq->vq, ring_num, virtnet_rq_unmap_free_buf, NULL);
  2631. if (err)
  2632. netdev_err(vi->dev, "resize rx fail: rx queue index: %d err: %d\n", qindex, err);
  2633. virtnet_rx_resume(vi, rq);
  2634. return err;
  2635. }
  2636. static void virtnet_tx_pause(struct virtnet_info *vi, struct send_queue *sq)
  2637. {
  2638. bool running = netif_running(vi->dev);
  2639. struct netdev_queue *txq;
  2640. int qindex;
  2641. qindex = sq - vi->sq;
  2642. if (running)
  2643. virtnet_napi_tx_disable(&sq->napi);
  2644. txq = netdev_get_tx_queue(vi->dev, qindex);
2645. /* 1. wait for all in-flight xmit to complete
2646. * 2. close the race between netif_stop_subqueue() and netif_start_subqueue()
2647. */
  2648. __netif_tx_lock_bh(txq);
  2649. /* Prevent rx poll from accessing sq. */
  2650. sq->reset = true;
  2651. /* Prevent the upper layer from trying to send packets. */
  2652. netif_stop_subqueue(vi->dev, qindex);
  2653. __netif_tx_unlock_bh(txq);
  2654. }
  2655. static void virtnet_tx_resume(struct virtnet_info *vi, struct send_queue *sq)
  2656. {
  2657. bool running = netif_running(vi->dev);
  2658. struct netdev_queue *txq;
  2659. int qindex;
  2660. qindex = sq - vi->sq;
  2661. txq = netdev_get_tx_queue(vi->dev, qindex);
  2662. __netif_tx_lock_bh(txq);
  2663. sq->reset = false;
  2664. netif_tx_wake_queue(txq);
  2665. __netif_tx_unlock_bh(txq);
  2666. if (running)
  2667. virtnet_napi_tx_enable(vi, sq->vq, &sq->napi);
  2668. }
  2669. static int virtnet_tx_resize(struct virtnet_info *vi, struct send_queue *sq,
  2670. u32 ring_num)
  2671. {
  2672. int qindex, err;
  2673. if (ring_num <= MAX_SKB_FRAGS + 2) {
  2674. netdev_err(vi->dev, "tx size (%d) cannot be smaller than %d\n",
  2675. ring_num, MAX_SKB_FRAGS + 2);
  2676. return -EINVAL;
  2677. }
  2678. qindex = sq - vi->sq;
  2679. virtnet_tx_pause(vi, sq);
  2680. err = virtqueue_resize(sq->vq, ring_num, virtnet_sq_free_unused_buf,
  2681. virtnet_sq_free_unused_buf_done);
  2682. if (err)
  2683. netdev_err(vi->dev, "resize tx fail: tx queue index: %d err: %d\n", qindex, err);
  2684. virtnet_tx_resume(vi, sq);
  2685. return err;
  2686. }
  2687. /*
  2688. * Send command via the control virtqueue and check status. Commands
  2689. * supported by the hypervisor, as indicated by feature bits, should
  2690. * never fail unless improperly formatted.
  2691. */
  2692. static bool virtnet_send_command_reply(struct virtnet_info *vi, u8 class, u8 cmd,
  2693. struct scatterlist *out,
  2694. struct scatterlist *in)
  2695. {
  2696. struct scatterlist *sgs[5], hdr, stat;
  2697. u32 out_num = 0, tmp, in_num = 0;
  2698. bool ok;
  2699. int ret;
  2700. /* Caller should know better */
  2701. BUG_ON(!virtio_has_feature(vi->vdev, VIRTIO_NET_F_CTRL_VQ));
  2702. mutex_lock(&vi->cvq_lock);
  2703. vi->ctrl->status = ~0;
  2704. vi->ctrl->hdr.class = class;
  2705. vi->ctrl->hdr.cmd = cmd;
  2706. /* Add header */
  2707. sg_init_one(&hdr, &vi->ctrl->hdr, sizeof(vi->ctrl->hdr));
  2708. sgs[out_num++] = &hdr;
  2709. if (out)
  2710. sgs[out_num++] = out;
  2711. /* Add return status. */
  2712. sg_init_one(&stat, &vi->ctrl->status, sizeof(vi->ctrl->status));
  2713. sgs[out_num + in_num++] = &stat;
  2714. if (in)
  2715. sgs[out_num + in_num++] = in;
  2716. BUG_ON(out_num + in_num > ARRAY_SIZE(sgs));
  2717. ret = virtqueue_add_sgs(vi->cvq, sgs, out_num, in_num, vi, GFP_ATOMIC);
  2718. if (ret < 0) {
  2719. dev_warn(&vi->vdev->dev,
  2720. "Failed to add sgs for command vq: %d\n.", ret);
  2721. mutex_unlock(&vi->cvq_lock);
  2722. return false;
  2723. }
  2724. if (unlikely(!virtqueue_kick(vi->cvq)))
  2725. goto unlock;
2726. /* Spin for a response; the kick causes an ioport write that traps
2727. * into the hypervisor, so the request should be handled immediately.
2728. */
  2729. while (!virtqueue_get_buf(vi->cvq, &tmp) &&
  2730. !virtqueue_is_broken(vi->cvq)) {
  2731. cond_resched();
  2732. cpu_relax();
  2733. }
  2734. unlock:
  2735. ok = vi->ctrl->status == VIRTIO_NET_OK;
  2736. mutex_unlock(&vi->cvq_lock);
  2737. return ok;
  2738. }
  2739. static bool virtnet_send_command(struct virtnet_info *vi, u8 class, u8 cmd,
  2740. struct scatterlist *out)
  2741. {
  2742. return virtnet_send_command_reply(vi, class, cmd, out, NULL);
  2743. }
  2744. static int virtnet_set_mac_address(struct net_device *dev, void *p)
  2745. {
  2746. struct virtnet_info *vi = netdev_priv(dev);
  2747. struct virtio_device *vdev = vi->vdev;
  2748. int ret;
  2749. struct sockaddr *addr;
  2750. struct scatterlist sg;
  2751. if (virtio_has_feature(vi->vdev, VIRTIO_NET_F_STANDBY))
  2752. return -EOPNOTSUPP;
  2753. addr = kmemdup(p, sizeof(*addr), GFP_KERNEL);
  2754. if (!addr)
  2755. return -ENOMEM;
  2756. ret = eth_prepare_mac_addr_change(dev, addr);
  2757. if (ret)
  2758. goto out;
  2759. if (virtio_has_feature(vdev, VIRTIO_NET_F_CTRL_MAC_ADDR)) {
  2760. sg_init_one(&sg, addr->sa_data, dev->addr_len);
  2761. if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_MAC,
  2762. VIRTIO_NET_CTRL_MAC_ADDR_SET, &sg)) {
  2763. dev_warn(&vdev->dev,
  2764. "Failed to set mac address by vq command.\n");
  2765. ret = -EINVAL;
  2766. goto out;
  2767. }
  2768. } else if (virtio_has_feature(vdev, VIRTIO_NET_F_MAC) &&
  2769. !virtio_has_feature(vdev, VIRTIO_F_VERSION_1)) {
  2770. unsigned int i;
  2771. /* Naturally, this has an atomicity problem. */
  2772. for (i = 0; i < dev->addr_len; i++)
  2773. virtio_cwrite8(vdev,
  2774. offsetof(struct virtio_net_config, mac) +
  2775. i, addr->sa_data[i]);
  2776. }
  2777. eth_commit_mac_addr_change(dev, p);
  2778. ret = 0;
  2779. out:
  2780. kfree(addr);
  2781. return ret;
  2782. }
  2783. static void virtnet_stats(struct net_device *dev,
  2784. struct rtnl_link_stats64 *tot)
  2785. {
  2786. struct virtnet_info *vi = netdev_priv(dev);
  2787. unsigned int start;
  2788. int i;
  2789. for (i = 0; i < vi->max_queue_pairs; i++) {
  2790. u64 tpackets, tbytes, terrors, rpackets, rbytes, rdrops;
  2791. struct receive_queue *rq = &vi->rq[i];
  2792. struct send_queue *sq = &vi->sq[i];
  2793. do {
  2794. start = u64_stats_fetch_begin(&sq->stats.syncp);
  2795. tpackets = u64_stats_read(&sq->stats.packets);
  2796. tbytes = u64_stats_read(&sq->stats.bytes);
  2797. terrors = u64_stats_read(&sq->stats.tx_timeouts);
  2798. } while (u64_stats_fetch_retry(&sq->stats.syncp, start));
  2799. do {
  2800. start = u64_stats_fetch_begin(&rq->stats.syncp);
  2801. rpackets = u64_stats_read(&rq->stats.packets);
  2802. rbytes = u64_stats_read(&rq->stats.bytes);
  2803. rdrops = u64_stats_read(&rq->stats.drops);
  2804. } while (u64_stats_fetch_retry(&rq->stats.syncp, start));
  2805. tot->rx_packets += rpackets;
  2806. tot->tx_packets += tpackets;
  2807. tot->rx_bytes += rbytes;
  2808. tot->tx_bytes += tbytes;
  2809. tot->rx_dropped += rdrops;
  2810. tot->tx_errors += terrors;
  2811. }
  2812. tot->tx_dropped = DEV_STATS_READ(dev, tx_dropped);
  2813. tot->tx_fifo_errors = DEV_STATS_READ(dev, tx_fifo_errors);
  2814. tot->rx_length_errors = DEV_STATS_READ(dev, rx_length_errors);
  2815. tot->rx_frame_errors = DEV_STATS_READ(dev, rx_frame_errors);
  2816. }
  2817. static void virtnet_ack_link_announce(struct virtnet_info *vi)
  2818. {
  2819. if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_ANNOUNCE,
  2820. VIRTIO_NET_CTRL_ANNOUNCE_ACK, NULL))
  2821. dev_warn(&vi->dev->dev, "Failed to ack link announce.\n");
  2822. }
  2823. static bool virtnet_commit_rss_command(struct virtnet_info *vi);
  2824. static void virtnet_rss_update_by_qpairs(struct virtnet_info *vi, u16 queue_pairs)
  2825. {
  2826. u32 indir_val = 0;
  2827. int i = 0;
  2828. for (; i < vi->rss_indir_table_size; ++i) {
  2829. indir_val = ethtool_rxfh_indir_default(i, queue_pairs);
  2830. vi->rss.indirection_table[i] = indir_val;
  2831. }
  2832. vi->rss.max_tx_vq = queue_pairs;
  2833. }
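/* Change the number of active queue pairs. If device RSS is in use and
 * the user has not configured the indirection table, the new count is
 * carried by the RSS command (via rss.max_tx_vq); otherwise a
 * VIRTIO_NET_CTRL_MQ_VQ_PAIRS_SET command is sent.
 */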
  2834. static int virtnet_set_queues(struct virtnet_info *vi, u16 queue_pairs)
  2835. {
  2836. struct virtio_net_ctrl_mq *mq __free(kfree) = NULL;
  2837. struct virtio_net_ctrl_rss old_rss;
  2838. struct net_device *dev = vi->dev;
  2839. struct scatterlist sg;
  2840. if (!vi->has_cvq || !virtio_has_feature(vi->vdev, VIRTIO_NET_F_MQ))
  2841. return 0;
2842. /* First check whether we need to update RSS. Do so only if both (1) RSS is
2843. * enabled and (2) there is no user configuration.
2844. *
2845. * While processing the RSS command, the device updates queue_pairs from
2846. * rss.max_tx_vq. That is, the device updates queue_pairs together with RSS,
2847. * so we can skip the separate queue_pairs update (VIRTIO_NET_CTRL_MQ_VQ_PAIRS_SET below) and return directly.
2848. */
  2849. if (vi->has_rss && !netif_is_rxfh_configured(dev)) {
  2850. memcpy(&old_rss, &vi->rss, sizeof(old_rss));
  2851. if (rss_indirection_table_alloc(&vi->rss, vi->rss_indir_table_size)) {
  2852. vi->rss.indirection_table = old_rss.indirection_table;
  2853. return -ENOMEM;
  2854. }
  2855. virtnet_rss_update_by_qpairs(vi, queue_pairs);
  2856. if (!virtnet_commit_rss_command(vi)) {
2857. /* restore vi->rss if committing the RSS command failed */
  2858. rss_indirection_table_free(&vi->rss);
  2859. memcpy(&vi->rss, &old_rss, sizeof(old_rss));
  2860. dev_warn(&dev->dev, "Fail to set num of queue pairs to %d, because committing RSS failed\n",
  2861. queue_pairs);
  2862. return -EINVAL;
  2863. }
  2864. rss_indirection_table_free(&old_rss);
  2865. goto succ;
  2866. }
  2867. mq = kzalloc(sizeof(*mq), GFP_KERNEL);
  2868. if (!mq)
  2869. return -ENOMEM;
  2870. mq->virtqueue_pairs = cpu_to_virtio16(vi->vdev, queue_pairs);
  2871. sg_init_one(&sg, mq, sizeof(*mq));
  2872. if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_MQ,
  2873. VIRTIO_NET_CTRL_MQ_VQ_PAIRS_SET, &sg)) {
  2874. dev_warn(&dev->dev, "Fail to set num of queue pairs to %d\n",
  2875. queue_pairs);
  2876. return -EINVAL;
  2877. }
  2878. succ:
  2879. vi->curr_queue_pairs = queue_pairs;
2880. /* virtnet_open() will refill when the device is brought up. */
  2881. if (dev->flags & IFF_UP)
  2882. schedule_delayed_work(&vi->refill, 0);
  2883. return 0;
  2884. }
  2885. static int virtnet_close(struct net_device *dev)
  2886. {
  2887. struct virtnet_info *vi = netdev_priv(dev);
  2888. int i;
  2889. /* Make sure NAPI doesn't schedule refill work */
  2890. disable_delayed_refill(vi);
  2891. /* Make sure refill_work doesn't re-enable napi! */
  2892. cancel_delayed_work_sync(&vi->refill);
  2893. /* Prevent the config change callback from changing carrier
  2894. * after close
  2895. */
  2896. virtio_config_driver_disable(vi->vdev);
  2897. /* Stop getting status/speed updates: we don't care until next
  2898. * open
  2899. */
  2900. cancel_work_sync(&vi->config_work);
  2901. for (i = 0; i < vi->max_queue_pairs; i++) {
  2902. virtnet_disable_queue_pair(vi, i);
  2903. virtnet_cancel_dim(vi, &vi->rq[i].dim);
  2904. }
  2905. netif_carrier_off(dev);
  2906. return 0;
  2907. }
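/* Deferred rx-mode update: push the promiscuous/allmulti flags and the
 * unicast + multicast MAC filter lists to the device over the control
 * virtqueue. Done from a workqueue since the control commands can sleep
 * (they take a mutex and may cond_resched()).
 */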
  2908. static void virtnet_rx_mode_work(struct work_struct *work)
  2909. {
  2910. struct virtnet_info *vi =
  2911. container_of(work, struct virtnet_info, rx_mode_work);
  2912. u8 *promisc_allmulti __free(kfree) = NULL;
  2913. struct net_device *dev = vi->dev;
  2914. struct scatterlist sg[2];
  2915. struct virtio_net_ctrl_mac *mac_data;
  2916. struct netdev_hw_addr *ha;
  2917. int uc_count;
  2918. int mc_count;
  2919. void *buf;
  2920. int i;
  2921. /* We can't dynamically set ndo_set_rx_mode, so return gracefully */
  2922. if (!virtio_has_feature(vi->vdev, VIRTIO_NET_F_CTRL_RX))
  2923. return;
  2924. promisc_allmulti = kzalloc(sizeof(*promisc_allmulti), GFP_KERNEL);
  2925. if (!promisc_allmulti) {
  2926. dev_warn(&dev->dev, "Failed to set RX mode, no memory.\n");
  2927. return;
  2928. }
  2929. rtnl_lock();
  2930. *promisc_allmulti = !!(dev->flags & IFF_PROMISC);
  2931. sg_init_one(sg, promisc_allmulti, sizeof(*promisc_allmulti));
  2932. if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_RX,
  2933. VIRTIO_NET_CTRL_RX_PROMISC, sg))
  2934. dev_warn(&dev->dev, "Failed to %sable promisc mode.\n",
  2935. *promisc_allmulti ? "en" : "dis");
  2936. *promisc_allmulti = !!(dev->flags & IFF_ALLMULTI);
  2937. sg_init_one(sg, promisc_allmulti, sizeof(*promisc_allmulti));
  2938. if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_RX,
  2939. VIRTIO_NET_CTRL_RX_ALLMULTI, sg))
  2940. dev_warn(&dev->dev, "Failed to %sable allmulti mode.\n",
  2941. *promisc_allmulti ? "en" : "dis");
  2942. netif_addr_lock_bh(dev);
  2943. uc_count = netdev_uc_count(dev);
  2944. mc_count = netdev_mc_count(dev);
  2945. /* MAC filter - use one buffer for both lists */
  2946. buf = kzalloc(((uc_count + mc_count) * ETH_ALEN) +
  2947. (2 * sizeof(mac_data->entries)), GFP_ATOMIC);
  2948. mac_data = buf;
  2949. if (!buf) {
  2950. netif_addr_unlock_bh(dev);
  2951. rtnl_unlock();
  2952. return;
  2953. }
  2954. sg_init_table(sg, 2);
  2955. /* Store the unicast list and count in the front of the buffer */
  2956. mac_data->entries = cpu_to_virtio32(vi->vdev, uc_count);
  2957. i = 0;
  2958. netdev_for_each_uc_addr(ha, dev)
  2959. memcpy(&mac_data->macs[i++][0], ha->addr, ETH_ALEN);
  2960. sg_set_buf(&sg[0], mac_data,
  2961. sizeof(mac_data->entries) + (uc_count * ETH_ALEN));
  2962. /* multicast list and count fill the end */
  2963. mac_data = (void *)&mac_data->macs[uc_count][0];
  2964. mac_data->entries = cpu_to_virtio32(vi->vdev, mc_count);
  2965. i = 0;
  2966. netdev_for_each_mc_addr(ha, dev)
  2967. memcpy(&mac_data->macs[i++][0], ha->addr, ETH_ALEN);
  2968. netif_addr_unlock_bh(dev);
  2969. sg_set_buf(&sg[1], mac_data,
  2970. sizeof(mac_data->entries) + (mc_count * ETH_ALEN));
  2971. if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_MAC,
  2972. VIRTIO_NET_CTRL_MAC_TABLE_SET, sg))
  2973. dev_warn(&dev->dev, "Failed to set MAC filter table.\n");
  2974. rtnl_unlock();
  2975. kfree(buf);
  2976. }
  2977. static void virtnet_set_rx_mode(struct net_device *dev)
  2978. {
  2979. struct virtnet_info *vi = netdev_priv(dev);
  2980. if (vi->rx_mode_work_enabled)
  2981. schedule_work(&vi->rx_mode_work);
  2982. }
  2983. static int virtnet_vlan_rx_add_vid(struct net_device *dev,
  2984. __be16 proto, u16 vid)
  2985. {
  2986. struct virtnet_info *vi = netdev_priv(dev);
  2987. __virtio16 *_vid __free(kfree) = NULL;
  2988. struct scatterlist sg;
  2989. _vid = kzalloc(sizeof(*_vid), GFP_KERNEL);
  2990. if (!_vid)
  2991. return -ENOMEM;
  2992. *_vid = cpu_to_virtio16(vi->vdev, vid);
  2993. sg_init_one(&sg, _vid, sizeof(*_vid));
  2994. if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_VLAN,
  2995. VIRTIO_NET_CTRL_VLAN_ADD, &sg))
  2996. dev_warn(&dev->dev, "Failed to add VLAN ID %d.\n", vid);
  2997. return 0;
  2998. }
  2999. static int virtnet_vlan_rx_kill_vid(struct net_device *dev,
  3000. __be16 proto, u16 vid)
  3001. {
  3002. struct virtnet_info *vi = netdev_priv(dev);
  3003. __virtio16 *_vid __free(kfree) = NULL;
  3004. struct scatterlist sg;
  3005. _vid = kzalloc(sizeof(*_vid), GFP_KERNEL);
  3006. if (!_vid)
  3007. return -ENOMEM;
  3008. *_vid = cpu_to_virtio16(vi->vdev, vid);
  3009. sg_init_one(&sg, _vid, sizeof(*_vid));
  3010. if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_VLAN,
  3011. VIRTIO_NET_CTRL_VLAN_DEL, &sg))
  3012. dev_warn(&dev->dev, "Failed to kill VLAN ID %d.\n", vid);
  3013. return 0;
  3014. }
  3015. static void virtnet_clean_affinity(struct virtnet_info *vi)
  3016. {
  3017. int i;
  3018. if (vi->affinity_hint_set) {
  3019. for (i = 0; i < vi->max_queue_pairs; i++) {
  3020. virtqueue_set_affinity(vi->rq[i].vq, NULL);
  3021. virtqueue_set_affinity(vi->sq[i].vq, NULL);
  3022. }
  3023. vi->affinity_hint_set = false;
  3024. }
  3025. }
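/* Spread the active queue pairs over the online CPUs: each pair gets a
 * contiguous group of CPUs (any remainder goes to the first pairs), and
 * both the virtqueue affinity hints and the XPS map are updated to
 * match.
 */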
  3026. static void virtnet_set_affinity(struct virtnet_info *vi)
  3027. {
  3028. cpumask_var_t mask;
  3029. int stragglers;
  3030. int group_size;
  3031. int i, j, cpu;
  3032. int num_cpu;
  3033. int stride;
  3034. if (!zalloc_cpumask_var(&mask, GFP_KERNEL)) {
  3035. virtnet_clean_affinity(vi);
  3036. return;
  3037. }
  3038. num_cpu = num_online_cpus();
  3039. stride = max_t(int, num_cpu / vi->curr_queue_pairs, 1);
  3040. stragglers = num_cpu >= vi->curr_queue_pairs ?
  3041. num_cpu % vi->curr_queue_pairs :
  3042. 0;
  3043. cpu = cpumask_first(cpu_online_mask);
  3044. for (i = 0; i < vi->curr_queue_pairs; i++) {
  3045. group_size = stride + (i < stragglers ? 1 : 0);
  3046. for (j = 0; j < group_size; j++) {
  3047. cpumask_set_cpu(cpu, mask);
  3048. cpu = cpumask_next_wrap(cpu, cpu_online_mask,
  3049. nr_cpu_ids, false);
  3050. }
  3051. virtqueue_set_affinity(vi->rq[i].vq, mask);
  3052. virtqueue_set_affinity(vi->sq[i].vq, mask);
  3053. __netif_set_xps_queue(vi->dev, cpumask_bits(mask), i, XPS_CPUS);
  3054. cpumask_clear(mask);
  3055. }
  3056. vi->affinity_hint_set = true;
  3057. free_cpumask_var(mask);
  3058. }
  3059. static int virtnet_cpu_online(unsigned int cpu, struct hlist_node *node)
  3060. {
  3061. struct virtnet_info *vi = hlist_entry_safe(node, struct virtnet_info,
  3062. node);
  3063. virtnet_set_affinity(vi);
  3064. return 0;
  3065. }
  3066. static int virtnet_cpu_dead(unsigned int cpu, struct hlist_node *node)
  3067. {
  3068. struct virtnet_info *vi = hlist_entry_safe(node, struct virtnet_info,
  3069. node_dead);
  3070. virtnet_set_affinity(vi);
  3071. return 0;
  3072. }
  3073. static int virtnet_cpu_down_prep(unsigned int cpu, struct hlist_node *node)
  3074. {
  3075. struct virtnet_info *vi = hlist_entry_safe(node, struct virtnet_info,
  3076. node);
  3077. virtnet_clean_affinity(vi);
  3078. return 0;
  3079. }
  3080. static enum cpuhp_state virtionet_online;
  3081. static int virtnet_cpu_notif_add(struct virtnet_info *vi)
  3082. {
  3083. int ret;
  3084. ret = cpuhp_state_add_instance_nocalls(virtionet_online, &vi->node);
  3085. if (ret)
  3086. return ret;
  3087. ret = cpuhp_state_add_instance_nocalls(CPUHP_VIRT_NET_DEAD,
  3088. &vi->node_dead);
  3089. if (!ret)
  3090. return ret;
  3091. cpuhp_state_remove_instance_nocalls(virtionet_online, &vi->node);
  3092. return ret;
  3093. }
  3094. static void virtnet_cpu_notif_remove(struct virtnet_info *vi)
  3095. {
  3096. cpuhp_state_remove_instance_nocalls(virtionet_online, &vi->node);
  3097. cpuhp_state_remove_instance_nocalls(CPUHP_VIRT_NET_DEAD,
  3098. &vi->node_dead);
  3099. }
  3100. static int virtnet_send_ctrl_coal_vq_cmd(struct virtnet_info *vi,
  3101. u16 vqn, u32 max_usecs, u32 max_packets)
  3102. {
  3103. struct virtio_net_ctrl_coal_vq *coal_vq __free(kfree) = NULL;
  3104. struct scatterlist sgs;
  3105. coal_vq = kzalloc(sizeof(*coal_vq), GFP_KERNEL);
  3106. if (!coal_vq)
  3107. return -ENOMEM;
  3108. coal_vq->vqn = cpu_to_le16(vqn);
  3109. coal_vq->coal.max_usecs = cpu_to_le32(max_usecs);
  3110. coal_vq->coal.max_packets = cpu_to_le32(max_packets);
  3111. sg_init_one(&sgs, coal_vq, sizeof(*coal_vq));
  3112. if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_NOTF_COAL,
  3113. VIRTIO_NET_CTRL_NOTF_COAL_VQ_SET,
  3114. &sgs))
  3115. return -EINVAL;
  3116. return 0;
  3117. }
  3118. static int virtnet_send_rx_ctrl_coal_vq_cmd(struct virtnet_info *vi,
  3119. u16 queue, u32 max_usecs,
  3120. u32 max_packets)
  3121. {
  3122. int err;
  3123. if (!virtio_has_feature(vi->vdev, VIRTIO_NET_F_VQ_NOTF_COAL))
  3124. return -EOPNOTSUPP;
  3125. err = virtnet_send_ctrl_coal_vq_cmd(vi, rxq2vq(queue),
  3126. max_usecs, max_packets);
  3127. if (err)
  3128. return err;
  3129. vi->rq[queue].intr_coal.max_usecs = max_usecs;
  3130. vi->rq[queue].intr_coal.max_packets = max_packets;
  3131. return 0;
  3132. }
  3133. static int virtnet_send_tx_ctrl_coal_vq_cmd(struct virtnet_info *vi,
  3134. u16 queue, u32 max_usecs,
  3135. u32 max_packets)
  3136. {
  3137. int err;
  3138. if (!virtio_has_feature(vi->vdev, VIRTIO_NET_F_VQ_NOTF_COAL))
  3139. return -EOPNOTSUPP;
  3140. err = virtnet_send_ctrl_coal_vq_cmd(vi, txq2vq(queue),
  3141. max_usecs, max_packets);
  3142. if (err)
  3143. return err;
  3144. vi->sq[queue].intr_coal.max_usecs = max_usecs;
  3145. vi->sq[queue].intr_coal.max_packets = max_packets;
  3146. return 0;
  3147. }
  3148. static void virtnet_get_ringparam(struct net_device *dev,
  3149. struct ethtool_ringparam *ring,
  3150. struct kernel_ethtool_ringparam *kernel_ring,
  3151. struct netlink_ext_ack *extack)
  3152. {
  3153. struct virtnet_info *vi = netdev_priv(dev);
  3154. ring->rx_max_pending = vi->rq[0].vq->num_max;
  3155. ring->tx_max_pending = vi->sq[0].vq->num_max;
  3156. ring->rx_pending = virtqueue_get_vring_size(vi->rq[0].vq);
  3157. ring->tx_pending = virtqueue_get_vring_size(vi->sq[0].vq);
  3158. }
  3159. static int virtnet_set_ringparam(struct net_device *dev,
  3160. struct ethtool_ringparam *ring,
  3161. struct kernel_ethtool_ringparam *kernel_ring,
  3162. struct netlink_ext_ack *extack)
  3163. {
  3164. struct virtnet_info *vi = netdev_priv(dev);
  3165. u32 rx_pending, tx_pending;
  3166. struct receive_queue *rq;
  3167. struct send_queue *sq;
  3168. int i, err;
  3169. if (ring->rx_mini_pending || ring->rx_jumbo_pending)
  3170. return -EINVAL;
  3171. rx_pending = virtqueue_get_vring_size(vi->rq[0].vq);
  3172. tx_pending = virtqueue_get_vring_size(vi->sq[0].vq);
  3173. if (ring->rx_pending == rx_pending &&
  3174. ring->tx_pending == tx_pending)
  3175. return 0;
  3176. if (ring->rx_pending > vi->rq[0].vq->num_max)
  3177. return -EINVAL;
  3178. if (ring->tx_pending > vi->sq[0].vq->num_max)
  3179. return -EINVAL;
  3180. for (i = 0; i < vi->max_queue_pairs; i++) {
  3181. rq = vi->rq + i;
  3182. sq = vi->sq + i;
  3183. if (ring->tx_pending != tx_pending) {
  3184. err = virtnet_tx_resize(vi, sq, ring->tx_pending);
  3185. if (err)
  3186. return err;
  3187. /* Upon disabling and re-enabling a transmit virtqueue, the device must
  3188. * set the coalescing parameters of the virtqueue to those configured
  3189. * through the VIRTIO_NET_CTRL_NOTF_COAL_TX_SET command, or, if the driver
  3190. * did not set any TX coalescing parameters, to 0.
  3191. */
  3192. err = virtnet_send_tx_ctrl_coal_vq_cmd(vi, i,
  3193. vi->intr_coal_tx.max_usecs,
  3194. vi->intr_coal_tx.max_packets);
  3195. /* Don't break the tx resize action if the vq coalescing is not
  3196. * supported. The same is true for rx resize below.
  3197. */
  3198. if (err && err != -EOPNOTSUPP)
  3199. return err;
  3200. }
  3201. if (ring->rx_pending != rx_pending) {
  3202. err = virtnet_rx_resize(vi, rq, ring->rx_pending);
  3203. if (err)
  3204. return err;
3205. /* The reason is the same as for the transmit virtqueue reset above. */
  3206. mutex_lock(&vi->rq[i].dim_lock);
  3207. err = virtnet_send_rx_ctrl_coal_vq_cmd(vi, i,
  3208. vi->intr_coal_rx.max_usecs,
  3209. vi->intr_coal_rx.max_packets);
  3210. mutex_unlock(&vi->rq[i].dim_lock);
  3211. if (err && err != -EOPNOTSUPP)
  3212. return err;
  3213. }
  3214. }
  3215. return 0;
  3216. }
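/* Push the current RSS/hash configuration to the device. The config is
 * split across four sg entries because the indirection table and the
 * hash key are variable-length and live outside the fixed part of
 * struct virtio_net_ctrl_rss.
 */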
  3217. static bool virtnet_commit_rss_command(struct virtnet_info *vi)
  3218. {
  3219. struct net_device *dev = vi->dev;
  3220. struct scatterlist sgs[4];
  3221. unsigned int sg_buf_size;
  3222. /* prepare sgs */
  3223. sg_init_table(sgs, 4);
  3224. sg_buf_size = offsetof(struct virtio_net_ctrl_rss, hash_cfg_reserved);
  3225. sg_set_buf(&sgs[0], &vi->rss, sg_buf_size);
  3226. if (vi->has_rss) {
  3227. sg_buf_size = sizeof(uint16_t) * vi->rss_indir_table_size;
  3228. sg_set_buf(&sgs[1], vi->rss.indirection_table, sg_buf_size);
  3229. } else {
  3230. sg_set_buf(&sgs[1], &vi->rss.hash_cfg_reserved, sizeof(uint16_t));
  3231. }
  3232. sg_buf_size = offsetof(struct virtio_net_ctrl_rss, key)
  3233. - offsetof(struct virtio_net_ctrl_rss, max_tx_vq);
  3234. sg_set_buf(&sgs[2], &vi->rss.max_tx_vq, sg_buf_size);
  3235. sg_buf_size = vi->rss_key_size;
  3236. sg_set_buf(&sgs[3], vi->rss.key, sg_buf_size);
  3237. if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_MQ,
  3238. vi->has_rss ? VIRTIO_NET_CTRL_MQ_RSS_CONFIG
  3239. : VIRTIO_NET_CTRL_MQ_HASH_CONFIG, sgs))
  3240. goto err;
  3241. return true;
  3242. err:
  3243. dev_warn(&dev->dev, "VIRTIONET issue with committing RSS sgs\n");
  3244. return false;
  3245. }
  3246. static void virtnet_init_default_rss(struct virtnet_info *vi)
  3247. {
  3248. vi->rss.hash_types = vi->rss_hash_types_supported;
  3249. vi->rss_hash_types_saved = vi->rss_hash_types_supported;
  3250. vi->rss.indirection_table_mask = vi->rss_indir_table_size
  3251. ? vi->rss_indir_table_size - 1 : 0;
  3252. vi->rss.unclassified_queue = 0;
  3253. virtnet_rss_update_by_qpairs(vi, vi->curr_queue_pairs);
  3254. vi->rss.hash_key_length = vi->rss_key_size;
  3255. netdev_rss_key_fill(vi->rss.key, vi->rss_key_size);
  3256. }
  3257. static void virtnet_get_hashflow(const struct virtnet_info *vi, struct ethtool_rxnfc *info)
  3258. {
  3259. info->data = 0;
  3260. switch (info->flow_type) {
  3261. case TCP_V4_FLOW:
  3262. if (vi->rss_hash_types_saved & VIRTIO_NET_RSS_HASH_TYPE_TCPv4) {
  3263. info->data = RXH_IP_SRC | RXH_IP_DST |
  3264. RXH_L4_B_0_1 | RXH_L4_B_2_3;
  3265. } else if (vi->rss_hash_types_saved & VIRTIO_NET_RSS_HASH_TYPE_IPv4) {
  3266. info->data = RXH_IP_SRC | RXH_IP_DST;
  3267. }
  3268. break;
  3269. case TCP_V6_FLOW:
  3270. if (vi->rss_hash_types_saved & VIRTIO_NET_RSS_HASH_TYPE_TCPv6) {
  3271. info->data = RXH_IP_SRC | RXH_IP_DST |
  3272. RXH_L4_B_0_1 | RXH_L4_B_2_3;
  3273. } else if (vi->rss_hash_types_saved & VIRTIO_NET_RSS_HASH_TYPE_IPv6) {
  3274. info->data = RXH_IP_SRC | RXH_IP_DST;
  3275. }
  3276. break;
  3277. case UDP_V4_FLOW:
  3278. if (vi->rss_hash_types_saved & VIRTIO_NET_RSS_HASH_TYPE_UDPv4) {
  3279. info->data = RXH_IP_SRC | RXH_IP_DST |
  3280. RXH_L4_B_0_1 | RXH_L4_B_2_3;
  3281. } else if (vi->rss_hash_types_saved & VIRTIO_NET_RSS_HASH_TYPE_IPv4) {
  3282. info->data = RXH_IP_SRC | RXH_IP_DST;
  3283. }
  3284. break;
  3285. case UDP_V6_FLOW:
  3286. if (vi->rss_hash_types_saved & VIRTIO_NET_RSS_HASH_TYPE_UDPv6) {
  3287. info->data = RXH_IP_SRC | RXH_IP_DST |
  3288. RXH_L4_B_0_1 | RXH_L4_B_2_3;
  3289. } else if (vi->rss_hash_types_saved & VIRTIO_NET_RSS_HASH_TYPE_IPv6) {
  3290. info->data = RXH_IP_SRC | RXH_IP_DST;
  3291. }
  3292. break;
  3293. case IPV4_FLOW:
  3294. if (vi->rss_hash_types_saved & VIRTIO_NET_RSS_HASH_TYPE_IPv4)
  3295. info->data = RXH_IP_SRC | RXH_IP_DST;
  3296. break;
  3297. case IPV6_FLOW:
  3298. if (vi->rss_hash_types_saved & VIRTIO_NET_RSS_HASH_TYPE_IPv6)
  3299. info->data = RXH_IP_SRC | RXH_IP_DST;
  3300. break;
  3301. default:
  3302. info->data = 0;
  3303. break;
  3304. }
  3305. }
  3306. static bool virtnet_set_hashflow(struct virtnet_info *vi, struct ethtool_rxnfc *info)
  3307. {
  3308. u32 new_hashtypes = vi->rss_hash_types_saved;
  3309. bool is_disable = info->data & RXH_DISCARD;
  3310. bool is_l4 = info->data == (RXH_IP_SRC | RXH_IP_DST | RXH_L4_B_0_1 | RXH_L4_B_2_3);
  3311. /* supports only 'sd', 'sdfn' and 'r' */
  3312. if (!((info->data == (RXH_IP_SRC | RXH_IP_DST)) | is_l4 | is_disable))
  3313. return false;
  3314. switch (info->flow_type) {
  3315. case TCP_V4_FLOW:
  3316. new_hashtypes &= ~(VIRTIO_NET_RSS_HASH_TYPE_IPv4 | VIRTIO_NET_RSS_HASH_TYPE_TCPv4);
  3317. if (!is_disable)
  3318. new_hashtypes |= VIRTIO_NET_RSS_HASH_TYPE_IPv4
  3319. | (is_l4 ? VIRTIO_NET_RSS_HASH_TYPE_TCPv4 : 0);
  3320. break;
  3321. case UDP_V4_FLOW:
  3322. new_hashtypes &= ~(VIRTIO_NET_RSS_HASH_TYPE_IPv4 | VIRTIO_NET_RSS_HASH_TYPE_UDPv4);
  3323. if (!is_disable)
  3324. new_hashtypes |= VIRTIO_NET_RSS_HASH_TYPE_IPv4
  3325. | (is_l4 ? VIRTIO_NET_RSS_HASH_TYPE_UDPv4 : 0);
  3326. break;
  3327. case IPV4_FLOW:
  3328. new_hashtypes &= ~VIRTIO_NET_RSS_HASH_TYPE_IPv4;
  3329. if (!is_disable)
  3330. new_hashtypes = VIRTIO_NET_RSS_HASH_TYPE_IPv4;
  3331. break;
  3332. case TCP_V6_FLOW:
  3333. new_hashtypes &= ~(VIRTIO_NET_RSS_HASH_TYPE_IPv6 | VIRTIO_NET_RSS_HASH_TYPE_TCPv6);
  3334. if (!is_disable)
  3335. new_hashtypes |= VIRTIO_NET_RSS_HASH_TYPE_IPv6
  3336. | (is_l4 ? VIRTIO_NET_RSS_HASH_TYPE_TCPv6 : 0);
  3337. break;
  3338. case UDP_V6_FLOW:
  3339. new_hashtypes &= ~(VIRTIO_NET_RSS_HASH_TYPE_IPv6 | VIRTIO_NET_RSS_HASH_TYPE_UDPv6);
  3340. if (!is_disable)
  3341. new_hashtypes |= VIRTIO_NET_RSS_HASH_TYPE_IPv6
  3342. | (is_l4 ? VIRTIO_NET_RSS_HASH_TYPE_UDPv6 : 0);
  3343. break;
  3344. case IPV6_FLOW:
  3345. new_hashtypes &= ~VIRTIO_NET_RSS_HASH_TYPE_IPv6;
  3346. if (!is_disable)
  3347. new_hashtypes = VIRTIO_NET_RSS_HASH_TYPE_IPv6;
  3348. break;
  3349. default:
  3350. /* unsupported flow */
  3351. return false;
  3352. }
  3353. /* if unsupported hashtype was set */
  3354. if (new_hashtypes != (new_hashtypes & vi->rss_hash_types_supported))
  3355. return false;
  3356. if (new_hashtypes != vi->rss_hash_types_saved) {
  3357. vi->rss_hash_types_saved = new_hashtypes;
  3358. vi->rss.hash_types = vi->rss_hash_types_saved;
  3359. if (vi->dev->features & NETIF_F_RXHASH)
  3360. return virtnet_commit_rss_command(vi);
  3361. }
  3362. return true;
  3363. }
  3364. static void virtnet_get_drvinfo(struct net_device *dev,
  3365. struct ethtool_drvinfo *info)
  3366. {
  3367. struct virtnet_info *vi = netdev_priv(dev);
  3368. struct virtio_device *vdev = vi->vdev;
  3369. strscpy(info->driver, KBUILD_MODNAME, sizeof(info->driver));
  3370. strscpy(info->version, VIRTNET_DRIVER_VERSION, sizeof(info->version));
  3371. strscpy(info->bus_info, virtio_bus_name(vdev), sizeof(info->bus_info));
  3372. }
  3373. /* TODO: Eliminate OOO packets during switching */
  3374. static int virtnet_set_channels(struct net_device *dev,
  3375. struct ethtool_channels *channels)
  3376. {
  3377. struct virtnet_info *vi = netdev_priv(dev);
  3378. u16 queue_pairs = channels->combined_count;
  3379. int err;
  3380. /* We don't support separate rx/tx channels.
  3381. * We don't allow setting 'other' channels.
  3382. */
  3383. if (channels->rx_count || channels->tx_count || channels->other_count)
  3384. return -EINVAL;
  3385. if (queue_pairs > vi->max_queue_pairs || queue_pairs == 0)
  3386. return -EINVAL;
3387. /* For now we don't support modifying channels while XDP is loaded.
3388. * Also, when XDP is loaded, all RX queues have XDP programs, so we
3389. * only need to check a single RX queue.
3390. */
  3391. if (vi->rq[0].xdp_prog)
  3392. return -EINVAL;
  3393. cpus_read_lock();
  3394. err = virtnet_set_queues(vi, queue_pairs);
  3395. if (err) {
  3396. cpus_read_unlock();
  3397. goto err;
  3398. }
  3399. virtnet_set_affinity(vi);
  3400. cpus_read_unlock();
  3401. netif_set_real_num_tx_queues(dev, queue_pairs);
  3402. netif_set_real_num_rx_queues(dev, queue_pairs);
  3403. err:
  3404. return err;
  3405. }
  3406. static void virtnet_stats_sprintf(u8 **p, const char *fmt, const char *noq_fmt,
  3407. int num, int qid, const struct virtnet_stat_desc *desc)
  3408. {
  3409. int i;
  3410. if (qid < 0) {
  3411. for (i = 0; i < num; ++i)
  3412. ethtool_sprintf(p, noq_fmt, desc[i].desc);
  3413. } else {
  3414. for (i = 0; i < num; ++i)
  3415. ethtool_sprintf(p, fmt, qid, desc[i].desc);
  3416. }
  3417. }
  3418. /* qid == -1: for rx/tx queue total field */
  3419. static void virtnet_get_stats_string(struct virtnet_info *vi, int type, int qid, u8 **data)
  3420. {
  3421. const struct virtnet_stat_desc *desc;
  3422. const char *fmt, *noq_fmt;
  3423. u8 *p = *data;
  3424. u32 num;
  3425. if (type == VIRTNET_Q_TYPE_CQ && qid >= 0) {
  3426. noq_fmt = "cq_hw_%s";
  3427. if (vi->device_stats_cap & VIRTIO_NET_STATS_TYPE_CVQ) {
  3428. desc = &virtnet_stats_cvq_desc[0];
  3429. num = ARRAY_SIZE(virtnet_stats_cvq_desc);
  3430. virtnet_stats_sprintf(&p, NULL, noq_fmt, num, -1, desc);
  3431. }
  3432. }
  3433. if (type == VIRTNET_Q_TYPE_RX) {
  3434. fmt = "rx%u_%s";
  3435. noq_fmt = "rx_%s";
  3436. desc = &virtnet_rq_stats_desc[0];
  3437. num = ARRAY_SIZE(virtnet_rq_stats_desc);
  3438. virtnet_stats_sprintf(&p, fmt, noq_fmt, num, qid, desc);
  3439. fmt = "rx%u_hw_%s";
  3440. noq_fmt = "rx_hw_%s";
  3441. if (vi->device_stats_cap & VIRTIO_NET_STATS_TYPE_RX_BASIC) {
  3442. desc = &virtnet_stats_rx_basic_desc[0];
  3443. num = ARRAY_SIZE(virtnet_stats_rx_basic_desc);
  3444. virtnet_stats_sprintf(&p, fmt, noq_fmt, num, qid, desc);
  3445. }
  3446. if (vi->device_stats_cap & VIRTIO_NET_STATS_TYPE_RX_CSUM) {
  3447. desc = &virtnet_stats_rx_csum_desc[0];
  3448. num = ARRAY_SIZE(virtnet_stats_rx_csum_desc);
  3449. virtnet_stats_sprintf(&p, fmt, noq_fmt, num, qid, desc);
  3450. }
  3451. if (vi->device_stats_cap & VIRTIO_NET_STATS_TYPE_RX_SPEED) {
  3452. desc = &virtnet_stats_rx_speed_desc[0];
  3453. num = ARRAY_SIZE(virtnet_stats_rx_speed_desc);
  3454. virtnet_stats_sprintf(&p, fmt, noq_fmt, num, qid, desc);
  3455. }
  3456. }
  3457. if (type == VIRTNET_Q_TYPE_TX) {
  3458. fmt = "tx%u_%s";
  3459. noq_fmt = "tx_%s";
  3460. desc = &virtnet_sq_stats_desc[0];
  3461. num = ARRAY_SIZE(virtnet_sq_stats_desc);
  3462. virtnet_stats_sprintf(&p, fmt, noq_fmt, num, qid, desc);
  3463. fmt = "tx%u_hw_%s";
  3464. noq_fmt = "tx_hw_%s";
  3465. if (vi->device_stats_cap & VIRTIO_NET_STATS_TYPE_TX_BASIC) {
  3466. desc = &virtnet_stats_tx_basic_desc[0];
  3467. num = ARRAY_SIZE(virtnet_stats_tx_basic_desc);
  3468. virtnet_stats_sprintf(&p, fmt, noq_fmt, num, qid, desc);
  3469. }
  3470. if (vi->device_stats_cap & VIRTIO_NET_STATS_TYPE_TX_GSO) {
  3471. desc = &virtnet_stats_tx_gso_desc[0];
  3472. num = ARRAY_SIZE(virtnet_stats_tx_gso_desc);
  3473. virtnet_stats_sprintf(&p, fmt, noq_fmt, num, qid, desc);
  3474. }
  3475. if (vi->device_stats_cap & VIRTIO_NET_STATS_TYPE_TX_SPEED) {
  3476. desc = &virtnet_stats_tx_speed_desc[0];
  3477. num = ARRAY_SIZE(virtnet_stats_tx_speed_desc);
  3478. virtnet_stats_sprintf(&p, fmt, noq_fmt, num, qid, desc);
  3479. }
  3480. }
  3481. *data = p;
  3482. }
  3483. struct virtnet_stats_ctx {
3484. /* The stats are written to qstats or ethtool -S. */
  3485. bool to_qstat;
  3486. /* Used to calculate the offset inside the output buffer. */
  3487. u32 desc_num[3];
  3488. /* The actual supported stat types. */
  3489. u64 bitmap[3];
  3490. /* Used to calculate the reply buffer size. */
  3491. u32 size[3];
  3492. /* Record the output buffer. */
  3493. u64 *data;
  3494. };
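/* Prepare a stats context: record which device stat types are supported
 * for each queue type, how many output slots each contributes, and how
 * large the device reply will be, for either the netdev qstats path or
 * ethtool -S.
 */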
  3495. static void virtnet_stats_ctx_init(struct virtnet_info *vi,
  3496. struct virtnet_stats_ctx *ctx,
  3497. u64 *data, bool to_qstat)
  3498. {
  3499. u32 queue_type;
  3500. ctx->data = data;
  3501. ctx->to_qstat = to_qstat;
  3502. if (to_qstat) {
  3503. ctx->desc_num[VIRTNET_Q_TYPE_RX] = ARRAY_SIZE(virtnet_rq_stats_desc_qstat);
  3504. ctx->desc_num[VIRTNET_Q_TYPE_TX] = ARRAY_SIZE(virtnet_sq_stats_desc_qstat);
  3505. queue_type = VIRTNET_Q_TYPE_RX;
  3506. if (vi->device_stats_cap & VIRTIO_NET_STATS_TYPE_RX_BASIC) {
  3507. ctx->bitmap[queue_type] |= VIRTIO_NET_STATS_TYPE_RX_BASIC;
  3508. ctx->desc_num[queue_type] += ARRAY_SIZE(virtnet_stats_rx_basic_desc_qstat);
  3509. ctx->size[queue_type] += sizeof(struct virtio_net_stats_rx_basic);
  3510. }
  3511. if (vi->device_stats_cap & VIRTIO_NET_STATS_TYPE_RX_CSUM) {
  3512. ctx->bitmap[queue_type] |= VIRTIO_NET_STATS_TYPE_RX_CSUM;
  3513. ctx->desc_num[queue_type] += ARRAY_SIZE(virtnet_stats_rx_csum_desc_qstat);
  3514. ctx->size[queue_type] += sizeof(struct virtio_net_stats_rx_csum);
  3515. }
  3516. if (vi->device_stats_cap & VIRTIO_NET_STATS_TYPE_RX_GSO) {
  3517. ctx->bitmap[queue_type] |= VIRTIO_NET_STATS_TYPE_RX_GSO;
  3518. ctx->desc_num[queue_type] += ARRAY_SIZE(virtnet_stats_rx_gso_desc_qstat);
  3519. ctx->size[queue_type] += sizeof(struct virtio_net_stats_rx_gso);
  3520. }
  3521. if (vi->device_stats_cap & VIRTIO_NET_STATS_TYPE_RX_SPEED) {
  3522. ctx->bitmap[queue_type] |= VIRTIO_NET_STATS_TYPE_RX_SPEED;
  3523. ctx->desc_num[queue_type] += ARRAY_SIZE(virtnet_stats_rx_speed_desc_qstat);
  3524. ctx->size[queue_type] += sizeof(struct virtio_net_stats_rx_speed);
  3525. }
  3526. queue_type = VIRTNET_Q_TYPE_TX;
  3527. if (vi->device_stats_cap & VIRTIO_NET_STATS_TYPE_TX_BASIC) {
  3528. ctx->bitmap[queue_type] |= VIRTIO_NET_STATS_TYPE_TX_BASIC;
  3529. ctx->desc_num[queue_type] += ARRAY_SIZE(virtnet_stats_tx_basic_desc_qstat);
  3530. ctx->size[queue_type] += sizeof(struct virtio_net_stats_tx_basic);
  3531. }
  3532. if (vi->device_stats_cap & VIRTIO_NET_STATS_TYPE_TX_CSUM) {
  3533. ctx->bitmap[queue_type] |= VIRTIO_NET_STATS_TYPE_TX_CSUM;
  3534. ctx->desc_num[queue_type] += ARRAY_SIZE(virtnet_stats_tx_csum_desc_qstat);
  3535. ctx->size[queue_type] += sizeof(struct virtio_net_stats_tx_csum);
  3536. }
  3537. if (vi->device_stats_cap & VIRTIO_NET_STATS_TYPE_TX_GSO) {
  3538. ctx->bitmap[queue_type] |= VIRTIO_NET_STATS_TYPE_TX_GSO;
  3539. ctx->desc_num[queue_type] += ARRAY_SIZE(virtnet_stats_tx_gso_desc_qstat);
  3540. ctx->size[queue_type] += sizeof(struct virtio_net_stats_tx_gso);
  3541. }
  3542. if (vi->device_stats_cap & VIRTIO_NET_STATS_TYPE_TX_SPEED) {
  3543. ctx->bitmap[queue_type] |= VIRTIO_NET_STATS_TYPE_TX_SPEED;
  3544. ctx->desc_num[queue_type] += ARRAY_SIZE(virtnet_stats_tx_speed_desc_qstat);
  3545. ctx->size[queue_type] += sizeof(struct virtio_net_stats_tx_speed);
  3546. }
  3547. return;
  3548. }
  3549. ctx->desc_num[VIRTNET_Q_TYPE_RX] = ARRAY_SIZE(virtnet_rq_stats_desc);
  3550. ctx->desc_num[VIRTNET_Q_TYPE_TX] = ARRAY_SIZE(virtnet_sq_stats_desc);
  3551. if (vi->device_stats_cap & VIRTIO_NET_STATS_TYPE_CVQ) {
  3552. queue_type = VIRTNET_Q_TYPE_CQ;
  3553. ctx->bitmap[queue_type] |= VIRTIO_NET_STATS_TYPE_CVQ;
  3554. ctx->desc_num[queue_type] += ARRAY_SIZE(virtnet_stats_cvq_desc);
  3555. ctx->size[queue_type] += sizeof(struct virtio_net_stats_cvq);
  3556. }
  3557. queue_type = VIRTNET_Q_TYPE_RX;
  3558. if (vi->device_stats_cap & VIRTIO_NET_STATS_TYPE_RX_BASIC) {
  3559. ctx->bitmap[queue_type] |= VIRTIO_NET_STATS_TYPE_RX_BASIC;
  3560. ctx->desc_num[queue_type] += ARRAY_SIZE(virtnet_stats_rx_basic_desc);
  3561. ctx->size[queue_type] += sizeof(struct virtio_net_stats_rx_basic);
  3562. }
  3563. if (vi->device_stats_cap & VIRTIO_NET_STATS_TYPE_RX_CSUM) {
  3564. ctx->bitmap[queue_type] |= VIRTIO_NET_STATS_TYPE_RX_CSUM;
  3565. ctx->desc_num[queue_type] += ARRAY_SIZE(virtnet_stats_rx_csum_desc);
  3566. ctx->size[queue_type] += sizeof(struct virtio_net_stats_rx_csum);
  3567. }
  3568. if (vi->device_stats_cap & VIRTIO_NET_STATS_TYPE_RX_SPEED) {
  3569. ctx->bitmap[queue_type] |= VIRTIO_NET_STATS_TYPE_RX_SPEED;
  3570. ctx->desc_num[queue_type] += ARRAY_SIZE(virtnet_stats_rx_speed_desc);
  3571. ctx->size[queue_type] += sizeof(struct virtio_net_stats_rx_speed);
  3572. }
  3573. queue_type = VIRTNET_Q_TYPE_TX;
  3574. if (vi->device_stats_cap & VIRTIO_NET_STATS_TYPE_TX_BASIC) {
  3575. ctx->bitmap[queue_type] |= VIRTIO_NET_STATS_TYPE_TX_BASIC;
  3576. ctx->desc_num[queue_type] += ARRAY_SIZE(virtnet_stats_tx_basic_desc);
  3577. ctx->size[queue_type] += sizeof(struct virtio_net_stats_tx_basic);
  3578. }
  3579. if (vi->device_stats_cap & VIRTIO_NET_STATS_TYPE_TX_GSO) {
  3580. ctx->bitmap[queue_type] |= VIRTIO_NET_STATS_TYPE_TX_GSO;
  3581. ctx->desc_num[queue_type] += ARRAY_SIZE(virtnet_stats_tx_gso_desc);
  3582. ctx->size[queue_type] += sizeof(struct virtio_net_stats_tx_gso);
  3583. }
  3584. if (vi->device_stats_cap & VIRTIO_NET_STATS_TYPE_TX_SPEED) {
  3585. ctx->bitmap[queue_type] |= VIRTIO_NET_STATS_TYPE_TX_SPEED;
  3586. ctx->desc_num[queue_type] += ARRAY_SIZE(virtnet_stats_tx_speed_desc);
  3587. ctx->size[queue_type] += sizeof(struct virtio_net_stats_tx_speed);
  3588. }
  3589. }
3590. /* stats_sum_queue - Calculate the sum of the same fields across the sq or rq queues.
3591. * @sum: where to store the summed values
3592. * @num: number of fields per queue
3593. * @q_value: pointer to the first queue's fields
3594. * @q_num: number of queues
3595. */
  3596. static void stats_sum_queue(u64 *sum, u32 num, u64 *q_value, u32 q_num)
  3597. {
  3598. u32 step = num;
  3599. int i, j;
  3600. u64 *p;
  3601. for (i = 0; i < num; ++i) {
  3602. p = sum + i;
  3603. *p = 0;
  3604. for (j = 0; j < q_num; ++j)
  3605. *p += *(q_value + i + j * step);
  3606. }
  3607. }
  3608. static void virtnet_fill_total_fields(struct virtnet_info *vi,
  3609. struct virtnet_stats_ctx *ctx)
  3610. {
  3611. u64 *data, *first_rx_q, *first_tx_q;
  3612. u32 num_cq, num_rx, num_tx;
  3613. num_cq = ctx->desc_num[VIRTNET_Q_TYPE_CQ];
  3614. num_rx = ctx->desc_num[VIRTNET_Q_TYPE_RX];
  3615. num_tx = ctx->desc_num[VIRTNET_Q_TYPE_TX];
  3616. first_rx_q = ctx->data + num_rx + num_tx + num_cq;
  3617. first_tx_q = first_rx_q + vi->curr_queue_pairs * num_rx;
  3618. data = ctx->data;
  3619. stats_sum_queue(data, num_rx, first_rx_q, vi->curr_queue_pairs);
  3620. data = ctx->data + num_rx;
  3621. stats_sum_queue(data, num_tx, first_tx_q, vi->curr_queue_pairs);
  3622. }
  3623. static void virtnet_fill_stats_qstat(struct virtnet_info *vi, u32 qid,
  3624. struct virtnet_stats_ctx *ctx,
  3625. const u8 *base, bool drv_stats, u8 reply_type)
  3626. {
  3627. const struct virtnet_stat_desc *desc;
  3628. const u64_stats_t *v_stat;
  3629. u64 offset, bitmap;
  3630. const __le64 *v;
  3631. u32 queue_type;
  3632. int i, num;
  3633. queue_type = vq_type(vi, qid);
  3634. bitmap = ctx->bitmap[queue_type];
  3635. if (drv_stats) {
  3636. if (queue_type == VIRTNET_Q_TYPE_RX) {
  3637. desc = &virtnet_rq_stats_desc_qstat[0];
  3638. num = ARRAY_SIZE(virtnet_rq_stats_desc_qstat);
  3639. } else {
  3640. desc = &virtnet_sq_stats_desc_qstat[0];
  3641. num = ARRAY_SIZE(virtnet_sq_stats_desc_qstat);
  3642. }
  3643. for (i = 0; i < num; ++i) {
  3644. offset = desc[i].qstat_offset / sizeof(*ctx->data);
  3645. v_stat = (const u64_stats_t *)(base + desc[i].offset);
  3646. ctx->data[offset] = u64_stats_read(v_stat);
  3647. }
  3648. return;
  3649. }
  3650. if (bitmap & VIRTIO_NET_STATS_TYPE_RX_BASIC) {
  3651. desc = &virtnet_stats_rx_basic_desc_qstat[0];
  3652. num = ARRAY_SIZE(virtnet_stats_rx_basic_desc_qstat);
  3653. if (reply_type == VIRTIO_NET_STATS_TYPE_REPLY_RX_BASIC)
  3654. goto found;
  3655. }
  3656. if (bitmap & VIRTIO_NET_STATS_TYPE_RX_CSUM) {
  3657. desc = &virtnet_stats_rx_csum_desc_qstat[0];
  3658. num = ARRAY_SIZE(virtnet_stats_rx_csum_desc_qstat);
  3659. if (reply_type == VIRTIO_NET_STATS_TYPE_REPLY_RX_CSUM)
  3660. goto found;
  3661. }
  3662. if (bitmap & VIRTIO_NET_STATS_TYPE_RX_GSO) {
  3663. desc = &virtnet_stats_rx_gso_desc_qstat[0];
  3664. num = ARRAY_SIZE(virtnet_stats_rx_gso_desc_qstat);
  3665. if (reply_type == VIRTIO_NET_STATS_TYPE_REPLY_RX_GSO)
  3666. goto found;
  3667. }
  3668. if (bitmap & VIRTIO_NET_STATS_TYPE_RX_SPEED) {
  3669. desc = &virtnet_stats_rx_speed_desc_qstat[0];
  3670. num = ARRAY_SIZE(virtnet_stats_rx_speed_desc_qstat);
  3671. if (reply_type == VIRTIO_NET_STATS_TYPE_REPLY_RX_SPEED)
  3672. goto found;
  3673. }
  3674. if (bitmap & VIRTIO_NET_STATS_TYPE_TX_BASIC) {
  3675. desc = &virtnet_stats_tx_basic_desc_qstat[0];
  3676. num = ARRAY_SIZE(virtnet_stats_tx_basic_desc_qstat);
  3677. if (reply_type == VIRTIO_NET_STATS_TYPE_REPLY_TX_BASIC)
  3678. goto found;
  3679. }
  3680. if (bitmap & VIRTIO_NET_STATS_TYPE_TX_CSUM) {
  3681. desc = &virtnet_stats_tx_csum_desc_qstat[0];
  3682. num = ARRAY_SIZE(virtnet_stats_tx_csum_desc_qstat);
  3683. if (reply_type == VIRTIO_NET_STATS_TYPE_REPLY_TX_CSUM)
  3684. goto found;
  3685. }
  3686. if (bitmap & VIRTIO_NET_STATS_TYPE_TX_GSO) {
  3687. desc = &virtnet_stats_tx_gso_desc_qstat[0];
  3688. num = ARRAY_SIZE(virtnet_stats_tx_gso_desc_qstat);
  3689. if (reply_type == VIRTIO_NET_STATS_TYPE_REPLY_TX_GSO)
  3690. goto found;
  3691. }
  3692. if (bitmap & VIRTIO_NET_STATS_TYPE_TX_SPEED) {
  3693. desc = &virtnet_stats_tx_speed_desc_qstat[0];
  3694. num = ARRAY_SIZE(virtnet_stats_tx_speed_desc_qstat);
  3695. if (reply_type == VIRTIO_NET_STATS_TYPE_REPLY_TX_SPEED)
  3696. goto found;
  3697. }
  3698. return;
  3699. found:
  3700. for (i = 0; i < num; ++i) {
  3701. offset = desc[i].qstat_offset / sizeof(*ctx->data);
  3702. v = (const __le64 *)(base + desc[i].offset);
  3703. ctx->data[offset] = le64_to_cpu(*v);
  3704. }
  3705. }
3706. /* virtnet_fill_stats - copy the stats to qstats or ethtool -S.
3707. * The stats source is either the device or the driver.
3708. *
3709. * @vi: virtio net info
3710. * @qid: the vq id
3711. * @ctx: stats ctx (initialized by virtnet_stats_ctx_init())
3712. * @base: pointer to the device reply or the driver stats structure
3713. * @drv_stats: true if @base is driver stats, false if it is a device reply
3714. * @reply_type: the type of the device reply (must be zero if @drv_stats is true)
3715. */
  3716. static void virtnet_fill_stats(struct virtnet_info *vi, u32 qid,
  3717. struct virtnet_stats_ctx *ctx,
  3718. const u8 *base, bool drv_stats, u8 reply_type)
  3719. {
  3720. u32 queue_type, num_rx, num_tx, num_cq;
  3721. const struct virtnet_stat_desc *desc;
  3722. const u64_stats_t *v_stat;
  3723. u64 offset, bitmap;
  3724. const __le64 *v;
  3725. int i, num;
  3726. if (ctx->to_qstat)
  3727. return virtnet_fill_stats_qstat(vi, qid, ctx, base, drv_stats, reply_type);
  3728. num_cq = ctx->desc_num[VIRTNET_Q_TYPE_CQ];
  3729. num_rx = ctx->desc_num[VIRTNET_Q_TYPE_RX];
  3730. num_tx = ctx->desc_num[VIRTNET_Q_TYPE_TX];
  3731. queue_type = vq_type(vi, qid);
  3732. bitmap = ctx->bitmap[queue_type];
3733. /* skip the aggregated rx/tx total fields at the start of the output */
  3734. offset = num_rx + num_tx;
  3735. if (queue_type == VIRTNET_Q_TYPE_TX) {
  3736. offset += num_cq + num_rx * vi->curr_queue_pairs + num_tx * (qid / 2);
  3737. num = ARRAY_SIZE(virtnet_sq_stats_desc);
  3738. if (drv_stats) {
  3739. desc = &virtnet_sq_stats_desc[0];
  3740. goto drv_stats;
  3741. }
  3742. offset += num;
  3743. } else if (queue_type == VIRTNET_Q_TYPE_RX) {
  3744. offset += num_cq + num_rx * (qid / 2);
  3745. num = ARRAY_SIZE(virtnet_rq_stats_desc);
  3746. if (drv_stats) {
  3747. desc = &virtnet_rq_stats_desc[0];
  3748. goto drv_stats;
  3749. }
  3750. offset += num;
  3751. }
  3752. if (bitmap & VIRTIO_NET_STATS_TYPE_CVQ) {
  3753. desc = &virtnet_stats_cvq_desc[0];
  3754. num = ARRAY_SIZE(virtnet_stats_cvq_desc);
  3755. if (reply_type == VIRTIO_NET_STATS_TYPE_REPLY_CVQ)
  3756. goto found;
  3757. offset += num;
  3758. }
  3759. if (bitmap & VIRTIO_NET_STATS_TYPE_RX_BASIC) {
  3760. desc = &virtnet_stats_rx_basic_desc[0];
  3761. num = ARRAY_SIZE(virtnet_stats_rx_basic_desc);
  3762. if (reply_type == VIRTIO_NET_STATS_TYPE_REPLY_RX_BASIC)
  3763. goto found;
  3764. offset += num;
  3765. }
  3766. if (bitmap & VIRTIO_NET_STATS_TYPE_RX_CSUM) {
  3767. desc = &virtnet_stats_rx_csum_desc[0];
  3768. num = ARRAY_SIZE(virtnet_stats_rx_csum_desc);
  3769. if (reply_type == VIRTIO_NET_STATS_TYPE_REPLY_RX_CSUM)
  3770. goto found;
  3771. offset += num;
  3772. }
  3773. if (bitmap & VIRTIO_NET_STATS_TYPE_RX_SPEED) {
  3774. desc = &virtnet_stats_rx_speed_desc[0];
  3775. num = ARRAY_SIZE(virtnet_stats_rx_speed_desc);
  3776. if (reply_type == VIRTIO_NET_STATS_TYPE_REPLY_RX_SPEED)
  3777. goto found;
  3778. offset += num;
  3779. }
  3780. if (bitmap & VIRTIO_NET_STATS_TYPE_TX_BASIC) {
  3781. desc = &virtnet_stats_tx_basic_desc[0];
  3782. num = ARRAY_SIZE(virtnet_stats_tx_basic_desc);
  3783. if (reply_type == VIRTIO_NET_STATS_TYPE_REPLY_TX_BASIC)
  3784. goto found;
  3785. offset += num;
  3786. }
  3787. if (bitmap & VIRTIO_NET_STATS_TYPE_TX_GSO) {
  3788. desc = &virtnet_stats_tx_gso_desc[0];
  3789. num = ARRAY_SIZE(virtnet_stats_tx_gso_desc);
  3790. if (reply_type == VIRTIO_NET_STATS_TYPE_REPLY_TX_GSO)
  3791. goto found;
  3792. offset += num;
  3793. }
  3794. if (bitmap & VIRTIO_NET_STATS_TYPE_TX_SPEED) {
  3795. desc = &virtnet_stats_tx_speed_desc[0];
  3796. num = ARRAY_SIZE(virtnet_stats_tx_speed_desc);
  3797. if (reply_type == VIRTIO_NET_STATS_TYPE_REPLY_TX_SPEED)
  3798. goto found;
  3799. offset += num;
  3800. }
  3801. return;
  3802. found:
  3803. for (i = 0; i < num; ++i) {
  3804. v = (const __le64 *)(base + desc[i].offset);
  3805. ctx->data[offset + i] = le64_to_cpu(*v);
  3806. }
  3807. return;
  3808. drv_stats:
  3809. for (i = 0; i < num; ++i) {
  3810. v_stat = (const u64_stats_t *)(base + desc[i].offset);
  3811. ctx->data[offset + i] = u64_stats_read(v_stat);
  3812. }
  3813. }
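/* Send a VIRTIO_NET_CTRL_STATS_GET request on the control virtqueue and walk
 * the reply buffer, filling the stats ctx from each per-queue reply header.
 */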
  3814. static int __virtnet_get_hw_stats(struct virtnet_info *vi,
  3815. struct virtnet_stats_ctx *ctx,
  3816. struct virtio_net_ctrl_queue_stats *req,
  3817. int req_size, void *reply, int res_size)
  3818. {
  3819. struct virtio_net_stats_reply_hdr *hdr;
  3820. struct scatterlist sgs_in, sgs_out;
  3821. void *p;
  3822. u32 qid;
  3823. int ok;
  3824. sg_init_one(&sgs_out, req, req_size);
  3825. sg_init_one(&sgs_in, reply, res_size);
  3826. ok = virtnet_send_command_reply(vi, VIRTIO_NET_CTRL_STATS,
  3827. VIRTIO_NET_CTRL_STATS_GET,
  3828. &sgs_out, &sgs_in);
  3829. if (!ok)
  3830. return ok;
  3831. for (p = reply; p - reply < res_size; p += le16_to_cpu(hdr->size)) {
  3832. hdr = p;
  3833. qid = le16_to_cpu(hdr->vq_index);
  3834. virtnet_fill_stats(vi, qid, ctx, p, false, hdr->type);
  3835. }
  3836. return 0;
  3837. }
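/* Fill one request entry for @qid if its queue type has any stats bits
 * requested; *idx is advanced on success.
 */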
  3838. static void virtnet_make_stat_req(struct virtnet_info *vi,
  3839. struct virtnet_stats_ctx *ctx,
  3840. struct virtio_net_ctrl_queue_stats *req,
  3841. int qid, int *idx)
  3842. {
  3843. int qtype = vq_type(vi, qid);
  3844. u64 bitmap = ctx->bitmap[qtype];
  3845. if (!bitmap)
  3846. return;
  3847. req->stats[*idx].vq_index = cpu_to_le16(qid);
  3848. req->stats[*idx].types_bitmap[0] = cpu_to_le64(bitmap);
  3849. *idx += 1;
  3850. }
3851. /* qid: -1: get the stats for all vqs.
3852. * >= 0: get the stats for the specific vq. This must not be the cvq.
3853. */
  3854. static int virtnet_get_hw_stats(struct virtnet_info *vi,
  3855. struct virtnet_stats_ctx *ctx, int qid)
  3856. {
  3857. int qnum, i, j, res_size, qtype, last_vq, first_vq;
  3858. struct virtio_net_ctrl_queue_stats *req;
  3859. bool enable_cvq;
  3860. void *reply;
  3861. int ok;
  3862. if (!virtio_has_feature(vi->vdev, VIRTIO_NET_F_DEVICE_STATS))
  3863. return 0;
  3864. if (qid == -1) {
  3865. last_vq = vi->curr_queue_pairs * 2 - 1;
  3866. first_vq = 0;
  3867. enable_cvq = true;
  3868. } else {
  3869. last_vq = qid;
  3870. first_vq = qid;
  3871. enable_cvq = false;
  3872. }
  3873. qnum = 0;
  3874. res_size = 0;
  3875. for (i = first_vq; i <= last_vq ; ++i) {
  3876. qtype = vq_type(vi, i);
  3877. if (ctx->bitmap[qtype]) {
  3878. ++qnum;
  3879. res_size += ctx->size[qtype];
  3880. }
  3881. }
  3882. if (enable_cvq && ctx->bitmap[VIRTNET_Q_TYPE_CQ]) {
  3883. res_size += ctx->size[VIRTNET_Q_TYPE_CQ];
  3884. qnum += 1;
  3885. }
  3886. req = kcalloc(qnum, sizeof(*req), GFP_KERNEL);
  3887. if (!req)
  3888. return -ENOMEM;
  3889. reply = kmalloc(res_size, GFP_KERNEL);
  3890. if (!reply) {
  3891. kfree(req);
  3892. return -ENOMEM;
  3893. }
  3894. j = 0;
  3895. for (i = first_vq; i <= last_vq ; ++i)
  3896. virtnet_make_stat_req(vi, ctx, req, i, &j);
  3897. if (enable_cvq)
  3898. virtnet_make_stat_req(vi, ctx, req, vi->max_queue_pairs * 2, &j);
  3899. ok = __virtnet_get_hw_stats(vi, ctx, req, sizeof(*req) * j, reply, res_size);
  3900. kfree(req);
  3901. kfree(reply);
  3902. return ok;
  3903. }
  3904. static void virtnet_get_strings(struct net_device *dev, u32 stringset, u8 *data)
  3905. {
  3906. struct virtnet_info *vi = netdev_priv(dev);
  3907. unsigned int i;
  3908. u8 *p = data;
  3909. switch (stringset) {
  3910. case ETH_SS_STATS:
  3911. /* Generate the total field names. */
  3912. virtnet_get_stats_string(vi, VIRTNET_Q_TYPE_RX, -1, &p);
  3913. virtnet_get_stats_string(vi, VIRTNET_Q_TYPE_TX, -1, &p);
  3914. virtnet_get_stats_string(vi, VIRTNET_Q_TYPE_CQ, 0, &p);
  3915. for (i = 0; i < vi->curr_queue_pairs; ++i)
  3916. virtnet_get_stats_string(vi, VIRTNET_Q_TYPE_RX, i, &p);
  3917. for (i = 0; i < vi->curr_queue_pairs; ++i)
  3918. virtnet_get_stats_string(vi, VIRTNET_Q_TYPE_TX, i, &p);
  3919. break;
  3920. }
  3921. }
  3922. static int virtnet_get_sset_count(struct net_device *dev, int sset)
  3923. {
  3924. struct virtnet_info *vi = netdev_priv(dev);
  3925. struct virtnet_stats_ctx ctx = {0};
  3926. u32 pair_count;
  3927. switch (sset) {
  3928. case ETH_SS_STATS:
  3929. virtnet_stats_ctx_init(vi, &ctx, NULL, false);
  3930. pair_count = ctx.desc_num[VIRTNET_Q_TYPE_RX] + ctx.desc_num[VIRTNET_Q_TYPE_TX];
  3931. return pair_count + ctx.desc_num[VIRTNET_Q_TYPE_CQ] +
  3932. vi->curr_queue_pairs * pair_count;
  3933. default:
  3934. return -EOPNOTSUPP;
  3935. }
  3936. }
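/* ethtool -S handler: fetch the device (hw) stats for all queues, copy each
 * queue's driver counters under the u64_stats retry loop, and finally fill
 * the total fields.
 */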
  3937. static void virtnet_get_ethtool_stats(struct net_device *dev,
  3938. struct ethtool_stats *stats, u64 *data)
  3939. {
  3940. struct virtnet_info *vi = netdev_priv(dev);
  3941. struct virtnet_stats_ctx ctx = {0};
  3942. unsigned int start, i;
  3943. const u8 *stats_base;
  3944. virtnet_stats_ctx_init(vi, &ctx, data, false);
  3945. if (virtnet_get_hw_stats(vi, &ctx, -1))
  3946. dev_warn(&vi->dev->dev, "Failed to get hw stats.\n");
  3947. for (i = 0; i < vi->curr_queue_pairs; i++) {
  3948. struct receive_queue *rq = &vi->rq[i];
  3949. struct send_queue *sq = &vi->sq[i];
  3950. stats_base = (const u8 *)&rq->stats;
  3951. do {
  3952. start = u64_stats_fetch_begin(&rq->stats.syncp);
  3953. virtnet_fill_stats(vi, i * 2, &ctx, stats_base, true, 0);
  3954. } while (u64_stats_fetch_retry(&rq->stats.syncp, start));
  3955. stats_base = (const u8 *)&sq->stats;
  3956. do {
  3957. start = u64_stats_fetch_begin(&sq->stats.syncp);
  3958. virtnet_fill_stats(vi, i * 2 + 1, &ctx, stats_base, true, 0);
  3959. } while (u64_stats_fetch_retry(&sq->stats.syncp, start));
  3960. }
  3961. virtnet_fill_total_fields(vi, &ctx);
  3962. }
  3963. static void virtnet_get_channels(struct net_device *dev,
  3964. struct ethtool_channels *channels)
  3965. {
  3966. struct virtnet_info *vi = netdev_priv(dev);
  3967. channels->combined_count = vi->curr_queue_pairs;
  3968. channels->max_combined = vi->max_queue_pairs;
  3969. channels->max_other = 0;
  3970. channels->rx_count = 0;
  3971. channels->tx_count = 0;
  3972. channels->other_count = 0;
  3973. }
  3974. static int virtnet_set_link_ksettings(struct net_device *dev,
  3975. const struct ethtool_link_ksettings *cmd)
  3976. {
  3977. struct virtnet_info *vi = netdev_priv(dev);
  3978. return ethtool_virtdev_set_link_ksettings(dev, cmd,
  3979. &vi->speed, &vi->duplex);
  3980. }
  3981. static int virtnet_get_link_ksettings(struct net_device *dev,
  3982. struct ethtool_link_ksettings *cmd)
  3983. {
  3984. struct virtnet_info *vi = netdev_priv(dev);
  3985. cmd->base.speed = vi->speed;
  3986. cmd->base.duplex = vi->duplex;
  3987. cmd->base.port = PORT_OTHER;
  3988. return 0;
  3989. }
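/* Send the global tx interrupt coalescing parameters to the device and
 * mirror them into the per-queue state.
 */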
  3990. static int virtnet_send_tx_notf_coal_cmds(struct virtnet_info *vi,
  3991. struct ethtool_coalesce *ec)
  3992. {
  3993. struct virtio_net_ctrl_coal_tx *coal_tx __free(kfree) = NULL;
  3994. struct scatterlist sgs_tx;
  3995. int i;
  3996. coal_tx = kzalloc(sizeof(*coal_tx), GFP_KERNEL);
  3997. if (!coal_tx)
  3998. return -ENOMEM;
  3999. coal_tx->tx_usecs = cpu_to_le32(ec->tx_coalesce_usecs);
  4000. coal_tx->tx_max_packets = cpu_to_le32(ec->tx_max_coalesced_frames);
  4001. sg_init_one(&sgs_tx, coal_tx, sizeof(*coal_tx));
  4002. if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_NOTF_COAL,
  4003. VIRTIO_NET_CTRL_NOTF_COAL_TX_SET,
  4004. &sgs_tx))
  4005. return -EINVAL;
  4006. vi->intr_coal_tx.max_usecs = ec->tx_coalesce_usecs;
  4007. vi->intr_coal_tx.max_packets = ec->tx_max_coalesced_frames;
  4008. for (i = 0; i < vi->max_queue_pairs; i++) {
  4009. vi->sq[i].intr_coal.max_usecs = ec->tx_coalesce_usecs;
  4010. vi->sq[i].intr_coal.max_packets = ec->tx_max_coalesced_frames;
  4011. }
  4012. return 0;
  4013. }
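/* Switch adaptive rx coalescing (DIM) on or off as requested; unless DIM is
 * being turned on, the global rx coalescing parameters are also sent to the
 * device and mirrored into the per-queue state.
 */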
  4014. static int virtnet_send_rx_notf_coal_cmds(struct virtnet_info *vi,
  4015. struct ethtool_coalesce *ec)
  4016. {
  4017. struct virtio_net_ctrl_coal_rx *coal_rx __free(kfree) = NULL;
  4018. bool rx_ctrl_dim_on = !!ec->use_adaptive_rx_coalesce;
  4019. struct scatterlist sgs_rx;
  4020. int i;
  4021. if (rx_ctrl_dim_on && !virtio_has_feature(vi->vdev, VIRTIO_NET_F_VQ_NOTF_COAL))
  4022. return -EOPNOTSUPP;
  4023. if (rx_ctrl_dim_on && (ec->rx_coalesce_usecs != vi->intr_coal_rx.max_usecs ||
  4024. ec->rx_max_coalesced_frames != vi->intr_coal_rx.max_packets))
  4025. return -EINVAL;
  4026. if (rx_ctrl_dim_on && !vi->rx_dim_enabled) {
  4027. vi->rx_dim_enabled = true;
  4028. for (i = 0; i < vi->max_queue_pairs; i++) {
  4029. mutex_lock(&vi->rq[i].dim_lock);
  4030. vi->rq[i].dim_enabled = true;
  4031. mutex_unlock(&vi->rq[i].dim_lock);
  4032. }
  4033. return 0;
  4034. }
  4035. coal_rx = kzalloc(sizeof(*coal_rx), GFP_KERNEL);
  4036. if (!coal_rx)
  4037. return -ENOMEM;
  4038. if (!rx_ctrl_dim_on && vi->rx_dim_enabled) {
  4039. vi->rx_dim_enabled = false;
  4040. for (i = 0; i < vi->max_queue_pairs; i++) {
  4041. mutex_lock(&vi->rq[i].dim_lock);
  4042. vi->rq[i].dim_enabled = false;
  4043. mutex_unlock(&vi->rq[i].dim_lock);
  4044. }
  4045. }
4046. /* Since the per-queue coalescing params can be set,
4047. * we need to apply the new global params even if they
4048. * are not updated.
4049. */
  4050. coal_rx->rx_usecs = cpu_to_le32(ec->rx_coalesce_usecs);
  4051. coal_rx->rx_max_packets = cpu_to_le32(ec->rx_max_coalesced_frames);
  4052. sg_init_one(&sgs_rx, coal_rx, sizeof(*coal_rx));
  4053. if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_NOTF_COAL,
  4054. VIRTIO_NET_CTRL_NOTF_COAL_RX_SET,
  4055. &sgs_rx))
  4056. return -EINVAL;
  4057. vi->intr_coal_rx.max_usecs = ec->rx_coalesce_usecs;
  4058. vi->intr_coal_rx.max_packets = ec->rx_max_coalesced_frames;
  4059. for (i = 0; i < vi->max_queue_pairs; i++) {
  4060. mutex_lock(&vi->rq[i].dim_lock);
  4061. vi->rq[i].intr_coal.max_usecs = ec->rx_coalesce_usecs;
  4062. vi->rq[i].intr_coal.max_packets = ec->rx_max_coalesced_frames;
  4063. mutex_unlock(&vi->rq[i].dim_lock);
  4064. }
  4065. return 0;
  4066. }
  4067. static int virtnet_send_notf_coal_cmds(struct virtnet_info *vi,
  4068. struct ethtool_coalesce *ec)
  4069. {
  4070. int err;
  4071. err = virtnet_send_tx_notf_coal_cmds(vi, ec);
  4072. if (err)
  4073. return err;
  4074. err = virtnet_send_rx_notf_coal_cmds(vi, ec);
  4075. if (err)
  4076. return err;
  4077. return 0;
  4078. }
  4079. static int virtnet_send_rx_notf_coal_vq_cmds(struct virtnet_info *vi,
  4080. struct ethtool_coalesce *ec,
  4081. u16 queue)
  4082. {
  4083. bool rx_ctrl_dim_on = !!ec->use_adaptive_rx_coalesce;
  4084. u32 max_usecs, max_packets;
  4085. bool cur_rx_dim;
  4086. int err;
  4087. mutex_lock(&vi->rq[queue].dim_lock);
  4088. cur_rx_dim = vi->rq[queue].dim_enabled;
  4089. max_usecs = vi->rq[queue].intr_coal.max_usecs;
  4090. max_packets = vi->rq[queue].intr_coal.max_packets;
  4091. if (rx_ctrl_dim_on && (ec->rx_coalesce_usecs != max_usecs ||
  4092. ec->rx_max_coalesced_frames != max_packets)) {
  4093. mutex_unlock(&vi->rq[queue].dim_lock);
  4094. return -EINVAL;
  4095. }
  4096. if (rx_ctrl_dim_on && !cur_rx_dim) {
  4097. vi->rq[queue].dim_enabled = true;
  4098. mutex_unlock(&vi->rq[queue].dim_lock);
  4099. return 0;
  4100. }
  4101. if (!rx_ctrl_dim_on && cur_rx_dim)
  4102. vi->rq[queue].dim_enabled = false;
  4103. /* If no params are updated, userspace ethtool will
  4104. * reject the modification.
  4105. */
  4106. err = virtnet_send_rx_ctrl_coal_vq_cmd(vi, queue,
  4107. ec->rx_coalesce_usecs,
  4108. ec->rx_max_coalesced_frames);
  4109. mutex_unlock(&vi->rq[queue].dim_lock);
  4110. return err;
  4111. }
  4112. static int virtnet_send_notf_coal_vq_cmds(struct virtnet_info *vi,
  4113. struct ethtool_coalesce *ec,
  4114. u16 queue)
  4115. {
  4116. int err;
  4117. err = virtnet_send_rx_notf_coal_vq_cmds(vi, ec, queue);
  4118. if (err)
  4119. return err;
  4120. err = virtnet_send_tx_ctrl_coal_vq_cmd(vi, queue,
  4121. ec->tx_coalesce_usecs,
  4122. ec->tx_max_coalesced_frames);
  4123. if (err)
  4124. return err;
  4125. return 0;
  4126. }
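/* DIM work handler: if the suggested moderation differs from the queue's
 * current settings, push the new usecs/packets values to the device.
 */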
  4127. static void virtnet_rx_dim_work(struct work_struct *work)
  4128. {
  4129. struct dim *dim = container_of(work, struct dim, work);
  4130. struct receive_queue *rq = container_of(dim,
  4131. struct receive_queue, dim);
  4132. struct virtnet_info *vi = rq->vq->vdev->priv;
  4133. struct net_device *dev = vi->dev;
  4134. struct dim_cq_moder update_moder;
  4135. int qnum, err;
  4136. qnum = rq - vi->rq;
  4137. mutex_lock(&rq->dim_lock);
  4138. if (!rq->dim_enabled)
  4139. goto out;
  4140. update_moder = net_dim_get_rx_irq_moder(dev, dim);
  4141. if (update_moder.usec != rq->intr_coal.max_usecs ||
  4142. update_moder.pkts != rq->intr_coal.max_packets) {
  4143. err = virtnet_send_rx_ctrl_coal_vq_cmd(vi, qnum,
  4144. update_moder.usec,
  4145. update_moder.pkts);
  4146. if (err)
  4147. pr_debug("%s: Failed to send dim parameters on rxq%d\n",
  4148. dev->name, qnum);
  4149. }
  4150. out:
  4151. dim->state = DIM_START_MEASURE;
  4152. mutex_unlock(&rq->dim_lock);
  4153. }
  4154. static int virtnet_coal_params_supported(struct ethtool_coalesce *ec)
  4155. {
  4156. /* usecs coalescing is supported only if VIRTIO_NET_F_NOTF_COAL
  4157. * or VIRTIO_NET_F_VQ_NOTF_COAL feature is negotiated.
  4158. */
  4159. if (ec->rx_coalesce_usecs || ec->tx_coalesce_usecs)
  4160. return -EOPNOTSUPP;
  4161. if (ec->tx_max_coalesced_frames > 1 ||
  4162. ec->rx_max_coalesced_frames != 1)
  4163. return -EINVAL;
  4164. return 0;
  4165. }
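/* The NAPI weight (tx napi on/off) can only be changed while the interface
 * is down; otherwise return -EBUSY.
 */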
  4166. static int virtnet_should_update_vq_weight(int dev_flags, int weight,
  4167. int vq_weight, bool *should_update)
  4168. {
  4169. if (weight ^ vq_weight) {
  4170. if (dev_flags & IFF_UP)
  4171. return -EBUSY;
  4172. *should_update = true;
  4173. }
  4174. return 0;
  4175. }
  4176. static int virtnet_set_coalesce(struct net_device *dev,
  4177. struct ethtool_coalesce *ec,
  4178. struct kernel_ethtool_coalesce *kernel_coal,
  4179. struct netlink_ext_ack *extack)
  4180. {
  4181. struct virtnet_info *vi = netdev_priv(dev);
  4182. int ret, queue_number, napi_weight;
  4183. bool update_napi = false;
  4184. /* Can't change NAPI weight if the link is up */
  4185. napi_weight = ec->tx_max_coalesced_frames ? NAPI_POLL_WEIGHT : 0;
  4186. for (queue_number = 0; queue_number < vi->max_queue_pairs; queue_number++) {
  4187. ret = virtnet_should_update_vq_weight(dev->flags, napi_weight,
  4188. vi->sq[queue_number].napi.weight,
  4189. &update_napi);
  4190. if (ret)
  4191. return ret;
  4192. if (update_napi) {
4193. /* For simplicity, all queues in [queue_number, vi->max_queue_pairs) will
4194. * be updated, which might not be necessary.
4195. */
  4196. break;
  4197. }
  4198. }
  4199. if (virtio_has_feature(vi->vdev, VIRTIO_NET_F_NOTF_COAL))
  4200. ret = virtnet_send_notf_coal_cmds(vi, ec);
  4201. else
  4202. ret = virtnet_coal_params_supported(ec);
  4203. if (ret)
  4204. return ret;
  4205. if (update_napi) {
  4206. for (; queue_number < vi->max_queue_pairs; queue_number++)
  4207. vi->sq[queue_number].napi.weight = napi_weight;
  4208. }
  4209. return ret;
  4210. }
  4211. static int virtnet_get_coalesce(struct net_device *dev,
  4212. struct ethtool_coalesce *ec,
  4213. struct kernel_ethtool_coalesce *kernel_coal,
  4214. struct netlink_ext_ack *extack)
  4215. {
  4216. struct virtnet_info *vi = netdev_priv(dev);
  4217. if (virtio_has_feature(vi->vdev, VIRTIO_NET_F_NOTF_COAL)) {
  4218. ec->rx_coalesce_usecs = vi->intr_coal_rx.max_usecs;
  4219. ec->tx_coalesce_usecs = vi->intr_coal_tx.max_usecs;
  4220. ec->tx_max_coalesced_frames = vi->intr_coal_tx.max_packets;
  4221. ec->rx_max_coalesced_frames = vi->intr_coal_rx.max_packets;
  4222. ec->use_adaptive_rx_coalesce = vi->rx_dim_enabled;
  4223. } else {
  4224. ec->rx_max_coalesced_frames = 1;
  4225. if (vi->sq[0].napi.weight)
  4226. ec->tx_max_coalesced_frames = 1;
  4227. }
  4228. return 0;
  4229. }
  4230. static int virtnet_set_per_queue_coalesce(struct net_device *dev,
  4231. u32 queue,
  4232. struct ethtool_coalesce *ec)
  4233. {
  4234. struct virtnet_info *vi = netdev_priv(dev);
  4235. int ret, napi_weight;
  4236. bool update_napi = false;
  4237. if (queue >= vi->max_queue_pairs)
  4238. return -EINVAL;
  4239. /* Can't change NAPI weight if the link is up */
  4240. napi_weight = ec->tx_max_coalesced_frames ? NAPI_POLL_WEIGHT : 0;
  4241. ret = virtnet_should_update_vq_weight(dev->flags, napi_weight,
  4242. vi->sq[queue].napi.weight,
  4243. &update_napi);
  4244. if (ret)
  4245. return ret;
  4246. if (virtio_has_feature(vi->vdev, VIRTIO_NET_F_VQ_NOTF_COAL))
  4247. ret = virtnet_send_notf_coal_vq_cmds(vi, ec, queue);
  4248. else
  4249. ret = virtnet_coal_params_supported(ec);
  4250. if (ret)
  4251. return ret;
  4252. if (update_napi)
  4253. vi->sq[queue].napi.weight = napi_weight;
  4254. return 0;
  4255. }
  4256. static int virtnet_get_per_queue_coalesce(struct net_device *dev,
  4257. u32 queue,
  4258. struct ethtool_coalesce *ec)
  4259. {
  4260. struct virtnet_info *vi = netdev_priv(dev);
  4261. if (queue >= vi->max_queue_pairs)
  4262. return -EINVAL;
  4263. if (virtio_has_feature(vi->vdev, VIRTIO_NET_F_VQ_NOTF_COAL)) {
  4264. mutex_lock(&vi->rq[queue].dim_lock);
  4265. ec->rx_coalesce_usecs = vi->rq[queue].intr_coal.max_usecs;
  4266. ec->tx_coalesce_usecs = vi->sq[queue].intr_coal.max_usecs;
  4267. ec->tx_max_coalesced_frames = vi->sq[queue].intr_coal.max_packets;
  4268. ec->rx_max_coalesced_frames = vi->rq[queue].intr_coal.max_packets;
  4269. ec->use_adaptive_rx_coalesce = vi->rq[queue].dim_enabled;
  4270. mutex_unlock(&vi->rq[queue].dim_lock);
  4271. } else {
  4272. ec->rx_max_coalesced_frames = 1;
  4273. if (vi->sq[queue].napi.weight)
  4274. ec->tx_max_coalesced_frames = 1;
  4275. }
  4276. return 0;
  4277. }
  4278. static void virtnet_init_settings(struct net_device *dev)
  4279. {
  4280. struct virtnet_info *vi = netdev_priv(dev);
  4281. vi->speed = SPEED_UNKNOWN;
  4282. vi->duplex = DUPLEX_UNKNOWN;
  4283. }
  4284. static u32 virtnet_get_rxfh_key_size(struct net_device *dev)
  4285. {
  4286. return ((struct virtnet_info *)netdev_priv(dev))->rss_key_size;
  4287. }
  4288. static u32 virtnet_get_rxfh_indir_size(struct net_device *dev)
  4289. {
  4290. return ((struct virtnet_info *)netdev_priv(dev))->rss_indir_table_size;
  4291. }
  4292. static int virtnet_get_rxfh(struct net_device *dev,
  4293. struct ethtool_rxfh_param *rxfh)
  4294. {
  4295. struct virtnet_info *vi = netdev_priv(dev);
  4296. int i;
  4297. if (rxfh->indir) {
  4298. for (i = 0; i < vi->rss_indir_table_size; ++i)
  4299. rxfh->indir[i] = vi->rss.indirection_table[i];
  4300. }
  4301. if (rxfh->key)
  4302. memcpy(rxfh->key, vi->rss.key, vi->rss_key_size);
  4303. rxfh->hfunc = ETH_RSS_HASH_TOP;
  4304. return 0;
  4305. }
  4306. static int virtnet_set_rxfh(struct net_device *dev,
  4307. struct ethtool_rxfh_param *rxfh,
  4308. struct netlink_ext_ack *extack)
  4309. {
  4310. struct virtnet_info *vi = netdev_priv(dev);
  4311. bool update = false;
  4312. int i;
  4313. if (rxfh->hfunc != ETH_RSS_HASH_NO_CHANGE &&
  4314. rxfh->hfunc != ETH_RSS_HASH_TOP)
  4315. return -EOPNOTSUPP;
  4316. if (rxfh->indir) {
  4317. if (!vi->has_rss)
  4318. return -EOPNOTSUPP;
  4319. for (i = 0; i < vi->rss_indir_table_size; ++i)
  4320. vi->rss.indirection_table[i] = rxfh->indir[i];
  4321. update = true;
  4322. }
  4323. if (rxfh->key) {
4324. /* If either _F_HASH_REPORT or _F_RSS is negotiated, the
4325. * device provides hash calculation capabilities, that is,
4326. * hash_key is configured.
4327. */
  4328. if (!vi->has_rss && !vi->has_rss_hash_report)
  4329. return -EOPNOTSUPP;
  4330. memcpy(vi->rss.key, rxfh->key, vi->rss_key_size);
  4331. update = true;
  4332. }
  4333. if (update)
  4334. virtnet_commit_rss_command(vi);
  4335. return 0;
  4336. }
  4337. static int virtnet_get_rxnfc(struct net_device *dev, struct ethtool_rxnfc *info, u32 *rule_locs)
  4338. {
  4339. struct virtnet_info *vi = netdev_priv(dev);
  4340. int rc = 0;
  4341. switch (info->cmd) {
  4342. case ETHTOOL_GRXRINGS:
  4343. info->data = vi->curr_queue_pairs;
  4344. break;
  4345. case ETHTOOL_GRXFH:
  4346. virtnet_get_hashflow(vi, info);
  4347. break;
  4348. default:
  4349. rc = -EOPNOTSUPP;
  4350. }
  4351. return rc;
  4352. }
  4353. static int virtnet_set_rxnfc(struct net_device *dev, struct ethtool_rxnfc *info)
  4354. {
  4355. struct virtnet_info *vi = netdev_priv(dev);
  4356. int rc = 0;
  4357. switch (info->cmd) {
  4358. case ETHTOOL_SRXFH:
  4359. if (!virtnet_set_hashflow(vi, info))
  4360. rc = -EINVAL;
  4361. break;
  4362. default:
  4363. rc = -EOPNOTSUPP;
  4364. }
  4365. return rc;
  4366. }
  4367. static const struct ethtool_ops virtnet_ethtool_ops = {
  4368. .supported_coalesce_params = ETHTOOL_COALESCE_MAX_FRAMES |
  4369. ETHTOOL_COALESCE_USECS | ETHTOOL_COALESCE_USE_ADAPTIVE_RX,
  4370. .get_drvinfo = virtnet_get_drvinfo,
  4371. .get_link = ethtool_op_get_link,
  4372. .get_ringparam = virtnet_get_ringparam,
  4373. .set_ringparam = virtnet_set_ringparam,
  4374. .get_strings = virtnet_get_strings,
  4375. .get_sset_count = virtnet_get_sset_count,
  4376. .get_ethtool_stats = virtnet_get_ethtool_stats,
  4377. .set_channels = virtnet_set_channels,
  4378. .get_channels = virtnet_get_channels,
  4379. .get_ts_info = ethtool_op_get_ts_info,
  4380. .get_link_ksettings = virtnet_get_link_ksettings,
  4381. .set_link_ksettings = virtnet_set_link_ksettings,
  4382. .set_coalesce = virtnet_set_coalesce,
  4383. .get_coalesce = virtnet_get_coalesce,
  4384. .set_per_queue_coalesce = virtnet_set_per_queue_coalesce,
  4385. .get_per_queue_coalesce = virtnet_get_per_queue_coalesce,
  4386. .get_rxfh_key_size = virtnet_get_rxfh_key_size,
  4387. .get_rxfh_indir_size = virtnet_get_rxfh_indir_size,
  4388. .get_rxfh = virtnet_get_rxfh,
  4389. .set_rxfh = virtnet_set_rxfh,
  4390. .get_rxnfc = virtnet_get_rxnfc,
  4391. .set_rxnfc = virtnet_set_rxnfc,
  4392. };
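/* Netdev per-queue stats (qstats) callbacks: each one merges the device's
 * hardware stats for the queue, if available, with the driver's own counters.
 */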
  4393. static void virtnet_get_queue_stats_rx(struct net_device *dev, int i,
  4394. struct netdev_queue_stats_rx *stats)
  4395. {
  4396. struct virtnet_info *vi = netdev_priv(dev);
  4397. struct receive_queue *rq = &vi->rq[i];
  4398. struct virtnet_stats_ctx ctx = {0};
  4399. virtnet_stats_ctx_init(vi, &ctx, (void *)stats, true);
  4400. virtnet_get_hw_stats(vi, &ctx, i * 2);
  4401. virtnet_fill_stats(vi, i * 2, &ctx, (void *)&rq->stats, true, 0);
  4402. }
  4403. static void virtnet_get_queue_stats_tx(struct net_device *dev, int i,
  4404. struct netdev_queue_stats_tx *stats)
  4405. {
  4406. struct virtnet_info *vi = netdev_priv(dev);
  4407. struct send_queue *sq = &vi->sq[i];
  4408. struct virtnet_stats_ctx ctx = {0};
  4409. virtnet_stats_ctx_init(vi, &ctx, (void *)stats, true);
  4410. virtnet_get_hw_stats(vi, &ctx, i * 2 + 1);
  4411. virtnet_fill_stats(vi, i * 2 + 1, &ctx, (void *)&sq->stats, true, 0);
  4412. }
  4413. static void virtnet_get_base_stats(struct net_device *dev,
  4414. struct netdev_queue_stats_rx *rx,
  4415. struct netdev_queue_stats_tx *tx)
  4416. {
  4417. struct virtnet_info *vi = netdev_priv(dev);
4418. /* The queue stats of the virtio-net device are never reset,
4419. * so we return 0 here.
4420. */
  4421. rx->bytes = 0;
  4422. rx->packets = 0;
  4423. if (vi->device_stats_cap & VIRTIO_NET_STATS_TYPE_RX_BASIC) {
  4424. rx->hw_drops = 0;
  4425. rx->hw_drop_overruns = 0;
  4426. }
  4427. if (vi->device_stats_cap & VIRTIO_NET_STATS_TYPE_RX_CSUM) {
  4428. rx->csum_unnecessary = 0;
  4429. rx->csum_none = 0;
  4430. rx->csum_bad = 0;
  4431. }
  4432. if (vi->device_stats_cap & VIRTIO_NET_STATS_TYPE_RX_GSO) {
  4433. rx->hw_gro_packets = 0;
  4434. rx->hw_gro_bytes = 0;
  4435. rx->hw_gro_wire_packets = 0;
  4436. rx->hw_gro_wire_bytes = 0;
  4437. }
  4438. if (vi->device_stats_cap & VIRTIO_NET_STATS_TYPE_RX_SPEED)
  4439. rx->hw_drop_ratelimits = 0;
  4440. tx->bytes = 0;
  4441. tx->packets = 0;
  4442. tx->stop = 0;
  4443. tx->wake = 0;
  4444. if (vi->device_stats_cap & VIRTIO_NET_STATS_TYPE_TX_BASIC) {
  4445. tx->hw_drops = 0;
  4446. tx->hw_drop_errors = 0;
  4447. }
  4448. if (vi->device_stats_cap & VIRTIO_NET_STATS_TYPE_TX_CSUM) {
  4449. tx->csum_none = 0;
  4450. tx->needs_csum = 0;
  4451. }
  4452. if (vi->device_stats_cap & VIRTIO_NET_STATS_TYPE_TX_GSO) {
  4453. tx->hw_gso_packets = 0;
  4454. tx->hw_gso_bytes = 0;
  4455. tx->hw_gso_wire_packets = 0;
  4456. tx->hw_gso_wire_bytes = 0;
  4457. }
  4458. if (vi->device_stats_cap & VIRTIO_NET_STATS_TYPE_TX_SPEED)
  4459. tx->hw_drop_ratelimits = 0;
  4460. netdev_stat_queue_sum(dev,
  4461. dev->real_num_rx_queues, vi->max_queue_pairs, rx,
  4462. dev->real_num_tx_queues, vi->max_queue_pairs, tx);
  4463. }
  4464. static const struct netdev_stat_ops virtnet_stat_ops = {
  4465. .get_queue_stats_rx = virtnet_get_queue_stats_rx,
  4466. .get_queue_stats_tx = virtnet_get_queue_stats_tx,
  4467. .get_base_stats = virtnet_get_base_stats,
  4468. };
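/* Quiesce the device before freeze/suspend: flush the config and rx-mode
 * workers, detach the netdev and close the queues if the interface is up.
 */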
  4469. static void virtnet_freeze_down(struct virtio_device *vdev)
  4470. {
  4471. struct virtnet_info *vi = vdev->priv;
  4472. /* Make sure no work handler is accessing the device */
  4473. flush_work(&vi->config_work);
  4474. disable_rx_mode_work(vi);
  4475. flush_work(&vi->rx_mode_work);
  4476. netif_tx_lock_bh(vi->dev);
  4477. netif_device_detach(vi->dev);
  4478. netif_tx_unlock_bh(vi->dev);
  4479. if (netif_running(vi->dev))
  4480. virtnet_close(vi->dev);
  4481. }
  4482. static int init_vqs(struct virtnet_info *vi);
  4483. static int virtnet_restore_up(struct virtio_device *vdev)
  4484. {
  4485. struct virtnet_info *vi = vdev->priv;
  4486. int err;
  4487. err = init_vqs(vi);
  4488. if (err)
  4489. return err;
  4490. virtio_device_ready(vdev);
  4491. enable_delayed_refill(vi);
  4492. enable_rx_mode_work(vi);
  4493. if (netif_running(vi->dev)) {
  4494. err = virtnet_open(vi->dev);
  4495. if (err)
  4496. return err;
  4497. }
  4498. netif_tx_lock_bh(vi->dev);
  4499. netif_device_attach(vi->dev);
  4500. netif_tx_unlock_bh(vi->dev);
  4501. return err;
  4502. }
  4503. static int virtnet_set_guest_offloads(struct virtnet_info *vi, u64 offloads)
  4504. {
  4505. __virtio64 *_offloads __free(kfree) = NULL;
  4506. struct scatterlist sg;
  4507. _offloads = kzalloc(sizeof(*_offloads), GFP_KERNEL);
  4508. if (!_offloads)
  4509. return -ENOMEM;
  4510. *_offloads = cpu_to_virtio64(vi->vdev, offloads);
  4511. sg_init_one(&sg, _offloads, sizeof(*_offloads));
  4512. if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_GUEST_OFFLOADS,
  4513. VIRTIO_NET_CTRL_GUEST_OFFLOADS_SET, &sg)) {
4514. dev_warn(&vi->dev->dev, "Failed to set guest offloads.\n");
  4515. return -EINVAL;
  4516. }
  4517. return 0;
  4518. }
  4519. static int virtnet_clear_guest_offloads(struct virtnet_info *vi)
  4520. {
  4521. u64 offloads = 0;
  4522. if (!vi->guest_offloads)
  4523. return 0;
  4524. return virtnet_set_guest_offloads(vi, offloads);
  4525. }
  4526. static int virtnet_restore_guest_offloads(struct virtnet_info *vi)
  4527. {
  4528. u64 offloads = vi->guest_offloads;
  4529. if (!vi->guest_offloads)
  4530. return 0;
  4531. return virtnet_set_guest_offloads(vi, offloads);
  4532. }
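/* Bind (@pool != NULL) or unbind an XSK buffer pool to an rx queue. The
 * queue is paused and its virtqueue reset so that in-flight buffers are
 * reclaimed before the new pool takes effect.
 */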
  4533. static int virtnet_rq_bind_xsk_pool(struct virtnet_info *vi, struct receive_queue *rq,
  4534. struct xsk_buff_pool *pool)
  4535. {
  4536. int err, qindex;
  4537. qindex = rq - vi->rq;
  4538. if (pool) {
  4539. err = xdp_rxq_info_reg(&rq->xsk_rxq_info, vi->dev, qindex, rq->napi.napi_id);
  4540. if (err < 0)
  4541. return err;
  4542. err = xdp_rxq_info_reg_mem_model(&rq->xsk_rxq_info,
  4543. MEM_TYPE_XSK_BUFF_POOL, NULL);
  4544. if (err < 0)
  4545. goto unreg;
  4546. xsk_pool_set_rxq_info(pool, &rq->xsk_rxq_info);
  4547. }
  4548. virtnet_rx_pause(vi, rq);
  4549. err = virtqueue_reset(rq->vq, virtnet_rq_unmap_free_buf, NULL);
  4550. if (err) {
  4551. netdev_err(vi->dev, "reset rx fail: rx queue index: %d err: %d\n", qindex, err);
  4552. pool = NULL;
  4553. }
  4554. rq->xsk_pool = pool;
  4555. virtnet_rx_resume(vi, rq);
  4556. if (pool)
  4557. return 0;
  4558. unreg:
  4559. xdp_rxq_info_unreg(&rq->xsk_rxq_info);
  4560. return err;
  4561. }
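/* Bind or unbind an XSK buffer pool to a tx queue, resetting the virtqueue
 * while transmission is paused.
 */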
  4562. static int virtnet_sq_bind_xsk_pool(struct virtnet_info *vi,
  4563. struct send_queue *sq,
  4564. struct xsk_buff_pool *pool)
  4565. {
  4566. int err, qindex;
  4567. qindex = sq - vi->sq;
  4568. virtnet_tx_pause(vi, sq);
  4569. err = virtqueue_reset(sq->vq, virtnet_sq_free_unused_buf,
  4570. virtnet_sq_free_unused_buf_done);
  4571. if (err) {
  4572. netdev_err(vi->dev, "reset tx fail: tx queue index: %d err: %d\n", qindex, err);
  4573. pool = NULL;
  4574. }
  4575. sq->xsk_pool = pool;
  4576. virtnet_tx_resume(vi, sq);
  4577. return err;
  4578. }
  4579. static int virtnet_xsk_pool_enable(struct net_device *dev,
  4580. struct xsk_buff_pool *pool,
  4581. u16 qid)
  4582. {
  4583. struct virtnet_info *vi = netdev_priv(dev);
  4584. struct receive_queue *rq;
  4585. struct device *dma_dev;
  4586. struct send_queue *sq;
  4587. dma_addr_t hdr_dma;
  4588. int err, size;
  4589. if (vi->hdr_len > xsk_pool_get_headroom(pool))
  4590. return -EINVAL;
4591. /* In big_packets mode, XDP cannot work, so there is no need to
4592. * initialize the xsk state of the rq.
4593. */
  4594. if (vi->big_packets && !vi->mergeable_rx_bufs)
  4595. return -ENOENT;
  4596. if (qid >= vi->curr_queue_pairs)
  4597. return -EINVAL;
  4598. sq = &vi->sq[qid];
  4599. rq = &vi->rq[qid];
4600. /* xsk assumes that tx and rx use the same dma device. AF_XDP
4601. * may use one buffer to receive from the rx and reuse this buffer to
4602. * send by the tx. So the dma dev of the sq and rq must be the same one.
4603. *
4604. * But vq->dma_dev allows each vq to have its own dma dev, so check
4605. * that the dma devs of the rq and sq are the same dev.
4606. */
  4607. if (virtqueue_dma_dev(rq->vq) != virtqueue_dma_dev(sq->vq))
  4608. return -EINVAL;
  4609. dma_dev = virtqueue_dma_dev(rq->vq);
  4610. if (!dma_dev)
  4611. return -EINVAL;
  4612. size = virtqueue_get_vring_size(rq->vq);
  4613. rq->xsk_buffs = kvcalloc(size, sizeof(*rq->xsk_buffs), GFP_KERNEL);
  4614. if (!rq->xsk_buffs)
  4615. return -ENOMEM;
  4616. hdr_dma = virtqueue_dma_map_single_attrs(sq->vq, &xsk_hdr, vi->hdr_len,
  4617. DMA_TO_DEVICE, 0);
  4618. if (virtqueue_dma_mapping_error(sq->vq, hdr_dma)) {
  4619. err = -ENOMEM;
  4620. goto err_free_buffs;
  4621. }
  4622. err = xsk_pool_dma_map(pool, dma_dev, 0);
  4623. if (err)
  4624. goto err_xsk_map;
  4625. err = virtnet_rq_bind_xsk_pool(vi, rq, pool);
  4626. if (err)
  4627. goto err_rq;
  4628. err = virtnet_sq_bind_xsk_pool(vi, sq, pool);
  4629. if (err)
  4630. goto err_sq;
4631. /* Tx offloads (such as tx csum) are not supported yet, so the tx
4632. * virtnet hdr is all zeros and all tx packets can share a single hdr.
4633. */
  4634. sq->xsk_hdr_dma_addr = hdr_dma;
  4635. return 0;
  4636. err_sq:
  4637. virtnet_rq_bind_xsk_pool(vi, rq, NULL);
  4638. err_rq:
  4639. xsk_pool_dma_unmap(pool, 0);
  4640. err_xsk_map:
  4641. virtqueue_dma_unmap_single_attrs(rq->vq, hdr_dma, vi->hdr_len,
  4642. DMA_TO_DEVICE, 0);
  4643. err_free_buffs:
  4644. kvfree(rq->xsk_buffs);
  4645. return err;
  4646. }
  4647. static int virtnet_xsk_pool_disable(struct net_device *dev, u16 qid)
  4648. {
  4649. struct virtnet_info *vi = netdev_priv(dev);
  4650. struct xsk_buff_pool *pool;
  4651. struct receive_queue *rq;
  4652. struct send_queue *sq;
  4653. int err;
  4654. if (qid >= vi->curr_queue_pairs)
  4655. return -EINVAL;
  4656. sq = &vi->sq[qid];
  4657. rq = &vi->rq[qid];
  4658. pool = rq->xsk_pool;
  4659. err = virtnet_rq_bind_xsk_pool(vi, rq, NULL);
  4660. err |= virtnet_sq_bind_xsk_pool(vi, sq, NULL);
  4661. xsk_pool_dma_unmap(pool, 0);
  4662. virtqueue_dma_unmap_single_attrs(sq->vq, sq->xsk_hdr_dma_addr,
  4663. vi->hdr_len, DMA_TO_DEVICE, 0);
  4664. kvfree(rq->xsk_buffs);
  4665. return err;
  4666. }
  4667. static int virtnet_xsk_pool_setup(struct net_device *dev, struct netdev_bpf *xdp)
  4668. {
  4669. if (xdp->xsk.pool)
  4670. return virtnet_xsk_pool_enable(dev, xdp->xsk.pool,
  4671. xdp->xsk.queue_id);
  4672. else
  4673. return virtnet_xsk_pool_disable(dev, xdp->xsk.queue_id);
  4674. }
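/* Attach or detach an XDP program: validate offload/MTU constraints, try to
 * reserve dedicated tx queues for XDP_TX, and swap the program on every rx
 * queue with NAPI disabled.
 */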
  4675. static int virtnet_xdp_set(struct net_device *dev, struct bpf_prog *prog,
  4676. struct netlink_ext_ack *extack)
  4677. {
  4678. unsigned int room = SKB_DATA_ALIGN(XDP_PACKET_HEADROOM +
  4679. sizeof(struct skb_shared_info));
  4680. unsigned int max_sz = PAGE_SIZE - room - ETH_HLEN;
  4681. struct virtnet_info *vi = netdev_priv(dev);
  4682. struct bpf_prog *old_prog;
  4683. u16 xdp_qp = 0, curr_qp;
  4684. int i, err;
  4685. if (!virtio_has_feature(vi->vdev, VIRTIO_NET_F_CTRL_GUEST_OFFLOADS)
  4686. && (virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_TSO4) ||
  4687. virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_TSO6) ||
  4688. virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_ECN) ||
  4689. virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_UFO) ||
  4690. virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_CSUM) ||
  4691. virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_USO4) ||
  4692. virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_USO6))) {
  4693. NL_SET_ERR_MSG_MOD(extack, "Can't set XDP while host is implementing GRO_HW/CSUM, disable GRO_HW/CSUM first");
  4694. return -EOPNOTSUPP;
  4695. }
  4696. if (vi->mergeable_rx_bufs && !vi->any_header_sg) {
  4697. NL_SET_ERR_MSG_MOD(extack, "XDP expects header/data in single page, any_header_sg required");
  4698. return -EINVAL;
  4699. }
  4700. if (prog && !prog->aux->xdp_has_frags && dev->mtu > max_sz) {
  4701. NL_SET_ERR_MSG_MOD(extack, "MTU too large to enable XDP without frags");
  4702. netdev_warn(dev, "single-buffer XDP requires MTU less than %u\n", max_sz);
  4703. return -EINVAL;
  4704. }
  4705. curr_qp = vi->curr_queue_pairs - vi->xdp_queue_pairs;
  4706. if (prog)
  4707. xdp_qp = nr_cpu_ids;
  4708. /* XDP requires extra queues for XDP_TX */
  4709. if (curr_qp + xdp_qp > vi->max_queue_pairs) {
  4710. netdev_warn_once(dev, "XDP request %i queues but max is %i. XDP_TX and XDP_REDIRECT will operate in a slower locked tx mode.\n",
  4711. curr_qp + xdp_qp, vi->max_queue_pairs);
  4712. xdp_qp = 0;
  4713. }
  4714. old_prog = rtnl_dereference(vi->rq[0].xdp_prog);
  4715. if (!prog && !old_prog)
  4716. return 0;
  4717. if (prog)
  4718. bpf_prog_add(prog, vi->max_queue_pairs - 1);
  4719. /* Make sure NAPI is not using any XDP TX queues for RX. */
  4720. if (netif_running(dev)) {
  4721. for (i = 0; i < vi->max_queue_pairs; i++) {
  4722. napi_disable(&vi->rq[i].napi);
  4723. virtnet_napi_tx_disable(&vi->sq[i].napi);
  4724. }
  4725. }
  4726. if (!prog) {
  4727. for (i = 0; i < vi->max_queue_pairs; i++) {
  4728. rcu_assign_pointer(vi->rq[i].xdp_prog, prog);
  4729. if (i == 0)
  4730. virtnet_restore_guest_offloads(vi);
  4731. }
  4732. synchronize_net();
  4733. }
  4734. err = virtnet_set_queues(vi, curr_qp + xdp_qp);
  4735. if (err)
  4736. goto err;
  4737. netif_set_real_num_rx_queues(dev, curr_qp + xdp_qp);
  4738. vi->xdp_queue_pairs = xdp_qp;
  4739. if (prog) {
  4740. vi->xdp_enabled = true;
  4741. for (i = 0; i < vi->max_queue_pairs; i++) {
  4742. rcu_assign_pointer(vi->rq[i].xdp_prog, prog);
  4743. if (i == 0 && !old_prog)
  4744. virtnet_clear_guest_offloads(vi);
  4745. }
  4746. if (!old_prog)
  4747. xdp_features_set_redirect_target(dev, true);
  4748. } else {
  4749. xdp_features_clear_redirect_target(dev);
  4750. vi->xdp_enabled = false;
  4751. }
  4752. for (i = 0; i < vi->max_queue_pairs; i++) {
  4753. if (old_prog)
  4754. bpf_prog_put(old_prog);
  4755. if (netif_running(dev)) {
  4756. virtnet_napi_enable(vi->rq[i].vq, &vi->rq[i].napi);
  4757. virtnet_napi_tx_enable(vi, vi->sq[i].vq,
  4758. &vi->sq[i].napi);
  4759. }
  4760. }
  4761. return 0;
  4762. err:
  4763. if (!prog) {
  4764. virtnet_clear_guest_offloads(vi);
  4765. for (i = 0; i < vi->max_queue_pairs; i++)
  4766. rcu_assign_pointer(vi->rq[i].xdp_prog, old_prog);
  4767. }
  4768. if (netif_running(dev)) {
  4769. for (i = 0; i < vi->max_queue_pairs; i++) {
  4770. virtnet_napi_enable(vi->rq[i].vq, &vi->rq[i].napi);
  4771. virtnet_napi_tx_enable(vi, vi->sq[i].vq,
  4772. &vi->sq[i].napi);
  4773. }
  4774. }
  4775. if (prog)
  4776. bpf_prog_sub(prog, vi->max_queue_pairs - 1);
  4777. return err;
  4778. }
  4779. static int virtnet_xdp(struct net_device *dev, struct netdev_bpf *xdp)
  4780. {
  4781. switch (xdp->command) {
  4782. case XDP_SETUP_PROG:
  4783. return virtnet_xdp_set(dev, xdp->prog, xdp->extack);
  4784. case XDP_SETUP_XSK_POOL:
  4785. return virtnet_xsk_pool_setup(dev, xdp);
  4786. default:
  4787. return -EINVAL;
  4788. }
  4789. }
  4790. static int virtnet_get_phys_port_name(struct net_device *dev, char *buf,
  4791. size_t len)
  4792. {
  4793. struct virtnet_info *vi = netdev_priv(dev);
  4794. int ret;
  4795. if (!virtio_has_feature(vi->vdev, VIRTIO_NET_F_STANDBY))
  4796. return -EOPNOTSUPP;
  4797. ret = snprintf(buf, len, "sby");
  4798. if (ret >= len)
  4799. return -EOPNOTSUPP;
  4800. return 0;
  4801. }
  4802. static int virtnet_set_features(struct net_device *dev,
  4803. netdev_features_t features)
  4804. {
  4805. struct virtnet_info *vi = netdev_priv(dev);
  4806. u64 offloads;
  4807. int err;
  4808. if ((dev->features ^ features) & NETIF_F_GRO_HW) {
  4809. if (vi->xdp_enabled)
  4810. return -EBUSY;
  4811. if (features & NETIF_F_GRO_HW)
  4812. offloads = vi->guest_offloads_capable;
  4813. else
  4814. offloads = vi->guest_offloads_capable &
  4815. ~GUEST_OFFLOAD_GRO_HW_MASK;
  4816. err = virtnet_set_guest_offloads(vi, offloads);
  4817. if (err)
  4818. return err;
  4819. vi->guest_offloads = offloads;
  4820. }
  4821. if ((dev->features ^ features) & NETIF_F_RXHASH) {
  4822. if (features & NETIF_F_RXHASH)
  4823. vi->rss.hash_types = vi->rss_hash_types_saved;
  4824. else
  4825. vi->rss.hash_types = VIRTIO_NET_HASH_REPORT_NONE;
  4826. if (!virtnet_commit_rss_command(vi))
  4827. return -EINVAL;
  4828. }
  4829. return 0;
  4830. }
  4831. static void virtnet_tx_timeout(struct net_device *dev, unsigned int txqueue)
  4832. {
  4833. struct virtnet_info *priv = netdev_priv(dev);
  4834. struct send_queue *sq = &priv->sq[txqueue];
  4835. struct netdev_queue *txq = netdev_get_tx_queue(dev, txqueue);
  4836. u64_stats_update_begin(&sq->stats.syncp);
  4837. u64_stats_inc(&sq->stats.tx_timeouts);
  4838. u64_stats_update_end(&sq->stats.syncp);
  4839. netdev_err(dev, "TX timeout on queue: %u, sq: %s, vq: 0x%x, name: %s, %u usecs ago\n",
  4840. txqueue, sq->name, sq->vq->index, sq->vq->name,
  4841. jiffies_to_usecs(jiffies - READ_ONCE(txq->trans_start)));
  4842. }
  4843. static int virtnet_init_irq_moder(struct virtnet_info *vi)
  4844. {
  4845. u8 profile_flags = 0, coal_flags = 0;
  4846. int ret, i;
  4847. profile_flags |= DIM_PROFILE_RX;
  4848. coal_flags |= DIM_COALESCE_USEC | DIM_COALESCE_PKTS;
  4849. ret = net_dim_init_irq_moder(vi->dev, profile_flags, coal_flags,
  4850. DIM_CQ_PERIOD_MODE_START_FROM_EQE,
  4851. 0, virtnet_rx_dim_work, NULL);
  4852. if (ret)
  4853. return ret;
  4854. for (i = 0; i < vi->max_queue_pairs; i++)
  4855. net_dim_setting(vi->dev, &vi->rq[i].dim, false);
  4856. return 0;
  4857. }
  4858. static void virtnet_free_irq_moder(struct virtnet_info *vi)
  4859. {
  4860. if (!virtio_has_feature(vi->vdev, VIRTIO_NET_F_VQ_NOTF_COAL))
  4861. return;
  4862. rtnl_lock();
  4863. net_dim_free_irq_moder(vi->dev);
  4864. rtnl_unlock();
  4865. }
  4866. static const struct net_device_ops virtnet_netdev = {
  4867. .ndo_open = virtnet_open,
  4868. .ndo_stop = virtnet_close,
  4869. .ndo_start_xmit = start_xmit,
  4870. .ndo_validate_addr = eth_validate_addr,
  4871. .ndo_set_mac_address = virtnet_set_mac_address,
  4872. .ndo_set_rx_mode = virtnet_set_rx_mode,
  4873. .ndo_get_stats64 = virtnet_stats,
  4874. .ndo_vlan_rx_add_vid = virtnet_vlan_rx_add_vid,
  4875. .ndo_vlan_rx_kill_vid = virtnet_vlan_rx_kill_vid,
  4876. .ndo_bpf = virtnet_xdp,
  4877. .ndo_xdp_xmit = virtnet_xdp_xmit,
  4878. .ndo_xsk_wakeup = virtnet_xsk_wakeup,
  4879. .ndo_features_check = passthru_features_check,
  4880. .ndo_get_phys_port_name = virtnet_get_phys_port_name,
  4881. .ndo_set_features = virtnet_set_features,
  4882. .ndo_tx_timeout = virtnet_tx_timeout,
  4883. };
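/* Config-change worker: acknowledge VIRTIO_NET_S_ANNOUNCE requests and
 * propagate link up/down changes to the network core.
 */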
  4884. static void virtnet_config_changed_work(struct work_struct *work)
  4885. {
  4886. struct virtnet_info *vi =
  4887. container_of(work, struct virtnet_info, config_work);
  4888. u16 v;
  4889. if (virtio_cread_feature(vi->vdev, VIRTIO_NET_F_STATUS,
  4890. struct virtio_net_config, status, &v) < 0)
  4891. return;
  4892. if (v & VIRTIO_NET_S_ANNOUNCE) {
  4893. netdev_notify_peers(vi->dev);
  4894. virtnet_ack_link_announce(vi);
  4895. }
  4896. /* Ignore unknown (future) status bits */
  4897. v &= VIRTIO_NET_S_LINK_UP;
  4898. if (vi->status == v)
  4899. return;
  4900. vi->status = v;
  4901. if (vi->status & VIRTIO_NET_S_LINK_UP) {
  4902. virtnet_update_settings(vi);
  4903. netif_carrier_on(vi->dev);
  4904. netif_tx_wake_all_queues(vi->dev);
  4905. } else {
  4906. netif_carrier_off(vi->dev);
  4907. netif_tx_stop_all_queues(vi->dev);
  4908. }
  4909. }
  4910. static void virtnet_config_changed(struct virtio_device *vdev)
  4911. {
  4912. struct virtnet_info *vi = vdev->priv;
  4913. schedule_work(&vi->config_work);
  4914. }
  4915. static void virtnet_free_queues(struct virtnet_info *vi)
  4916. {
  4917. int i;
  4918. for (i = 0; i < vi->max_queue_pairs; i++) {
  4919. __netif_napi_del(&vi->rq[i].napi);
  4920. __netif_napi_del(&vi->sq[i].napi);
  4921. }
4922. /* Since we called __netif_napi_del(),
4923. * we need to respect an RCU grace period before freeing vi->rq
4924. */
  4925. synchronize_net();
  4926. kfree(vi->rq);
  4927. kfree(vi->sq);
  4928. kfree(vi->ctrl);
  4929. }
  4930. static void _free_receive_bufs(struct virtnet_info *vi)
  4931. {
  4932. struct bpf_prog *old_prog;
  4933. int i;
  4934. for (i = 0; i < vi->max_queue_pairs; i++) {
  4935. while (vi->rq[i].pages)
  4936. __free_pages(get_a_page(&vi->rq[i], GFP_KERNEL), 0);
  4937. old_prog = rtnl_dereference(vi->rq[i].xdp_prog);
  4938. RCU_INIT_POINTER(vi->rq[i].xdp_prog, NULL);
  4939. if (old_prog)
  4940. bpf_prog_put(old_prog);
  4941. }
  4942. }
  4943. static void free_receive_bufs(struct virtnet_info *vi)
  4944. {
  4945. rtnl_lock();
  4946. _free_receive_bufs(vi);
  4947. rtnl_unlock();
  4948. }
  4949. static void free_receive_page_frags(struct virtnet_info *vi)
  4950. {
  4951. int i;
  4952. for (i = 0; i < vi->max_queue_pairs; i++)
  4953. if (vi->rq[i].alloc_frag.page) {
  4954. if (vi->rq[i].do_dma && vi->rq[i].last_dma)
  4955. virtnet_rq_unmap(&vi->rq[i], vi->rq[i].last_dma, 0);
  4956. put_page(vi->rq[i].alloc_frag.page);
  4957. }
  4958. }
  4959. static void virtnet_sq_free_unused_buf(struct virtqueue *vq, void *buf)
  4960. {
  4961. if (!is_xdp_frame(buf))
  4962. dev_kfree_skb(buf);
  4963. else
  4964. xdp_return_frame(ptr_to_xdp(buf));
  4965. }
  4966. static void virtnet_sq_free_unused_buf_done(struct virtqueue *vq)
  4967. {
  4968. struct virtnet_info *vi = vq->vdev->priv;
  4969. int i = vq2txq(vq);
  4970. netdev_tx_reset_queue(netdev_get_tx_queue(vi->dev, i));
  4971. }
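/* Return any buffers still queued in the send/receive virtqueues during
 * teardown so their memory can be released.
 */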
  4972. static void free_unused_bufs(struct virtnet_info *vi)
  4973. {
  4974. void *buf;
  4975. int i;
  4976. for (i = 0; i < vi->max_queue_pairs; i++) {
  4977. struct virtqueue *vq = vi->sq[i].vq;
  4978. while ((buf = virtqueue_detach_unused_buf(vq)) != NULL)
  4979. virtnet_sq_free_unused_buf(vq, buf);
  4980. cond_resched();
  4981. }
  4982. for (i = 0; i < vi->max_queue_pairs; i++) {
  4983. struct virtqueue *vq = vi->rq[i].vq;
  4984. while ((buf = virtqueue_detach_unused_buf(vq)) != NULL)
  4985. virtnet_rq_unmap_free_buf(vq, buf);
  4986. cond_resched();
  4987. }
  4988. }
  4989. static void virtnet_del_vqs(struct virtnet_info *vi)
  4990. {
  4991. struct virtio_device *vdev = vi->vdev;
  4992. virtnet_clean_affinity(vi);
  4993. vdev->config->del_vqs(vdev);
  4994. virtnet_free_queues(vi);
  4995. }
  4996. /* How large should a single buffer be so a queue full of these can fit at
  4997. * least one full packet?
  4998. * Logic below assumes the mergeable buffer header is used.
  4999. */
  5000. static unsigned int mergeable_min_buf_len(struct virtnet_info *vi, struct virtqueue *vq)
  5001. {
  5002. const unsigned int hdr_len = vi->hdr_len;
  5003. unsigned int rq_size = virtqueue_get_vring_size(vq);
  5004. unsigned int packet_len = vi->big_packets ? IP_MAX_MTU : vi->dev->max_mtu;
  5005. unsigned int buf_len = hdr_len + ETH_HLEN + VLAN_HLEN + packet_len;
  5006. unsigned int min_buf_len = DIV_ROUND_UP(buf_len, rq_size);
  5007. return max(max(min_buf_len, hdr_len) - hdr_len,
  5008. (unsigned int)GOOD_PACKET_LEN);
  5009. }
  5010. static int virtnet_find_vqs(struct virtnet_info *vi)
  5011. {
  5012. struct virtqueue_info *vqs_info;
  5013. struct virtqueue **vqs;
  5014. int ret = -ENOMEM;
  5015. int total_vqs;
  5016. bool *ctx;
  5017. u16 i;
5018. /* We expect 1 RX virtqueue followed by 1 TX virtqueue, followed by
5019. * possibly N-1 RX/TX queue pairs used in multiqueue mode, followed by
5020. * a possible control vq.
5021. */
  5022. total_vqs = vi->max_queue_pairs * 2 +
  5023. virtio_has_feature(vi->vdev, VIRTIO_NET_F_CTRL_VQ);
  5024. /* Allocate space for find_vqs parameters */
  5025. vqs = kcalloc(total_vqs, sizeof(*vqs), GFP_KERNEL);
  5026. if (!vqs)
  5027. goto err_vq;
  5028. vqs_info = kcalloc(total_vqs, sizeof(*vqs_info), GFP_KERNEL);
  5029. if (!vqs_info)
  5030. goto err_vqs_info;
  5031. if (!vi->big_packets || vi->mergeable_rx_bufs) {
  5032. ctx = kcalloc(total_vqs, sizeof(*ctx), GFP_KERNEL);
  5033. if (!ctx)
  5034. goto err_ctx;
  5035. } else {
  5036. ctx = NULL;
  5037. }
  5038. /* Parameters for control virtqueue, if any */
  5039. if (vi->has_cvq) {
  5040. vqs_info[total_vqs - 1].name = "control";
  5041. }
  5042. /* Allocate/initialize parameters for send/receive virtqueues */
  5043. for (i = 0; i < vi->max_queue_pairs; i++) {
  5044. vqs_info[rxq2vq(i)].callback = skb_recv_done;
  5045. vqs_info[txq2vq(i)].callback = skb_xmit_done;
  5046. sprintf(vi->rq[i].name, "input.%u", i);
  5047. sprintf(vi->sq[i].name, "output.%u", i);
  5048. vqs_info[rxq2vq(i)].name = vi->rq[i].name;
  5049. vqs_info[txq2vq(i)].name = vi->sq[i].name;
  5050. if (ctx)
  5051. vqs_info[rxq2vq(i)].ctx = true;
  5052. }
  5053. ret = virtio_find_vqs(vi->vdev, total_vqs, vqs, vqs_info, NULL);
  5054. if (ret)
  5055. goto err_find;
  5056. if (vi->has_cvq) {
  5057. vi->cvq = vqs[total_vqs - 1];
  5058. if (virtio_has_feature(vi->vdev, VIRTIO_NET_F_CTRL_VLAN))
  5059. vi->dev->features |= NETIF_F_HW_VLAN_CTAG_FILTER;
  5060. }
  5061. for (i = 0; i < vi->max_queue_pairs; i++) {
  5062. vi->rq[i].vq = vqs[rxq2vq(i)];
  5063. vi->rq[i].min_buf_len = mergeable_min_buf_len(vi, vi->rq[i].vq);
  5064. vi->sq[i].vq = vqs[txq2vq(i)];
  5065. }
5066. /* We only reach here with ret == 0; fall through to free the temporary arrays. */
  5067. err_find:
  5068. kfree(ctx);
  5069. err_ctx:
  5070. kfree(vqs_info);
  5071. err_vqs_info:
  5072. kfree(vqs);
  5073. err_vq:
  5074. return ret;
  5075. }
  5076. static int virtnet_alloc_queues(struct virtnet_info *vi)
  5077. {
  5078. int i;
  5079. if (vi->has_cvq) {
  5080. vi->ctrl = kzalloc(sizeof(*vi->ctrl), GFP_KERNEL);
  5081. if (!vi->ctrl)
  5082. goto err_ctrl;
  5083. } else {
  5084. vi->ctrl = NULL;
  5085. }
  5086. vi->sq = kcalloc(vi->max_queue_pairs, sizeof(*vi->sq), GFP_KERNEL);
  5087. if (!vi->sq)
  5088. goto err_sq;
  5089. vi->rq = kcalloc(vi->max_queue_pairs, sizeof(*vi->rq), GFP_KERNEL);
  5090. if (!vi->rq)
  5091. goto err_rq;
  5092. INIT_DELAYED_WORK(&vi->refill, refill_work);
  5093. for (i = 0; i < vi->max_queue_pairs; i++) {
  5094. vi->rq[i].pages = NULL;
  5095. netif_napi_add_weight(vi->dev, &vi->rq[i].napi, virtnet_poll,
  5096. napi_weight);
  5097. netif_napi_add_tx_weight(vi->dev, &vi->sq[i].napi,
  5098. virtnet_poll_tx,
  5099. napi_tx ? napi_weight : 0);
  5100. sg_init_table(vi->rq[i].sg, ARRAY_SIZE(vi->rq[i].sg));
  5101. ewma_pkt_len_init(&vi->rq[i].mrg_avg_pkt_len);
  5102. sg_init_table(vi->sq[i].sg, ARRAY_SIZE(vi->sq[i].sg));
  5103. u64_stats_init(&vi->rq[i].stats.syncp);
  5104. u64_stats_init(&vi->sq[i].stats.syncp);
  5105. mutex_init(&vi->rq[i].dim_lock);
  5106. }
  5107. return 0;
  5108. err_rq:
  5109. kfree(vi->sq);
  5110. err_sq:
  5111. kfree(vi->ctrl);
  5112. err_ctrl:
  5113. return -ENOMEM;
  5114. }
  5115. static int init_vqs(struct virtnet_info *vi)
  5116. {
  5117. int ret;
  5118. /* Allocate send & receive queues */
  5119. ret = virtnet_alloc_queues(vi);
  5120. if (ret)
  5121. goto err;
  5122. ret = virtnet_find_vqs(vi);
  5123. if (ret)
  5124. goto err_free;
  5125. cpus_read_lock();
  5126. virtnet_set_affinity(vi);
  5127. cpus_read_unlock();
  5128. return 0;
  5129. err_free:
  5130. virtnet_free_queues(vi);
  5131. err:
  5132. return ret;
  5133. }
  5134. #ifdef CONFIG_SYSFS
  5135. static ssize_t mergeable_rx_buffer_size_show(struct netdev_rx_queue *queue,
  5136. char *buf)
  5137. {
  5138. struct virtnet_info *vi = netdev_priv(queue->dev);
  5139. unsigned int queue_index = get_netdev_rx_queue_index(queue);
  5140. unsigned int headroom = virtnet_get_headroom(vi);
  5141. unsigned int tailroom = headroom ? sizeof(struct skb_shared_info) : 0;
  5142. struct ewma_pkt_len *avg;
  5143. BUG_ON(queue_index >= vi->max_queue_pairs);
  5144. avg = &vi->rq[queue_index].mrg_avg_pkt_len;
  5145. return sprintf(buf, "%u\n",
  5146. get_mergeable_buf_len(&vi->rq[queue_index], avg,
  5147. SKB_DATA_ALIGN(headroom + tailroom)));
  5148. }
  5149. static struct rx_queue_attribute mergeable_rx_buffer_size_attribute =
  5150. __ATTR_RO(mergeable_rx_buffer_size);
  5151. static struct attribute *virtio_net_mrg_rx_attrs[] = {
  5152. &mergeable_rx_buffer_size_attribute.attr,
  5153. NULL
  5154. };
  5155. static const struct attribute_group virtio_net_mrg_rx_group = {
  5156. .name = "virtio_net",
  5157. .attrs = virtio_net_mrg_rx_attrs
  5158. };
  5159. #endif
  5160. static bool virtnet_fail_on_feature(struct virtio_device *vdev,
  5161. unsigned int fbit,
  5162. const char *fname, const char *dname)
  5163. {
  5164. if (!virtio_has_feature(vdev, fbit))
  5165. return false;
  5166. dev_err(&vdev->dev, "device advertises feature %s but not %s",
  5167. fname, dname);
  5168. return true;
  5169. }
  5170. #define VIRTNET_FAIL_ON(vdev, fbit, dbit) \
  5171. virtnet_fail_on_feature(vdev, fbit, #fbit, dbit)
  5172. static bool virtnet_validate_features(struct virtio_device *vdev)
  5173. {
  5174. if (!virtio_has_feature(vdev, VIRTIO_NET_F_CTRL_VQ) &&
  5175. (VIRTNET_FAIL_ON(vdev, VIRTIO_NET_F_CTRL_RX,
  5176. "VIRTIO_NET_F_CTRL_VQ") ||
  5177. VIRTNET_FAIL_ON(vdev, VIRTIO_NET_F_CTRL_VLAN,
  5178. "VIRTIO_NET_F_CTRL_VQ") ||
  5179. VIRTNET_FAIL_ON(vdev, VIRTIO_NET_F_GUEST_ANNOUNCE,
  5180. "VIRTIO_NET_F_CTRL_VQ") ||
  5181. VIRTNET_FAIL_ON(vdev, VIRTIO_NET_F_MQ, "VIRTIO_NET_F_CTRL_VQ") ||
  5182. VIRTNET_FAIL_ON(vdev, VIRTIO_NET_F_CTRL_MAC_ADDR,
  5183. "VIRTIO_NET_F_CTRL_VQ") ||
  5184. VIRTNET_FAIL_ON(vdev, VIRTIO_NET_F_RSS,
  5185. "VIRTIO_NET_F_CTRL_VQ") ||
  5186. VIRTNET_FAIL_ON(vdev, VIRTIO_NET_F_HASH_REPORT,
  5187. "VIRTIO_NET_F_CTRL_VQ") ||
  5188. VIRTNET_FAIL_ON(vdev, VIRTIO_NET_F_NOTF_COAL,
  5189. "VIRTIO_NET_F_CTRL_VQ") ||
  5190. VIRTNET_FAIL_ON(vdev, VIRTIO_NET_F_VQ_NOTF_COAL,
  5191. "VIRTIO_NET_F_CTRL_VQ"))) {
  5192. return false;
  5193. }
  5194. return true;
  5195. }
  5196. #define MIN_MTU ETH_MIN_MTU
  5197. #define MAX_MTU ETH_MAX_MTU
  5198. static int virtnet_validate(struct virtio_device *vdev)
  5199. {
  5200. if (!vdev->config->get) {
  5201. dev_err(&vdev->dev, "%s failure: config access disabled\n",
  5202. __func__);
  5203. return -EINVAL;
  5204. }
  5205. if (!virtnet_validate_features(vdev))
  5206. return -EINVAL;
  5207. if (virtio_has_feature(vdev, VIRTIO_NET_F_MTU)) {
  5208. int mtu = virtio_cread16(vdev,
  5209. offsetof(struct virtio_net_config,
  5210. mtu));
  5211. if (mtu < MIN_MTU)
  5212. __virtio_clear_bit(vdev, VIRTIO_NET_F_MTU);
  5213. }
  5214. if (virtio_has_feature(vdev, VIRTIO_NET_F_STANDBY) &&
  5215. !virtio_has_feature(vdev, VIRTIO_NET_F_MAC)) {
  5216. dev_warn(&vdev->dev, "device advertises feature VIRTIO_NET_F_STANDBY but not VIRTIO_NET_F_MAC, disabling standby");
  5217. __virtio_clear_bit(vdev, VIRTIO_NET_F_STANDBY);
  5218. }
  5219. return 0;
  5220. }
  5221. static bool virtnet_check_guest_gso(const struct virtnet_info *vi)
  5222. {
  5223. return virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_TSO4) ||
  5224. virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_TSO6) ||
  5225. virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_ECN) ||
  5226. virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_UFO) ||
  5227. (virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_USO4) &&
  5228. virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_USO6));
  5229. }
  5230. static void virtnet_set_big_packets(struct virtnet_info *vi, const int mtu)
  5231. {
  5232. bool guest_gso = virtnet_check_guest_gso(vi);
5233. /* If the device can receive ANY guest GSO packets, allocate buffers
5234. * of maximum size regardless of mtu; otherwise limit them to the
5235. * mtu size only.
5236. */
  5237. if (mtu > ETH_DATA_LEN || guest_gso) {
  5238. vi->big_packets = true;
  5239. vi->big_packets_num_skbfrags = guest_gso ? MAX_SKB_FRAGS : DIV_ROUND_UP(mtu, PAGE_SIZE);
  5240. }
  5241. }
  5242. #define VIRTIO_NET_HASH_REPORT_MAX_TABLE 10
  5243. static enum xdp_rss_hash_type
  5244. virtnet_xdp_rss_type[VIRTIO_NET_HASH_REPORT_MAX_TABLE] = {
  5245. [VIRTIO_NET_HASH_REPORT_NONE] = XDP_RSS_TYPE_NONE,
  5246. [VIRTIO_NET_HASH_REPORT_IPv4] = XDP_RSS_TYPE_L3_IPV4,
  5247. [VIRTIO_NET_HASH_REPORT_TCPv4] = XDP_RSS_TYPE_L4_IPV4_TCP,
  5248. [VIRTIO_NET_HASH_REPORT_UDPv4] = XDP_RSS_TYPE_L4_IPV4_UDP,
  5249. [VIRTIO_NET_HASH_REPORT_IPv6] = XDP_RSS_TYPE_L3_IPV6,
  5250. [VIRTIO_NET_HASH_REPORT_TCPv6] = XDP_RSS_TYPE_L4_IPV6_TCP,
  5251. [VIRTIO_NET_HASH_REPORT_UDPv6] = XDP_RSS_TYPE_L4_IPV6_UDP,
  5252. [VIRTIO_NET_HASH_REPORT_IPv6_EX] = XDP_RSS_TYPE_L3_IPV6_EX,
  5253. [VIRTIO_NET_HASH_REPORT_TCPv6_EX] = XDP_RSS_TYPE_L4_IPV6_TCP_EX,
  5254. [VIRTIO_NET_HASH_REPORT_UDPv6_EX] = XDP_RSS_TYPE_L4_IPV6_UDP_EX
  5255. };
static int virtnet_xdp_rx_hash(const struct xdp_md *_ctx, u32 *hash,
                               enum xdp_rss_hash_type *rss_type)
{
        const struct xdp_buff *xdp = (void *)_ctx;
        struct virtio_net_hdr_v1_hash *hdr_hash;
        struct virtnet_info *vi;
        u16 hash_report;

        if (!(xdp->rxq->dev->features & NETIF_F_RXHASH))
                return -ENODATA;

        vi = netdev_priv(xdp->rxq->dev);
        hdr_hash = (struct virtio_net_hdr_v1_hash *)(xdp->data - vi->hdr_len);
        hash_report = __le16_to_cpu(hdr_hash->hash_report);

        if (hash_report >= VIRTIO_NET_HASH_REPORT_MAX_TABLE)
                hash_report = VIRTIO_NET_HASH_REPORT_NONE;

        *rss_type = virtnet_xdp_rss_type[hash_report];
        *hash = __le32_to_cpu(hdr_hash->hash_value);
        return 0;
}

static const struct xdp_metadata_ops virtnet_xdp_metadata_ops = {
        .xmo_rx_hash = virtnet_xdp_rx_hash,
};

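/* Device probe: reads the negotiated features and config space, allocates
 * the net_device and virtqueues, registers the netdev and, where the
 * control virtqueue is available, pushes the initial RSS/MAC/stats
 * configuration to the device.  Failure paths unwind through the labels at
 * the bottom of the function in reverse order of setup.
 */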
static int virtnet_probe(struct virtio_device *vdev)
{
        int i, err = -ENOMEM;
        struct net_device *dev;
        struct virtnet_info *vi;
        u16 max_queue_pairs;
        int mtu = 0;

        /* Find if host supports multiqueue/rss virtio_net device */
        max_queue_pairs = 1;
        if (virtio_has_feature(vdev, VIRTIO_NET_F_MQ) || virtio_has_feature(vdev, VIRTIO_NET_F_RSS))
                max_queue_pairs =
                     virtio_cread16(vdev, offsetof(struct virtio_net_config, max_virtqueue_pairs));

        /* We need at least 2 queues */
        if (max_queue_pairs < VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MIN ||
            max_queue_pairs > VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MAX ||
            !virtio_has_feature(vdev, VIRTIO_NET_F_CTRL_VQ))
                max_queue_pairs = 1;

        /* Allocate ourselves a network device with room for our info */
        dev = alloc_etherdev_mq(sizeof(struct virtnet_info), max_queue_pairs);
        if (!dev)
                return -ENOMEM;

        /* Set up network device as normal. */
        dev->priv_flags |= IFF_UNICAST_FLT | IFF_LIVE_ADDR_CHANGE |
                           IFF_TX_SKB_NO_LINEAR;
        dev->netdev_ops = &virtnet_netdev;
        dev->stat_ops = &virtnet_stat_ops;
        dev->features = NETIF_F_HIGHDMA;

        dev->ethtool_ops = &virtnet_ethtool_ops;
        SET_NETDEV_DEV(dev, &vdev->dev);

        /* Do we support "hardware" checksums? */
        if (virtio_has_feature(vdev, VIRTIO_NET_F_CSUM)) {
                /* This opens up the world of extra features. */
                dev->hw_features |= NETIF_F_HW_CSUM | NETIF_F_SG;
                if (csum)
                        dev->features |= NETIF_F_HW_CSUM | NETIF_F_SG;

                if (virtio_has_feature(vdev, VIRTIO_NET_F_GSO)) {
                        dev->hw_features |= NETIF_F_TSO
                                | NETIF_F_TSO_ECN | NETIF_F_TSO6;
                }
                /* Individual feature bits: what can host handle? */
                if (virtio_has_feature(vdev, VIRTIO_NET_F_HOST_TSO4))
                        dev->hw_features |= NETIF_F_TSO;
                if (virtio_has_feature(vdev, VIRTIO_NET_F_HOST_TSO6))
                        dev->hw_features |= NETIF_F_TSO6;
                if (virtio_has_feature(vdev, VIRTIO_NET_F_HOST_ECN))
                        dev->hw_features |= NETIF_F_TSO_ECN;
                if (virtio_has_feature(vdev, VIRTIO_NET_F_HOST_USO))
                        dev->hw_features |= NETIF_F_GSO_UDP_L4;

                dev->features |= NETIF_F_GSO_ROBUST;

                if (gso)
                        dev->features |= dev->hw_features & NETIF_F_ALL_TSO;
                /* (!csum && gso) case will be fixed by register_netdev() */
        }

        /* 1. With VIRTIO_NET_F_GUEST_CSUM negotiation, the driver doesn't
         * need to calculate checksums for partially checksummed packets,
         * as they're considered valid by the upper layer.
         * 2. Without VIRTIO_NET_F_GUEST_CSUM negotiation, the driver only
         * receives fully checksummed packets. The device may assist in
         * validating these packets' checksums, so the driver won't have to.
         */
        dev->features |= NETIF_F_RXCSUM;

        if (virtio_has_feature(vdev, VIRTIO_NET_F_GUEST_TSO4) ||
            virtio_has_feature(vdev, VIRTIO_NET_F_GUEST_TSO6))
                dev->features |= NETIF_F_GRO_HW;
        if (virtio_has_feature(vdev, VIRTIO_NET_F_CTRL_GUEST_OFFLOADS))
                dev->hw_features |= NETIF_F_GRO_HW;

        dev->vlan_features = dev->features;
        dev->xdp_features = NETDEV_XDP_ACT_BASIC | NETDEV_XDP_ACT_REDIRECT;

        /* MTU range: 68 - 65535 */
        dev->min_mtu = MIN_MTU;
        dev->max_mtu = MAX_MTU;

        /* Configuration may specify what MAC to use. Otherwise random. */
        if (virtio_has_feature(vdev, VIRTIO_NET_F_MAC)) {
                u8 addr[ETH_ALEN];

                virtio_cread_bytes(vdev,
                                   offsetof(struct virtio_net_config, mac),
                                   addr, ETH_ALEN);
                eth_hw_addr_set(dev, addr);
        } else {
                eth_hw_addr_random(dev);
                dev_info(&vdev->dev, "Assigned random MAC address %pM\n",
                         dev->dev_addr);
        }

        /* Set up our device-specific information */
        vi = netdev_priv(dev);
        vi->dev = dev;
        vi->vdev = vdev;
        vdev->priv = vi;

        INIT_WORK(&vi->config_work, virtnet_config_changed_work);
        INIT_WORK(&vi->rx_mode_work, virtnet_rx_mode_work);
        spin_lock_init(&vi->refill_lock);

        if (virtio_has_feature(vdev, VIRTIO_NET_F_MRG_RXBUF)) {
                vi->mergeable_rx_bufs = true;
                dev->xdp_features |= NETDEV_XDP_ACT_RX_SG;
        }

        if (virtio_has_feature(vdev, VIRTIO_NET_F_HASH_REPORT))
                vi->has_rss_hash_report = true;

        if (virtio_has_feature(vdev, VIRTIO_NET_F_RSS)) {
                vi->has_rss = true;

                vi->rss_indir_table_size =
                        virtio_cread16(vdev, offsetof(struct virtio_net_config,
                                rss_max_indirection_table_length));
        }

        err = rss_indirection_table_alloc(&vi->rss, vi->rss_indir_table_size);
        if (err)
                goto free;

        if (vi->has_rss || vi->has_rss_hash_report) {
                vi->rss_key_size =
                        virtio_cread8(vdev, offsetof(struct virtio_net_config, rss_max_key_size));
                if (vi->rss_key_size > VIRTIO_NET_RSS_MAX_KEY_SIZE) {
                        dev_err(&vdev->dev, "rss_max_key_size=%u exceeds the limit %u.\n",
                                vi->rss_key_size, VIRTIO_NET_RSS_MAX_KEY_SIZE);
                        err = -EINVAL;
                        goto free;
                }

                vi->rss_hash_types_supported =
                    virtio_cread32(vdev, offsetof(struct virtio_net_config, supported_hash_types));
                vi->rss_hash_types_supported &=
                                ~(VIRTIO_NET_RSS_HASH_TYPE_IP_EX |
                                  VIRTIO_NET_RSS_HASH_TYPE_TCP_EX |
                                  VIRTIO_NET_RSS_HASH_TYPE_UDP_EX);

                dev->hw_features |= NETIF_F_RXHASH;
                dev->xdp_metadata_ops = &virtnet_xdp_metadata_ops;
        }

        if (vi->has_rss_hash_report)
                vi->hdr_len = sizeof(struct virtio_net_hdr_v1_hash);
        else if (virtio_has_feature(vdev, VIRTIO_NET_F_MRG_RXBUF) ||
                 virtio_has_feature(vdev, VIRTIO_F_VERSION_1))
                vi->hdr_len = sizeof(struct virtio_net_hdr_mrg_rxbuf);
        else
                vi->hdr_len = sizeof(struct virtio_net_hdr);

        if (virtio_has_feature(vdev, VIRTIO_F_ANY_LAYOUT) ||
            virtio_has_feature(vdev, VIRTIO_F_VERSION_1))
                vi->any_header_sg = true;

        if (virtio_has_feature(vdev, VIRTIO_NET_F_CTRL_VQ))
                vi->has_cvq = true;

        mutex_init(&vi->cvq_lock);

        if (virtio_has_feature(vdev, VIRTIO_NET_F_MTU)) {
                mtu = virtio_cread16(vdev,
                                     offsetof(struct virtio_net_config,
                                              mtu));
                if (mtu < dev->min_mtu) {
                        /* Should never trigger: MTU was previously validated
                         * in virtnet_validate.
                         */
                        dev_err(&vdev->dev,
                                "device MTU appears to have changed, it is now %d < %d",
                                mtu, dev->min_mtu);
                        err = -EINVAL;
                        goto free;
                }

                dev->mtu = mtu;
                dev->max_mtu = mtu;
        }

        virtnet_set_big_packets(vi, mtu);

        if (vi->any_header_sg)
                dev->needed_headroom = vi->hdr_len;

        /* Enable multiqueue by default */
        if (num_online_cpus() >= max_queue_pairs)
                vi->curr_queue_pairs = max_queue_pairs;
        else
                vi->curr_queue_pairs = num_online_cpus();
        vi->max_queue_pairs = max_queue_pairs;

        /* Allocate/initialize the rx/tx queues, and invoke find_vqs */
        err = init_vqs(vi);
        if (err)
                goto free;

        if (virtio_has_feature(vi->vdev, VIRTIO_NET_F_NOTF_COAL)) {
                vi->intr_coal_rx.max_usecs = 0;
                vi->intr_coal_tx.max_usecs = 0;
                vi->intr_coal_rx.max_packets = 0;

                /* Keep the default values of the coalescing parameters
                 * aligned with the default napi_tx state.
                 */
                if (vi->sq[0].napi.weight)
                        vi->intr_coal_tx.max_packets = 1;
                else
                        vi->intr_coal_tx.max_packets = 0;
        }

        if (virtio_has_feature(vi->vdev, VIRTIO_NET_F_VQ_NOTF_COAL)) {
                /* The reason is the same as VIRTIO_NET_F_NOTF_COAL. */
                for (i = 0; i < vi->max_queue_pairs; i++)
                        if (vi->sq[i].napi.weight)
                                vi->sq[i].intr_coal.max_packets = 1;

                err = virtnet_init_irq_moder(vi);
                if (err)
                        goto free;
        }

#ifdef CONFIG_SYSFS
        if (vi->mergeable_rx_bufs)
                dev->sysfs_rx_queue_group = &virtio_net_mrg_rx_group;
#endif
        netif_set_real_num_tx_queues(dev, vi->curr_queue_pairs);
        netif_set_real_num_rx_queues(dev, vi->curr_queue_pairs);

        virtnet_init_settings(dev);

        if (virtio_has_feature(vdev, VIRTIO_NET_F_STANDBY)) {
                vi->failover = net_failover_create(vi->dev);
                if (IS_ERR(vi->failover)) {
                        err = PTR_ERR(vi->failover);
                        goto free_vqs;
                }
        }

        if (vi->has_rss || vi->has_rss_hash_report)
                virtnet_init_default_rss(vi);

        enable_rx_mode_work(vi);

        /* serialize netdev register + virtio_device_ready() with ndo_open() */
        rtnl_lock();

        err = register_netdevice(dev);
        if (err) {
                pr_debug("virtio_net: registering device failed\n");
                rtnl_unlock();
                goto free_failover;
        }

        /* Disable config change notification until ndo_open. */
        virtio_config_driver_disable(vi->vdev);

        virtio_device_ready(vdev);

        if (vi->has_rss || vi->has_rss_hash_report) {
                if (!virtnet_commit_rss_command(vi)) {
                        dev_warn(&vdev->dev, "RSS disabled because committing failed.\n");
                        dev->hw_features &= ~NETIF_F_RXHASH;
                        vi->has_rss_hash_report = false;
                        vi->has_rss = false;
                }
        }

        virtnet_set_queues(vi, vi->curr_queue_pairs);

        /* A random MAC address has been assigned; notify the device.
         * We don't fail probe if VIRTIO_NET_F_CTRL_MAC_ADDR is absent
         * because many devices work fine without having the MAC set
         * explicitly.
         */
        if (!virtio_has_feature(vdev, VIRTIO_NET_F_MAC) &&
            virtio_has_feature(vi->vdev, VIRTIO_NET_F_CTRL_MAC_ADDR)) {
                struct scatterlist sg;

                sg_init_one(&sg, dev->dev_addr, dev->addr_len);
                if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_MAC,
                                          VIRTIO_NET_CTRL_MAC_ADDR_SET, &sg)) {
                        pr_debug("virtio_net: setting MAC address failed\n");
                        rtnl_unlock();
                        err = -EINVAL;
                        goto free_unregister_netdev;
                }
        }

        if (virtio_has_feature(vi->vdev, VIRTIO_NET_F_DEVICE_STATS)) {
                struct virtio_net_stats_capabilities *stats_cap __free(kfree) = NULL;
                struct scatterlist sg;
                __le64 v;

                stats_cap = kzalloc(sizeof(*stats_cap), GFP_KERNEL);
                if (!stats_cap) {
                        rtnl_unlock();
                        err = -ENOMEM;
                        goto free_unregister_netdev;
                }

                sg_init_one(&sg, stats_cap, sizeof(*stats_cap));

                if (!virtnet_send_command_reply(vi, VIRTIO_NET_CTRL_STATS,
                                                VIRTIO_NET_CTRL_STATS_QUERY,
                                                NULL, &sg)) {
                        pr_debug("virtio_net: failed to get stats capability\n");
                        rtnl_unlock();
                        err = -EINVAL;
                        goto free_unregister_netdev;
                }

                v = stats_cap->supported_stats_types[0];
                vi->device_stats_cap = le64_to_cpu(v);
        }

        /* Assume link up if device can't report link status,
           otherwise get link status from config. */
        netif_carrier_off(dev);
        if (virtio_has_feature(vi->vdev, VIRTIO_NET_F_STATUS)) {
                virtio_config_changed(vi->vdev);
        } else {
                vi->status = VIRTIO_NET_S_LINK_UP;
                virtnet_update_settings(vi);
                netif_carrier_on(dev);
        }

        for (i = 0; i < ARRAY_SIZE(guest_offloads); i++)
                if (virtio_has_feature(vi->vdev, guest_offloads[i]))
                        set_bit(guest_offloads[i], &vi->guest_offloads);
        vi->guest_offloads_capable = vi->guest_offloads;

        rtnl_unlock();

        err = virtnet_cpu_notif_add(vi);
        if (err) {
                pr_debug("virtio_net: registering cpu notifier failed\n");
                goto free_unregister_netdev;
        }

        pr_debug("virtnet: registered device %s with %d RX and TX vqs\n",
                 dev->name, max_queue_pairs);

        return 0;

free_unregister_netdev:
        unregister_netdev(dev);
free_failover:
        net_failover_destroy(vi->failover);
free_vqs:
        virtio_reset_device(vdev);
        cancel_delayed_work_sync(&vi->refill);
        free_receive_page_frags(vi);
        virtnet_del_vqs(vi);
free:
        free_netdev(dev);
        return err;
}

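/* Common virtqueue tear-down used by remove, freeze and the restore error
 * path: reset the device first so it stops touching the rings, then free
 * any buffers still posted and delete the virtqueues.
 */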
static void remove_vq_common(struct virtnet_info *vi)
{
        int i;

        virtio_reset_device(vi->vdev);

        /* Free unused buffers in both send and recv, if any. */
        free_unused_bufs(vi);

        /*
         * Rule of thumb is netdev_tx_reset_queue() should follow any
         * skb freeing not followed by netdev_tx_completed_queue()
         */
        for (i = 0; i < vi->max_queue_pairs; i++)
                netdev_tx_reset_queue(netdev_get_tx_queue(vi->dev, i));

        free_receive_bufs(vi);

        free_receive_page_frags(vi);

        virtnet_del_vqs(vi);
}

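/* Device removal: stop CPU-hotplug tracking and flush asynchronous work
 * before unregistering the netdev, so no handler races with tear-down.
 */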
static void virtnet_remove(struct virtio_device *vdev)
{
        struct virtnet_info *vi = vdev->priv;

        virtnet_cpu_notif_remove(vi);

        /* Make sure no work handler is accessing the device. */
        flush_work(&vi->config_work);
        disable_rx_mode_work(vi);
        flush_work(&vi->rx_mode_work);

        virtnet_free_irq_moder(vi);

        unregister_netdev(vi->dev);

        net_failover_destroy(vi->failover);

        remove_vq_common(vi);

        rss_indirection_table_free(&vi->rss);

        free_netdev(vi->dev);
}

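/* Suspend/resume support (used when CONFIG_PM_SLEEP is enabled): freeze
 * tears the virtqueues down completely; restore brings them back up and
 * re-applies the current queue-pair count.
 */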
static __maybe_unused int virtnet_freeze(struct virtio_device *vdev)
{
        struct virtnet_info *vi = vdev->priv;

        virtnet_cpu_notif_remove(vi);
        virtnet_freeze_down(vdev);
        remove_vq_common(vi);

        return 0;
}

static __maybe_unused int virtnet_restore(struct virtio_device *vdev)
{
        struct virtnet_info *vi = vdev->priv;
        int err;

        err = virtnet_restore_up(vdev);
        if (err)
                return err;
        virtnet_set_queues(vi, vi->curr_queue_pairs);

        err = virtnet_cpu_notif_add(vi);
        if (err) {
                virtnet_freeze_down(vdev);
                remove_vq_common(vi);
                return err;
        }

        return 0;
}

static struct virtio_device_id id_table[] = {
        { VIRTIO_ID_NET, VIRTIO_DEV_ANY_ID },
        { 0 },
};

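/* Feature bits advertised to the virtio core.  The legacy table additionally
 * offers VIRTIO_NET_F_GSO and VIRTIO_F_ANY_LAYOUT, which are only meaningful
 * for pre-1.0 (legacy) devices.
 */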
#define VIRTNET_FEATURES \
        VIRTIO_NET_F_CSUM, VIRTIO_NET_F_GUEST_CSUM, \
        VIRTIO_NET_F_MAC, \
        VIRTIO_NET_F_HOST_TSO4, VIRTIO_NET_F_HOST_UFO, VIRTIO_NET_F_HOST_TSO6, \
        VIRTIO_NET_F_HOST_ECN, VIRTIO_NET_F_GUEST_TSO4, VIRTIO_NET_F_GUEST_TSO6, \
        VIRTIO_NET_F_GUEST_ECN, VIRTIO_NET_F_GUEST_UFO, \
        VIRTIO_NET_F_HOST_USO, VIRTIO_NET_F_GUEST_USO4, VIRTIO_NET_F_GUEST_USO6, \
        VIRTIO_NET_F_MRG_RXBUF, VIRTIO_NET_F_STATUS, VIRTIO_NET_F_CTRL_VQ, \
        VIRTIO_NET_F_CTRL_RX, VIRTIO_NET_F_CTRL_VLAN, \
        VIRTIO_NET_F_GUEST_ANNOUNCE, VIRTIO_NET_F_MQ, \
        VIRTIO_NET_F_CTRL_MAC_ADDR, \
        VIRTIO_NET_F_MTU, VIRTIO_NET_F_CTRL_GUEST_OFFLOADS, \
        VIRTIO_NET_F_SPEED_DUPLEX, VIRTIO_NET_F_STANDBY, \
        VIRTIO_NET_F_RSS, VIRTIO_NET_F_HASH_REPORT, VIRTIO_NET_F_NOTF_COAL, \
        VIRTIO_NET_F_VQ_NOTF_COAL, \
        VIRTIO_NET_F_GUEST_HDRLEN, VIRTIO_NET_F_DEVICE_STATS

static unsigned int features[] = {
        VIRTNET_FEATURES,
};

static unsigned int features_legacy[] = {
        VIRTNET_FEATURES,
        VIRTIO_NET_F_GSO,
        VIRTIO_F_ANY_LAYOUT,
};

static struct virtio_driver virtio_net_driver = {
        .feature_table = features,
        .feature_table_size = ARRAY_SIZE(features),
        .feature_table_legacy = features_legacy,
        .feature_table_size_legacy = ARRAY_SIZE(features_legacy),
        .driver.name = KBUILD_MODNAME,
        .id_table = id_table,
        .validate = virtnet_validate,
        .probe = virtnet_probe,
        .remove = virtnet_remove,
        .config_changed = virtnet_config_changed,
#ifdef CONFIG_PM_SLEEP
        .freeze = virtnet_freeze,
        .restore = virtnet_restore,
#endif
};

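/* Module init: register two CPU hotplug states (a dynamic "online" state and
 * the dedicated CPUHP_VIRT_NET_DEAD state) used by the driver's hotplug
 * callbacks (virtnet_cpu_online/virtnet_cpu_dead), then register the virtio
 * driver itself.  Errors unwind in reverse order.
 */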
static __init int virtio_net_driver_init(void)
{
        int ret;

        ret = cpuhp_setup_state_multi(CPUHP_AP_ONLINE_DYN, "virtio/net:online",
                                      virtnet_cpu_online,
                                      virtnet_cpu_down_prep);
        if (ret < 0)
                goto out;
        virtionet_online = ret;
        ret = cpuhp_setup_state_multi(CPUHP_VIRT_NET_DEAD, "virtio/net:dead",
                                      NULL, virtnet_cpu_dead);
        if (ret)
                goto err_dead;
        ret = register_virtio_driver(&virtio_net_driver);
        if (ret)
                goto err_virtio;
        return 0;
err_virtio:
        cpuhp_remove_multi_state(CPUHP_VIRT_NET_DEAD);
err_dead:
        cpuhp_remove_multi_state(virtionet_online);
out:
        return ret;
}
module_init(virtio_net_driver_init);

static __exit void virtio_net_driver_exit(void)
{
        unregister_virtio_driver(&virtio_net_driver);
        cpuhp_remove_multi_state(CPUHP_VIRT_NET_DEAD);
        cpuhp_remove_multi_state(virtionet_online);
}
module_exit(virtio_net_driver_exit);

MODULE_DEVICE_TABLE(virtio, id_table);
MODULE_DESCRIPTION("Virtio network driver");
MODULE_LICENSE("GPL");