// SPDX-License-Identifier: GPL-2.0-only
/*
 * xfrm_policy.c
 *
 * Changes:
 *	Mitsuru KANDA @USAGI
 *	Kazunori MIYAZAWA @USAGI
 *	Kunihiro Ishiguro <kunihiro@ipinfusion.com>
 *		IPv6 support
 *	Kazunori MIYAZAWA @USAGI
 *	YOSHIFUJI Hideaki
 *		Split up af-specific portion
 *	Derek Atkins <derek@ihtfp.com>		Add the post_input processor
 *
 */

#include <linux/err.h>
#include <linux/slab.h>
#include <linux/kmod.h>
#include <linux/list.h>
#include <linux/spinlock.h>
#include <linux/workqueue.h>
#include <linux/notifier.h>
#include <linux/netdevice.h>
#include <linux/netfilter.h>
#include <linux/module.h>
#include <linux/cache.h>
#include <linux/cpu.h>
#include <linux/audit.h>
#include <linux/rhashtable.h>
#include <linux/if_tunnel.h>
#include <linux/icmp.h>
#include <net/dst.h>
#include <net/flow.h>
#include <net/inet_ecn.h>
#include <net/xfrm.h>
#include <net/ip.h>
#include <net/gre.h>
#if IS_ENABLED(CONFIG_IPV6_MIP6)
#include <net/mip6.h>
#endif
#ifdef CONFIG_XFRM_STATISTICS
#include <net/snmp.h>
#endif
#ifdef CONFIG_XFRM_ESPINTCP
#include <net/espintcp.h>
#endif
#include <net/inet_dscp.h>

#include "xfrm_hash.h"

#define XFRM_QUEUE_TMO_MIN ((unsigned)(HZ/10))
#define XFRM_QUEUE_TMO_MAX ((unsigned)(60*HZ))
#define XFRM_MAX_QUEUE_LEN	100

struct xfrm_flo {
	struct dst_entry *dst_orig;
	u8 flags;
};

/* prefixes smaller than this are stored in lists, not trees. */
#define INEXACT_PREFIXLEN_IPV4	16
#define INEXACT_PREFIXLEN_IPV6	48

struct xfrm_pol_inexact_node {
	struct rb_node node;
	union {
		xfrm_address_t addr;
		struct rcu_head rcu;
	};
	u8 prefixlen;

	struct rb_root root;

	/* the policies matching this node, can be empty list */
	struct hlist_head hhead;
};

/* xfrm inexact policy search tree:
 * xfrm_pol_inexact_bin = hash(dir,type,family,if_id);
 *  |
 * +---- root_d: sorted by daddr:prefix
 * |                 |
 * |        xfrm_pol_inexact_node
 * |                 |
 * |                 +- root: sorted by saddr/prefix
 * |                 |              |
 * |                 |         xfrm_pol_inexact_node
 * |                 |              |
 * |                 |              + root: unused
 * |                 |              |
 * |                 |              + hhead: saddr:daddr policies
 * |                 |
 * |                 +- coarse policies and all any:daddr policies
 * |
 * +---- root_s: sorted by saddr:prefix
 * |                 |
 * |        xfrm_pol_inexact_node
 * |                 |
 * |                 + root: unused
 * |                 |
 * |                 + hhead: saddr:any policies
 * |
 * +---- coarse policies and all any:any policies
 *
 * Lookups return four candidate lists:
 * 1. any:any list from top-level xfrm_pol_inexact_bin
 * 2. any:daddr list from daddr tree
 * 3. saddr:daddr list from 2nd level daddr tree
 * 4. saddr:any list from saddr tree
 *
 * This result set then needs to be searched for the policy with
 * the lowest priority.  If two candidates have the same priority, the
 * struct xfrm_policy pos member with the lower number is used.
 *
 * This replicates previous single-list-search algorithm which would
 * return first matching policy in the (ordered-by-priority) list.
 */
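/* Illustrative sketch (editor-added, not part of the kernel source; kept
 * under #if 0 so it is not built): how the four candidate lists above are
 * conceptually resolved to a single winner.  The scan keeps the entry with
 * the lowest priority value and breaks ties with the lower pos.  The names
 * below (struct cand, best_of) are hypothetical stand-ins, not kernel APIs.
 */
#if 0
#include <stddef.h>
#include <stdint.h>

struct cand {
	uint32_t priority;	/* lower value wins */
	uint32_t pos;		/* insertion order, breaks priority ties */
};

/* Return the best entry across up to four candidate lists. */
static const struct cand *best_of(const struct cand *lists[4],
				  const size_t lens[4])
{
	const struct cand *best = NULL;

	for (int i = 0; i < 4; i++) {
		for (size_t j = 0; j < lens[i]; j++) {
			const struct cand *c = &lists[i][j];

			if (!best ||
			    c->priority < best->priority ||
			    (c->priority == best->priority &&
			     c->pos < best->pos))
				best = c;
		}
	}
	return best;
}
#endif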
struct xfrm_pol_inexact_key {
	possible_net_t net;
	u32 if_id;
	u16 family;
	u8 dir, type;
};

struct xfrm_pol_inexact_bin {
	struct xfrm_pol_inexact_key k;
	struct rhash_head head;
	/* list containing '*:*' policies */
	struct hlist_head hhead;

	seqcount_spinlock_t count;
	/* tree sorted by daddr/prefix */
	struct rb_root root_d;

	/* tree sorted by saddr/prefix */
	struct rb_root root_s;

	/* slow path below */
	struct list_head inexact_bins;
	struct rcu_head rcu;
};

enum xfrm_pol_inexact_candidate_type {
	XFRM_POL_CAND_BOTH,
	XFRM_POL_CAND_SADDR,
	XFRM_POL_CAND_DADDR,
	XFRM_POL_CAND_ANY,

	XFRM_POL_CAND_MAX,
};

struct xfrm_pol_inexact_candidates {
	struct hlist_head *res[XFRM_POL_CAND_MAX];
};

struct xfrm_flow_keys {
	struct flow_dissector_key_basic basic;
	struct flow_dissector_key_control control;
	union {
		struct flow_dissector_key_ipv4_addrs ipv4;
		struct flow_dissector_key_ipv6_addrs ipv6;
	} addrs;
	struct flow_dissector_key_ip ip;
	struct flow_dissector_key_icmp icmp;
	struct flow_dissector_key_ports ports;
	struct flow_dissector_key_keyid gre;
};

static struct flow_dissector xfrm_session_dissector __ro_after_init;

static DEFINE_SPINLOCK(xfrm_if_cb_lock);
static struct xfrm_if_cb const __rcu *xfrm_if_cb __read_mostly;

static DEFINE_SPINLOCK(xfrm_policy_afinfo_lock);
static struct xfrm_policy_afinfo const __rcu *xfrm_policy_afinfo[AF_INET6 + 1]
						__read_mostly;

static struct kmem_cache *xfrm_dst_cache __ro_after_init;

static struct rhashtable xfrm_policy_inexact_table;
static const struct rhashtable_params xfrm_pol_inexact_params;

static void xfrm_init_pmtu(struct xfrm_dst **bundle, int nr);
static int stale_bundle(struct dst_entry *dst);
static int xfrm_bundle_ok(struct xfrm_dst *xdst);
static void xfrm_policy_queue_process(struct timer_list *t);

static void __xfrm_policy_link(struct xfrm_policy *pol, int dir);
static struct xfrm_policy *__xfrm_policy_unlink(struct xfrm_policy *pol,
						int dir);

static struct xfrm_pol_inexact_bin *
xfrm_policy_inexact_lookup(struct net *net, u8 type, u16 family, u8 dir,
			   u32 if_id);

static struct xfrm_pol_inexact_bin *
xfrm_policy_inexact_lookup_rcu(struct net *net,
			       u8 type, u16 family, u8 dir, u32 if_id);
static struct xfrm_policy *
xfrm_policy_insert_list(struct hlist_head *chain, struct xfrm_policy *policy,
			bool excl);
static bool
xfrm_policy_find_inexact_candidates(struct xfrm_pol_inexact_candidates *cand,
				    struct xfrm_pol_inexact_bin *b,
				    const xfrm_address_t *saddr,
				    const xfrm_address_t *daddr);

static inline bool xfrm_pol_hold_rcu(struct xfrm_policy *policy)
{
	return refcount_inc_not_zero(&policy->refcnt);
}

static inline bool
__xfrm4_selector_match(const struct xfrm_selector *sel, const struct flowi *fl)
{
	const struct flowi4 *fl4 = &fl->u.ip4;

	return  addr4_match(fl4->daddr, sel->daddr.a4, sel->prefixlen_d) &&
		addr4_match(fl4->saddr, sel->saddr.a4, sel->prefixlen_s) &&
		!((xfrm_flowi_dport(fl, &fl4->uli) ^ sel->dport) & sel->dport_mask) &&
		!((xfrm_flowi_sport(fl, &fl4->uli) ^ sel->sport) & sel->sport_mask) &&
		(fl4->flowi4_proto == sel->proto || !sel->proto) &&
		(fl4->flowi4_oif == sel->ifindex || !sel->ifindex);
}

static inline bool
__xfrm6_selector_match(const struct xfrm_selector *sel, const struct flowi *fl)
{
	const struct flowi6 *fl6 = &fl->u.ip6;

	return  addr_match(&fl6->daddr, &sel->daddr, sel->prefixlen_d) &&
		addr_match(&fl6->saddr, &sel->saddr, sel->prefixlen_s) &&
		!((xfrm_flowi_dport(fl, &fl6->uli) ^ sel->dport) & sel->dport_mask) &&
		!((xfrm_flowi_sport(fl, &fl6->uli) ^ sel->sport) & sel->sport_mask) &&
		(fl6->flowi6_proto == sel->proto || !sel->proto) &&
		(fl6->flowi6_oif == sel->ifindex || !sel->ifindex);
}

bool xfrm_selector_match(const struct xfrm_selector *sel, const struct flowi *fl,
			 unsigned short family)
{
	switch (family) {
	case AF_INET:
		return __xfrm4_selector_match(sel, fl);
	case AF_INET6:
		return __xfrm6_selector_match(sel, fl);
	}
	return false;
}
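/* Illustrative sketch (editor-added, kept under #if 0): the port test used
 * in the selector matches above.  A selector stores port and mask in network
 * byte order; mask 0xffff means "match this exact port", mask 0 means
 * "match any port".  (flow_port ^ sel_port) & mask is zero exactly when all
 * masked bits agree.  Standalone userspace version:
 */
#if 0
#include <assert.h>
#include <stdint.h>
#include <arpa/inet.h>

static int port_matches(uint16_t flow_port, uint16_t sel_port, uint16_t mask)
{
	return !((flow_port ^ sel_port) & mask);
}

int main(void)
{
	/* exact match on port 443 */
	assert(port_matches(htons(443), htons(443), 0xffff));
	assert(!port_matches(htons(80), htons(443), 0xffff));
	/* wildcard: mask 0 matches everything */
	assert(port_matches(htons(80), htons(443), 0));
	return 0;
}
#endif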
static const struct xfrm_policy_afinfo *xfrm_policy_get_afinfo(unsigned short family)
{
	const struct xfrm_policy_afinfo *afinfo;

	if (unlikely(family >= ARRAY_SIZE(xfrm_policy_afinfo)))
		return NULL;
	rcu_read_lock();
	afinfo = rcu_dereference(xfrm_policy_afinfo[family]);
	if (unlikely(!afinfo))
		rcu_read_unlock();
	return afinfo;
}

/* Called with rcu_read_lock(). */
static const struct xfrm_if_cb *xfrm_if_get_cb(void)
{
	return rcu_dereference(xfrm_if_cb);
}

struct dst_entry *__xfrm_dst_lookup(int family,
				    const struct xfrm_dst_lookup_params *params)
{
	const struct xfrm_policy_afinfo *afinfo;
	struct dst_entry *dst;

	afinfo = xfrm_policy_get_afinfo(family);
	if (unlikely(afinfo == NULL))
		return ERR_PTR(-EAFNOSUPPORT);

	dst = afinfo->dst_lookup(params);

	rcu_read_unlock();

	return dst;
}
EXPORT_SYMBOL(__xfrm_dst_lookup);
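/* Illustrative sketch (editor-added, kept under #if 0): the locking contract
 * used above.  xfrm_policy_get_afinfo() returns with rcu_read_lock() held on
 * success and drops it itself on failure, so callers only unlock on the
 * success path.  A generic caller therefore looks like this; use() is a
 * hypothetical stand-in for work done under RCU protection:
 */
#if 0
static int caller(unsigned short family)
{
	const struct xfrm_policy_afinfo *afinfo;
	int err;

	afinfo = xfrm_policy_get_afinfo(family); /* takes rcu_read_lock() */
	if (!afinfo)
		return -EAFNOSUPPORT;	/* lock already dropped for us */

	err = use(afinfo);		/* hypothetical work under RCU */

	rcu_read_unlock();		/* success path must unlock */
	return err;
}
#endif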
static inline struct dst_entry *xfrm_dst_lookup(struct xfrm_state *x,
						int tos, int oif,
						xfrm_address_t *prev_saddr,
						xfrm_address_t *prev_daddr,
						int family, u32 mark)
{
	struct xfrm_dst_lookup_params params;
	struct net *net = xs_net(x);
	xfrm_address_t *saddr = &x->props.saddr;
	xfrm_address_t *daddr = &x->id.daddr;
	struct dst_entry *dst;

	if (x->type->flags & XFRM_TYPE_LOCAL_COADDR) {
		saddr = x->coaddr;
		daddr = prev_daddr;
	}
	if (x->type->flags & XFRM_TYPE_REMOTE_COADDR) {
		saddr = prev_saddr;
		daddr = x->coaddr;
	}

	params.net = net;
	params.saddr = saddr;
	params.daddr = daddr;
	params.tos = tos;
	params.oif = oif;
	params.mark = mark;
	params.ipproto = x->id.proto;
	if (x->encap) {
		switch (x->encap->encap_type) {
		case UDP_ENCAP_ESPINUDP:
			params.ipproto = IPPROTO_UDP;
			params.uli.ports.sport = x->encap->encap_sport;
			params.uli.ports.dport = x->encap->encap_dport;
			break;
		case TCP_ENCAP_ESPINTCP:
			params.ipproto = IPPROTO_TCP;
			params.uli.ports.sport = x->encap->encap_sport;
			params.uli.ports.dport = x->encap->encap_dport;
			break;
		}
	}

	dst = __xfrm_dst_lookup(family, &params);

	if (!IS_ERR(dst)) {
		if (prev_saddr != saddr)
			memcpy(prev_saddr, saddr, sizeof(*prev_saddr));
		if (prev_daddr != daddr)
			memcpy(prev_daddr, daddr, sizeof(*prev_daddr));
	}

	return dst;
}

static inline unsigned long make_jiffies(long secs)
{
	if (secs >= (MAX_SCHEDULE_TIMEOUT-1)/HZ)
		return MAX_SCHEDULE_TIMEOUT-1;
	else
		return secs*HZ;
}
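/* Illustrative sketch (editor-added, kept under #if 0): the overflow guard in
 * make_jiffies().  Multiplying a large seconds value by HZ can overflow, so
 * anything that would exceed MAX_SCHEDULE_TIMEOUT-1 jiffies is clamped to
 * that ceiling first.  Standalone version with stand-in constants:
 */
#if 0
#include <assert.h>
#include <limits.h>

#define HZ 1000L
#define MAX_SCHEDULE_TIMEOUT LONG_MAX

static unsigned long make_jiffies(long secs)
{
	if (secs >= (MAX_SCHEDULE_TIMEOUT - 1) / HZ)
		return MAX_SCHEDULE_TIMEOUT - 1;
	return secs * HZ;
}

int main(void)
{
	assert(make_jiffies(2) == 2 * HZ);	/* small value scales */
	assert(make_jiffies(LONG_MAX / 2) ==	/* huge value clamps */
	       MAX_SCHEDULE_TIMEOUT - 1);
	return 0;
}
#endif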
static void xfrm_policy_timer(struct timer_list *t)
{
	struct xfrm_policy *xp = from_timer(xp, t, timer);
	time64_t now = ktime_get_real_seconds();
	time64_t next = TIME64_MAX;
	int warn = 0;
	int dir;

	read_lock(&xp->lock);

	if (unlikely(xp->walk.dead))
		goto out;

	dir = xfrm_policy_id2dir(xp->index);

	if (xp->lft.hard_add_expires_seconds) {
		time64_t tmo = xp->lft.hard_add_expires_seconds +
			xp->curlft.add_time - now;
		if (tmo <= 0)
			goto expired;
		if (tmo < next)
			next = tmo;
	}
	if (xp->lft.hard_use_expires_seconds) {
		time64_t tmo = xp->lft.hard_use_expires_seconds +
			(READ_ONCE(xp->curlft.use_time) ? : xp->curlft.add_time) - now;
		if (tmo <= 0)
			goto expired;
		if (tmo < next)
			next = tmo;
	}
	if (xp->lft.soft_add_expires_seconds) {
		time64_t tmo = xp->lft.soft_add_expires_seconds +
			xp->curlft.add_time - now;
		if (tmo <= 0) {
			warn = 1;
			tmo = XFRM_KM_TIMEOUT;
		}
		if (tmo < next)
			next = tmo;
	}
	if (xp->lft.soft_use_expires_seconds) {
		time64_t tmo = xp->lft.soft_use_expires_seconds +
			(READ_ONCE(xp->curlft.use_time) ? : xp->curlft.add_time) - now;
		if (tmo <= 0) {
			warn = 1;
			tmo = XFRM_KM_TIMEOUT;
		}
		if (tmo < next)
			next = tmo;
	}

	if (warn)
		km_policy_expired(xp, dir, 0, 0);
	if (next != TIME64_MAX &&
	    !mod_timer(&xp->timer, jiffies + make_jiffies(next)))
		xfrm_pol_hold(xp);

out:
	read_unlock(&xp->lock);
	xfrm_pol_put(xp);
	return;

expired:
	read_unlock(&xp->lock);
	if (!xfrm_policy_delete(xp, dir))
		km_policy_expired(xp, dir, 1, 0);
	xfrm_pol_put(xp);
}

/* Allocate xfrm_policy. Not used here, it is supposed to be used by pfkeyv2
 * SPD calls.
 */

struct xfrm_policy *xfrm_policy_alloc(struct net *net, gfp_t gfp)
{
	struct xfrm_policy *policy;

	policy = kzalloc(sizeof(struct xfrm_policy), gfp);

	if (policy) {
		write_pnet(&policy->xp_net, net);
		INIT_LIST_HEAD(&policy->walk.all);
		INIT_HLIST_HEAD(&policy->state_cache_list);
		INIT_HLIST_NODE(&policy->bydst);
		INIT_HLIST_NODE(&policy->byidx);
		rwlock_init(&policy->lock);
		refcount_set(&policy->refcnt, 1);
		skb_queue_head_init(&policy->polq.hold_queue);
		timer_setup(&policy->timer, xfrm_policy_timer, 0);
		timer_setup(&policy->polq.hold_timer,
			    xfrm_policy_queue_process, 0);
	}
	return policy;
}
EXPORT_SYMBOL(xfrm_policy_alloc);

static void xfrm_policy_destroy_rcu(struct rcu_head *head)
{
	struct xfrm_policy *policy = container_of(head, struct xfrm_policy, rcu);

	security_xfrm_policy_free(policy->security);
	kfree(policy);
}
/* Destroy xfrm_policy: descendant resources must be released by this point. */

void xfrm_policy_destroy(struct xfrm_policy *policy)
{
	BUG_ON(!policy->walk.dead);

	if (del_timer(&policy->timer) || del_timer(&policy->polq.hold_timer))
		BUG();

	xfrm_dev_policy_free(policy);
	call_rcu(&policy->rcu, xfrm_policy_destroy_rcu);
}
EXPORT_SYMBOL(xfrm_policy_destroy);

/* Rule must be locked. Release descendant resources, announce
 * entry dead. The rule must already be unlinked from lists at this point.
 */
static void xfrm_policy_kill(struct xfrm_policy *policy)
{
	struct net *net = xp_net(policy);
	struct xfrm_state *x;

	xfrm_dev_policy_delete(policy);

	write_lock_bh(&policy->lock);
	policy->walk.dead = 1;
	write_unlock_bh(&policy->lock);

	atomic_inc(&policy->genid);

	if (del_timer(&policy->polq.hold_timer))
		xfrm_pol_put(policy);
	skb_queue_purge(&policy->polq.hold_queue);

	if (del_timer(&policy->timer))
		xfrm_pol_put(policy);

	/* XXX: Flush state cache */
	spin_lock_bh(&net->xfrm.xfrm_state_lock);
	hlist_for_each_entry_rcu(x, &policy->state_cache_list, state_cache) {
		hlist_del_init_rcu(&x->state_cache);
	}
	spin_unlock_bh(&net->xfrm.xfrm_state_lock);

	xfrm_pol_put(policy);
}

static unsigned int xfrm_policy_hashmax __read_mostly = 1 * 1024 * 1024;

static inline unsigned int idx_hash(struct net *net, u32 index)
{
	return __idx_hash(index, net->xfrm.policy_idx_hmask);
}

/* calculate policy hash thresholds */
static void __get_hash_thresh(struct net *net,
			      unsigned short family, int dir,
			      u8 *dbits, u8 *sbits)
{
	switch (family) {
	case AF_INET:
		*dbits = net->xfrm.policy_bydst[dir].dbits4;
		*sbits = net->xfrm.policy_bydst[dir].sbits4;
		break;

	case AF_INET6:
		*dbits = net->xfrm.policy_bydst[dir].dbits6;
		*sbits = net->xfrm.policy_bydst[dir].sbits6;
		break;

	default:
		*dbits = 0;
		*sbits = 0;
	}
}

static struct hlist_head *policy_hash_bysel(struct net *net,
					    const struct xfrm_selector *sel,
					    unsigned short family, int dir)
{
	unsigned int hmask = net->xfrm.policy_bydst[dir].hmask;
	unsigned int hash;
	u8 dbits;
	u8 sbits;

	__get_hash_thresh(net, family, dir, &dbits, &sbits);
	hash = __sel_hash(sel, family, hmask, dbits, sbits);

	if (hash == hmask + 1)
		return NULL;

	return rcu_dereference_check(net->xfrm.policy_bydst[dir].table,
		     lockdep_is_held(&net->xfrm.xfrm_policy_lock)) + hash;
}

static struct hlist_head *policy_hash_direct(struct net *net,
					     const xfrm_address_t *daddr,
					     const xfrm_address_t *saddr,
					     unsigned short family, int dir)
{
	unsigned int hmask = net->xfrm.policy_bydst[dir].hmask;
	unsigned int hash;
	u8 dbits;
	u8 sbits;

	__get_hash_thresh(net, family, dir, &dbits, &sbits);
	hash = __addr_hash(daddr, saddr, family, hmask, dbits, sbits);

	return rcu_dereference_check(net->xfrm.policy_bydst[dir].table,
		     lockdep_is_held(&net->xfrm.xfrm_policy_lock)) + hash;
}

static void xfrm_dst_hash_transfer(struct net *net,
				   struct hlist_head *list,
				   struct hlist_head *ndsttable,
				   unsigned int nhashmask,
				   int dir)
{
	struct hlist_node *tmp, *entry0 = NULL;
	struct xfrm_policy *pol;
	unsigned int h0 = 0;
	u8 dbits;
	u8 sbits;

redo:
	hlist_for_each_entry_safe(pol, tmp, list, bydst) {
		unsigned int h;

		__get_hash_thresh(net, pol->family, dir, &dbits, &sbits);
		h = __addr_hash(&pol->selector.daddr, &pol->selector.saddr,
				pol->family, nhashmask, dbits, sbits);
		if (!entry0 || pol->xdo.type == XFRM_DEV_OFFLOAD_PACKET) {
			hlist_del_rcu(&pol->bydst);
			hlist_add_head_rcu(&pol->bydst, ndsttable + h);
			h0 = h;
		} else {
			if (h != h0)
				continue;
			hlist_del_rcu(&pol->bydst);
			hlist_add_behind_rcu(&pol->bydst, entry0);
		}
		entry0 = &pol->bydst;
	}
	if (!hlist_empty(list)) {
		entry0 = NULL;
		goto redo;
	}
}

static void xfrm_idx_hash_transfer(struct hlist_head *list,
				   struct hlist_head *nidxtable,
				   unsigned int nhashmask)
{
	struct hlist_node *tmp;
	struct xfrm_policy *pol;

	hlist_for_each_entry_safe(pol, tmp, list, byidx) {
		unsigned int h;

		h = __idx_hash(pol->index, nhashmask);
		hlist_add_head(&pol->byidx, nidxtable+h);
	}
}

static unsigned long xfrm_new_hash_mask(unsigned int old_hmask)
{
	return ((old_hmask + 1) << 1) - 1;
}
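/* Illustrative sketch (editor-added, kept under #if 0): the hash tables are
 * kept at power-of-two sizes and stored as "size - 1" masks, so doubling a
 * table is ((old + 1) << 1) - 1.  E.g. hmask 15 (16 buckets) grows to
 * hmask 31 (32 buckets):
 */
#if 0
#include <assert.h>

int main(void)
{
	unsigned int old_hmask = 15;	/* 16 buckets */
	unsigned int new_hmask = ((old_hmask + 1) << 1) - 1;

	assert(new_hmask == 31);	/* 32 buckets */
	return 0;
}
#endif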
static void xfrm_bydst_resize(struct net *net, int dir)
{
	unsigned int hmask = net->xfrm.policy_bydst[dir].hmask;
	unsigned int nhashmask = xfrm_new_hash_mask(hmask);
	unsigned int nsize = (nhashmask + 1) * sizeof(struct hlist_head);
	struct hlist_head *ndst = xfrm_hash_alloc(nsize);
	struct hlist_head *odst;
	int i;

	if (!ndst)
		return;

	spin_lock_bh(&net->xfrm.xfrm_policy_lock);
	write_seqcount_begin(&net->xfrm.xfrm_policy_hash_generation);

	odst = rcu_dereference_protected(net->xfrm.policy_bydst[dir].table,
				lockdep_is_held(&net->xfrm.xfrm_policy_lock));

	for (i = hmask; i >= 0; i--)
		xfrm_dst_hash_transfer(net, odst + i, ndst, nhashmask, dir);

	rcu_assign_pointer(net->xfrm.policy_bydst[dir].table, ndst);
	net->xfrm.policy_bydst[dir].hmask = nhashmask;

	write_seqcount_end(&net->xfrm.xfrm_policy_hash_generation);
	spin_unlock_bh(&net->xfrm.xfrm_policy_lock);

	synchronize_rcu();

	xfrm_hash_free(odst, (hmask + 1) * sizeof(struct hlist_head));
}

static void xfrm_byidx_resize(struct net *net)
{
	unsigned int hmask = net->xfrm.policy_idx_hmask;
	unsigned int nhashmask = xfrm_new_hash_mask(hmask);
	unsigned int nsize = (nhashmask + 1) * sizeof(struct hlist_head);
	struct hlist_head *oidx = net->xfrm.policy_byidx;
	struct hlist_head *nidx = xfrm_hash_alloc(nsize);
	int i;

	if (!nidx)
		return;

	spin_lock_bh(&net->xfrm.xfrm_policy_lock);

	for (i = hmask; i >= 0; i--)
		xfrm_idx_hash_transfer(oidx + i, nidx, nhashmask);

	net->xfrm.policy_byidx = nidx;
	net->xfrm.policy_idx_hmask = nhashmask;

	spin_unlock_bh(&net->xfrm.xfrm_policy_lock);

	xfrm_hash_free(oidx, (hmask + 1) * sizeof(struct hlist_head));
}

static inline int xfrm_bydst_should_resize(struct net *net, int dir, int *total)
{
	unsigned int cnt = net->xfrm.policy_count[dir];
	unsigned int hmask = net->xfrm.policy_bydst[dir].hmask;

	if (total)
		*total += cnt;

	if ((hmask + 1) < xfrm_policy_hashmax &&
	    cnt > hmask)
		return 1;

	return 0;
}

static inline int xfrm_byidx_should_resize(struct net *net, int total)
{
	unsigned int hmask = net->xfrm.policy_idx_hmask;

	if ((hmask + 1) < xfrm_policy_hashmax &&
	    total > hmask)
		return 1;

	return 0;
}

void xfrm_spd_getinfo(struct net *net, struct xfrmk_spdinfo *si)
{
	si->incnt = net->xfrm.policy_count[XFRM_POLICY_IN];
	si->outcnt = net->xfrm.policy_count[XFRM_POLICY_OUT];
	si->fwdcnt = net->xfrm.policy_count[XFRM_POLICY_FWD];
	si->inscnt = net->xfrm.policy_count[XFRM_POLICY_IN+XFRM_POLICY_MAX];
	si->outscnt = net->xfrm.policy_count[XFRM_POLICY_OUT+XFRM_POLICY_MAX];
	si->fwdscnt = net->xfrm.policy_count[XFRM_POLICY_FWD+XFRM_POLICY_MAX];
	si->spdhcnt = net->xfrm.policy_idx_hmask;
	si->spdhmcnt = xfrm_policy_hashmax;
}
EXPORT_SYMBOL(xfrm_spd_getinfo);

static DEFINE_MUTEX(hash_resize_mutex);
static void xfrm_hash_resize(struct work_struct *work)
{
	struct net *net = container_of(work, struct net, xfrm.policy_hash_work);
	int dir, total;

	mutex_lock(&hash_resize_mutex);

	total = 0;
	for (dir = 0; dir < XFRM_POLICY_MAX; dir++) {
		if (xfrm_bydst_should_resize(net, dir, &total))
			xfrm_bydst_resize(net, dir);
	}
	if (xfrm_byidx_should_resize(net, total))
		xfrm_byidx_resize(net);

	mutex_unlock(&hash_resize_mutex);
}

/* Make sure *pol can be inserted into fastbin.
 * Useful to check that later insert requests will be successful
 * (provided xfrm_policy_lock is held throughout).
 */
static struct xfrm_pol_inexact_bin *
xfrm_policy_inexact_alloc_bin(const struct xfrm_policy *pol, u8 dir)
{
	struct xfrm_pol_inexact_bin *bin, *prev;
	struct xfrm_pol_inexact_key k = {
		.family = pol->family,
		.type = pol->type,
		.dir = dir,
		.if_id = pol->if_id,
	};
	struct net *net = xp_net(pol);

	lockdep_assert_held(&net->xfrm.xfrm_policy_lock);

	write_pnet(&k.net, net);
	bin = rhashtable_lookup_fast(&xfrm_policy_inexact_table, &k,
				     xfrm_pol_inexact_params);
	if (bin)
		return bin;

	bin = kzalloc(sizeof(*bin), GFP_ATOMIC);
	if (!bin)
		return NULL;

	bin->k = k;
	INIT_HLIST_HEAD(&bin->hhead);
	bin->root_d = RB_ROOT;
	bin->root_s = RB_ROOT;
	seqcount_spinlock_init(&bin->count, &net->xfrm.xfrm_policy_lock);

	prev = rhashtable_lookup_get_insert_key(&xfrm_policy_inexact_table,
						&bin->k, &bin->head,
						xfrm_pol_inexact_params);
	if (!prev) {
		list_add(&bin->inexact_bins, &net->xfrm.inexact_bins);
		return bin;
	}

	kfree(bin);

	return IS_ERR(prev) ? NULL : prev;
}

static bool xfrm_pol_inexact_addr_use_any_list(const xfrm_address_t *addr,
					       int family, u8 prefixlen)
{
	if (xfrm_addr_any(addr, family))
		return true;

	if (family == AF_INET6 && prefixlen < INEXACT_PREFIXLEN_IPV6)
		return true;

	if (family == AF_INET && prefixlen < INEXACT_PREFIXLEN_IPV4)
		return true;

	return false;
}
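/* Illustrative sketch (editor-added, kept under #if 0): how the
 * INEXACT_PREFIXLEN_* thresholds steer storage.  Prefixes shorter than /16
 * (IPv4) or /48 (IPv6) are too coarse to be worth a tree node, so they go
 * to the per-bin "any" list.  Simplified standalone re-statement (the
 * wildcard-address check is omitted here):
 */
#if 0
#include <assert.h>
#include <stdbool.h>

#define INEXACT_PREFIXLEN_IPV4	16
#define INEXACT_PREFIXLEN_IPV6	48

static bool use_any_list(bool is_ipv6, unsigned int prefixlen)
{
	return prefixlen < (is_ipv6 ? INEXACT_PREFIXLEN_IPV6
				    : INEXACT_PREFIXLEN_IPV4);
}

int main(void)
{
	assert(use_any_list(false, 8));		/* 10.0.0.0/8    -> list */
	assert(!use_any_list(false, 24));	/* 10.1.2.0/24   -> tree */
	assert(use_any_list(true, 32));		/* 2001:db8::/32 -> list */
	assert(!use_any_list(true, 64));	/* 2001:db8::/64 -> tree */
	return 0;
}
#endif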
static bool
xfrm_policy_inexact_insert_use_any_list(const struct xfrm_policy *policy)
{
	const xfrm_address_t *addr;
	bool saddr_any, daddr_any;
	u8 prefixlen;

	addr = &policy->selector.saddr;
	prefixlen = policy->selector.prefixlen_s;

	saddr_any = xfrm_pol_inexact_addr_use_any_list(addr,
						       policy->family,
						       prefixlen);
	addr = &policy->selector.daddr;
	prefixlen = policy->selector.prefixlen_d;
	daddr_any = xfrm_pol_inexact_addr_use_any_list(addr,
						       policy->family,
						       prefixlen);
	return saddr_any && daddr_any;
}

static void xfrm_pol_inexact_node_init(struct xfrm_pol_inexact_node *node,
				       const xfrm_address_t *addr, u8 prefixlen)
{
	node->addr = *addr;
	node->prefixlen = prefixlen;
}

static struct xfrm_pol_inexact_node *
xfrm_pol_inexact_node_alloc(const xfrm_address_t *addr, u8 prefixlen)
{
	struct xfrm_pol_inexact_node *node;

	node = kzalloc(sizeof(*node), GFP_ATOMIC);
	if (node)
		xfrm_pol_inexact_node_init(node, addr, prefixlen);

	return node;
}

static int xfrm_policy_addr_delta(const xfrm_address_t *a,
				  const xfrm_address_t *b,
				  u8 prefixlen, u16 family)
{
	u32 ma, mb, mask;
	unsigned int pdw, pbi;
	int delta = 0;

	switch (family) {
	case AF_INET:
		if (prefixlen == 0)
			return 0;
		mask = ~0U << (32 - prefixlen);
		ma = ntohl(a->a4) & mask;
		mb = ntohl(b->a4) & mask;
		if (ma < mb)
			delta = -1;
		else if (ma > mb)
			delta = 1;
		break;
	case AF_INET6:
		pdw = prefixlen >> 5;
		pbi = prefixlen & 0x1f;
		if (pdw) {
			delta = memcmp(a->a6, b->a6, pdw << 2);
			if (delta)
				return delta;
		}
		if (pbi) {
			mask = ~0U << (32 - pbi);
			ma = ntohl(a->a6[pdw]) & mask;
			mb = ntohl(b->a6[pdw]) & mask;
			if (ma < mb)
				delta = -1;
			else if (ma > mb)
				delta = 1;
		}
		break;
	default:
		break;
	}

	return delta;
}
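/* Illustrative sketch (editor-added, kept under #if 0): the IPv4 branch of
 * xfrm_policy_addr_delta() compares only the top `prefixlen` bits, so two
 * addresses in the same subnet compare equal.  Standalone worked example:
 */
#if 0
#include <assert.h>
#include <stdint.h>
#include <arpa/inet.h>

static int addr4_delta(uint32_t a, uint32_t b, unsigned int prefixlen)
{
	uint32_t mask, ma, mb;

	if (prefixlen == 0)
		return 0;
	mask = ~0U << (32 - prefixlen);
	ma = ntohl(a) & mask;
	mb = ntohl(b) & mask;
	return ma < mb ? -1 : (ma > mb ? 1 : 0);
}

int main(void)
{
	uint32_t a, b;

	inet_pton(AF_INET, "192.0.2.1", &a);
	inet_pton(AF_INET, "192.0.2.200", &b);

	assert(addr4_delta(a, b, 24) == 0);	/* same /24 subnet */
	assert(addr4_delta(a, b, 32) < 0);	/* .1 sorts before .200 */
	return 0;
}
#endif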
  745. static void xfrm_policy_inexact_list_reinsert(struct net *net,
  746. struct xfrm_pol_inexact_node *n,
  747. u16 family)
  748. {
  749. unsigned int matched_s, matched_d;
  750. struct xfrm_policy *policy, *p;
  751. matched_s = 0;
  752. matched_d = 0;
  753. list_for_each_entry_reverse(policy, &net->xfrm.policy_all, walk.all) {
  754. struct hlist_node *newpos = NULL;
  755. bool matches_s, matches_d;
  756. if (policy->walk.dead || !policy->bydst_reinsert)
  757. continue;
  758. WARN_ON_ONCE(policy->family != family);
  759. policy->bydst_reinsert = false;
  760. hlist_for_each_entry(p, &n->hhead, bydst) {
  761. if (policy->priority > p->priority)
  762. newpos = &p->bydst;
  763. else if (policy->priority == p->priority &&
  764. policy->pos > p->pos)
  765. newpos = &p->bydst;
  766. else
  767. break;
  768. }
  769. if (newpos && policy->xdo.type != XFRM_DEV_OFFLOAD_PACKET)
  770. hlist_add_behind_rcu(&policy->bydst, newpos);
  771. else
  772. hlist_add_head_rcu(&policy->bydst, &n->hhead);
  773. /* paranoia checks follow.
  774. * Check that the reinserted policy matches at least
  775. * saddr or daddr for current node prefix.
  776. *
  777. * Matching both is fine, matching saddr in one policy
  778. * (but not daddr) and then matching only daddr in another
  779. * is a bug.
  780. */
  781. matches_s = xfrm_policy_addr_delta(&policy->selector.saddr,
  782. &n->addr,
  783. n->prefixlen,
  784. family) == 0;
  785. matches_d = xfrm_policy_addr_delta(&policy->selector.daddr,
  786. &n->addr,
  787. n->prefixlen,
  788. family) == 0;
  789. if (matches_s && matches_d)
  790. continue;
  791. WARN_ON_ONCE(!matches_s && !matches_d);
  792. if (matches_s)
  793. matched_s++;
  794. if (matches_d)
  795. matched_d++;
  796. WARN_ON_ONCE(matched_s && matched_d);
  797. }
  798. }
  799. static void xfrm_policy_inexact_node_reinsert(struct net *net,
  800. struct xfrm_pol_inexact_node *n,
  801. struct rb_root *new,
  802. u16 family)
  803. {
  804. struct xfrm_pol_inexact_node *node;
  805. struct rb_node **p, *parent;
  806. /* we should not have another subtree here */
  807. WARN_ON_ONCE(!RB_EMPTY_ROOT(&n->root));
  808. restart:
  809. parent = NULL;
  810. p = &new->rb_node;
  811. while (*p) {
  812. u8 prefixlen;
  813. int delta;
  814. parent = *p;
  815. node = rb_entry(*p, struct xfrm_pol_inexact_node, node);
  816. prefixlen = min(node->prefixlen, n->prefixlen);
  817. delta = xfrm_policy_addr_delta(&n->addr, &node->addr,
  818. prefixlen, family);
  819. if (delta < 0) {
  820. p = &parent->rb_left;
  821. } else if (delta > 0) {
  822. p = &parent->rb_right;
  823. } else {
  824. bool same_prefixlen = node->prefixlen == n->prefixlen;
  825. struct xfrm_policy *tmp;
  826. hlist_for_each_entry(tmp, &n->hhead, bydst) {
  827. tmp->bydst_reinsert = true;
  828. hlist_del_rcu(&tmp->bydst);
  829. }
  830. node->prefixlen = prefixlen;
  831. xfrm_policy_inexact_list_reinsert(net, node, family);
  832. if (same_prefixlen) {
  833. kfree_rcu(n, rcu);
  834. return;
  835. }
  836. rb_erase(*p, new);
  837. kfree_rcu(n, rcu);
  838. n = node;
  839. goto restart;
  840. }
  841. }
  842. rb_link_node_rcu(&n->node, parent, p);
  843. rb_insert_color(&n->node, new);
  844. }
  845. /* merge nodes v and n */
  846. static void xfrm_policy_inexact_node_merge(struct net *net,
  847. struct xfrm_pol_inexact_node *v,
  848. struct xfrm_pol_inexact_node *n,
  849. u16 family)
  850. {
  851. struct xfrm_pol_inexact_node *node;
  852. struct xfrm_policy *tmp;
  853. struct rb_node *rnode;
  854. /* To-be-merged node v has a subtree.
  855. *
  856. * Dismantle it and insert its nodes to n->root.
  857. */
  858. while ((rnode = rb_first(&v->root)) != NULL) {
  859. node = rb_entry(rnode, struct xfrm_pol_inexact_node, node);
  860. rb_erase(&node->node, &v->root);
  861. xfrm_policy_inexact_node_reinsert(net, node, &n->root,
  862. family);
  863. }
  864. hlist_for_each_entry(tmp, &v->hhead, bydst) {
  865. tmp->bydst_reinsert = true;
  866. hlist_del_rcu(&tmp->bydst);
  867. }
  868. xfrm_policy_inexact_list_reinsert(net, n, family);
  869. }
  870. static struct xfrm_pol_inexact_node *
  871. xfrm_policy_inexact_insert_node(struct net *net,
  872. struct rb_root *root,
  873. xfrm_address_t *addr,
  874. u16 family, u8 prefixlen, u8 dir)
  875. {
  876. struct xfrm_pol_inexact_node *cached = NULL;
  877. struct rb_node **p, *parent = NULL;
  878. struct xfrm_pol_inexact_node *node;
  879. p = &root->rb_node;
  880. while (*p) {
  881. int delta;
  882. parent = *p;
  883. node = rb_entry(*p, struct xfrm_pol_inexact_node, node);
  884. delta = xfrm_policy_addr_delta(addr, &node->addr,
  885. node->prefixlen,
  886. family);
  887. if (delta == 0 && prefixlen >= node->prefixlen) {
  888. WARN_ON_ONCE(cached); /* ipsec policies got lost */
  889. return node;
  890. }
  891. if (delta < 0)
  892. p = &parent->rb_left;
  893. else
  894. p = &parent->rb_right;
  895. if (prefixlen < node->prefixlen) {
  896. delta = xfrm_policy_addr_delta(addr, &node->addr,
  897. prefixlen,
  898. family);
  899. if (delta)
  900. continue;
  901. /* This node is a subnet of the new prefix. It needs
  902. * to be removed and re-inserted with the smaller
  903. * prefix and all nodes that are now also covered
  904. * by the reduced prefixlen.
  905. */
  906. rb_erase(&node->node, root);
  907. if (!cached) {
  908. xfrm_pol_inexact_node_init(node, addr,
  909. prefixlen);
  910. cached = node;
  911. } else {
  912. /* This node also falls within the new
  913. * prefixlen. Merge the to-be-reinserted
  914. * node and this one.
  915. */
  916. xfrm_policy_inexact_node_merge(net, node,
  917. cached, family);
  918. kfree_rcu(node, rcu);
  919. }
  920. /* restart */
  921. p = &root->rb_node;
  922. parent = NULL;
  923. }
  924. }
  925. node = cached;
  926. if (!node) {
  927. node = xfrm_pol_inexact_node_alloc(addr, prefixlen);
  928. if (!node)
  929. return NULL;
  930. }
  931. rb_link_node_rcu(&node->node, parent, p);
  932. rb_insert_color(&node->node, root);
  933. return node;
  934. }
  935. static void xfrm_policy_inexact_gc_tree(struct rb_root *r, bool rm)
  936. {
  937. struct xfrm_pol_inexact_node *node;
  938. struct rb_node *rn = rb_first(r);
  939. while (rn) {
  940. node = rb_entry(rn, struct xfrm_pol_inexact_node, node);
  941. xfrm_policy_inexact_gc_tree(&node->root, rm);
  942. rn = rb_next(rn);
  943. if (!hlist_empty(&node->hhead) || !RB_EMPTY_ROOT(&node->root)) {
  944. WARN_ON_ONCE(rm);
  945. continue;
  946. }
  947. rb_erase(&node->node, r);
  948. kfree_rcu(node, rcu);
  949. }
  950. }
  951. static void __xfrm_policy_inexact_prune_bin(struct xfrm_pol_inexact_bin *b, bool net_exit)
  952. {
  953. write_seqcount_begin(&b->count);
  954. xfrm_policy_inexact_gc_tree(&b->root_d, net_exit);
  955. xfrm_policy_inexact_gc_tree(&b->root_s, net_exit);
  956. write_seqcount_end(&b->count);
  957. if (!RB_EMPTY_ROOT(&b->root_d) || !RB_EMPTY_ROOT(&b->root_s) ||
  958. !hlist_empty(&b->hhead)) {
  959. WARN_ON_ONCE(net_exit);
  960. return;
  961. }
  962. if (rhashtable_remove_fast(&xfrm_policy_inexact_table, &b->head,
  963. xfrm_pol_inexact_params) == 0) {
  964. list_del(&b->inexact_bins);
  965. kfree_rcu(b, rcu);
  966. }
  967. }
  968. static void xfrm_policy_inexact_prune_bin(struct xfrm_pol_inexact_bin *b)
  969. {
  970. struct net *net = read_pnet(&b->k.net);
  971. spin_lock_bh(&net->xfrm.xfrm_policy_lock);
  972. __xfrm_policy_inexact_prune_bin(b, false);
  973. spin_unlock_bh(&net->xfrm.xfrm_policy_lock);
  974. }
  975. static void __xfrm_policy_inexact_flush(struct net *net)
  976. {
  977. struct xfrm_pol_inexact_bin *bin, *t;
  978. lockdep_assert_held(&net->xfrm.xfrm_policy_lock);
  979. list_for_each_entry_safe(bin, t, &net->xfrm.inexact_bins, inexact_bins)
  980. __xfrm_policy_inexact_prune_bin(bin, false);
  981. }
  982. static struct hlist_head *
  983. xfrm_policy_inexact_alloc_chain(struct xfrm_pol_inexact_bin *bin,
  984. struct xfrm_policy *policy, u8 dir)
  985. {
  986. struct xfrm_pol_inexact_node *n;
  987. struct net *net;
  988. net = xp_net(policy);
  989. lockdep_assert_held(&net->xfrm.xfrm_policy_lock);
  990. if (xfrm_policy_inexact_insert_use_any_list(policy))
  991. return &bin->hhead;
  992. if (xfrm_pol_inexact_addr_use_any_list(&policy->selector.daddr,
  993. policy->family,
  994. policy->selector.prefixlen_d)) {
  995. write_seqcount_begin(&bin->count);
  996. n = xfrm_policy_inexact_insert_node(net,
  997. &bin->root_s,
  998. &policy->selector.saddr,
  999. policy->family,
  1000. policy->selector.prefixlen_s,
  1001. dir);
  1002. write_seqcount_end(&bin->count);
  1003. if (!n)
  1004. return NULL;
  1005. return &n->hhead;
  1006. }
  1007. /* daddr is fixed */
  1008. write_seqcount_begin(&bin->count);
  1009. n = xfrm_policy_inexact_insert_node(net,
  1010. &bin->root_d,
  1011. &policy->selector.daddr,
  1012. policy->family,
  1013. policy->selector.prefixlen_d, dir);
  1014. write_seqcount_end(&bin->count);
  1015. if (!n)
  1016. return NULL;
  1017. /* saddr is wildcard */
  1018. if (xfrm_pol_inexact_addr_use_any_list(&policy->selector.saddr,
  1019. policy->family,
  1020. policy->selector.prefixlen_s))
  1021. return &n->hhead;
  1022. write_seqcount_begin(&bin->count);
  1023. n = xfrm_policy_inexact_insert_node(net,
  1024. &n->root,
  1025. &policy->selector.saddr,
  1026. policy->family,
  1027. policy->selector.prefixlen_s, dir);
  1028. write_seqcount_end(&bin->count);
  1029. if (!n)
  1030. return NULL;
  1031. return &n->hhead;
  1032. }
  1033. static struct xfrm_policy *
  1034. xfrm_policy_inexact_insert(struct xfrm_policy *policy, u8 dir, int excl)
  1035. {
  1036. struct xfrm_pol_inexact_bin *bin;
  1037. struct xfrm_policy *delpol;
  1038. struct hlist_head *chain;
  1039. struct net *net;
  1040. bin = xfrm_policy_inexact_alloc_bin(policy, dir);
  1041. if (!bin)
  1042. return ERR_PTR(-ENOMEM);
  1043. net = xp_net(policy);
  1044. lockdep_assert_held(&net->xfrm.xfrm_policy_lock);
  1045. chain = xfrm_policy_inexact_alloc_chain(bin, policy, dir);
  1046. if (!chain) {
  1047. __xfrm_policy_inexact_prune_bin(bin, false);
  1048. return ERR_PTR(-ENOMEM);
  1049. }
  1050. delpol = xfrm_policy_insert_list(chain, policy, excl);
  1051. if (delpol && excl) {
  1052. __xfrm_policy_inexact_prune_bin(bin, false);
  1053. return ERR_PTR(-EEXIST);
  1054. }
  1055. if (delpol)
  1056. __xfrm_policy_inexact_prune_bin(bin, false);
  1057. return delpol;
  1058. }
  1059. static bool xfrm_policy_is_dead_or_sk(const struct xfrm_policy *policy)
  1060. {
  1061. int dir;
  1062. if (policy->walk.dead)
  1063. return true;
  1064. dir = xfrm_policy_id2dir(policy->index);
  1065. return dir >= XFRM_POLICY_MAX;
  1066. }
static void xfrm_hash_rebuild(struct work_struct *work)
{
        struct net *net = container_of(work, struct net,
                                       xfrm.policy_hthresh.work);
        struct xfrm_policy *pol;
        struct xfrm_policy *policy;
        struct hlist_head *chain;
        struct hlist_node *newpos;
        int dir;
        unsigned seq;
        u8 lbits4, rbits4, lbits6, rbits6;

        mutex_lock(&hash_resize_mutex);

        /* read selector prefixlen thresholds */
        do {
                seq = read_seqbegin(&net->xfrm.policy_hthresh.lock);

                lbits4 = net->xfrm.policy_hthresh.lbits4;
                rbits4 = net->xfrm.policy_hthresh.rbits4;
                lbits6 = net->xfrm.policy_hthresh.lbits6;
                rbits6 = net->xfrm.policy_hthresh.rbits6;
        } while (read_seqretry(&net->xfrm.policy_hthresh.lock, seq));

        spin_lock_bh(&net->xfrm.xfrm_policy_lock);
        write_seqcount_begin(&net->xfrm.xfrm_policy_hash_generation);

        /* make sure that we can insert the indirect policies again before
         * we start with destructive action.
         */
        list_for_each_entry(policy, &net->xfrm.policy_all, walk.all) {
                struct xfrm_pol_inexact_bin *bin;
                u8 dbits, sbits;

                if (xfrm_policy_is_dead_or_sk(policy))
                        continue;

                dir = xfrm_policy_id2dir(policy->index);
                if ((dir & XFRM_POLICY_MASK) == XFRM_POLICY_OUT) {
                        if (policy->family == AF_INET) {
                                dbits = rbits4;
                                sbits = lbits4;
                        } else {
                                dbits = rbits6;
                                sbits = lbits6;
                        }
                } else {
                        if (policy->family == AF_INET) {
                                dbits = lbits4;
                                sbits = rbits4;
                        } else {
                                dbits = lbits6;
                                sbits = rbits6;
                        }
                }

                if (policy->selector.prefixlen_d < dbits ||
                    policy->selector.prefixlen_s < sbits)
                        continue;

                bin = xfrm_policy_inexact_alloc_bin(policy, dir);
                if (!bin)
                        goto out_unlock;

                if (!xfrm_policy_inexact_alloc_chain(bin, policy, dir))
                        goto out_unlock;
        }

        for (dir = 0; dir < XFRM_POLICY_MAX; dir++) {
                if ((dir & XFRM_POLICY_MASK) == XFRM_POLICY_OUT) {
                        /* dir out => dst = remote, src = local */
                        net->xfrm.policy_bydst[dir].dbits4 = rbits4;
                        net->xfrm.policy_bydst[dir].sbits4 = lbits4;
                        net->xfrm.policy_bydst[dir].dbits6 = rbits6;
                        net->xfrm.policy_bydst[dir].sbits6 = lbits6;
                } else {
                        /* dir in/fwd => dst = local, src = remote */
                        net->xfrm.policy_bydst[dir].dbits4 = lbits4;
                        net->xfrm.policy_bydst[dir].sbits4 = rbits4;
                        net->xfrm.policy_bydst[dir].dbits6 = lbits6;
                        net->xfrm.policy_bydst[dir].sbits6 = rbits6;
                }
        }

        /* re-insert all policies by order of creation */
        list_for_each_entry_reverse(policy, &net->xfrm.policy_all, walk.all) {
                if (xfrm_policy_is_dead_or_sk(policy))
                        continue;

                hlist_del_rcu(&policy->bydst);

                newpos = NULL;
                dir = xfrm_policy_id2dir(policy->index);
                chain = policy_hash_bysel(net, &policy->selector,
                                          policy->family, dir);
                if (!chain) {
                        void *p = xfrm_policy_inexact_insert(policy, dir, 0);

                        WARN_ONCE(IS_ERR(p), "reinsert: %ld\n", PTR_ERR(p));
                        continue;
                }

                hlist_for_each_entry(pol, chain, bydst) {
                        if (policy->priority >= pol->priority)
                                newpos = &pol->bydst;
                        else
                                break;
                }
                if (newpos && policy->xdo.type != XFRM_DEV_OFFLOAD_PACKET)
                        hlist_add_behind_rcu(&policy->bydst, newpos);
                else
                        hlist_add_head_rcu(&policy->bydst, chain);
        }

out_unlock:
        __xfrm_policy_inexact_flush(net);
        write_seqcount_end(&net->xfrm.xfrm_policy_hash_generation);
        spin_unlock_bh(&net->xfrm.xfrm_policy_lock);

        mutex_unlock(&hash_resize_mutex);
}
void xfrm_policy_hash_rebuild(struct net *net)
{
        schedule_work(&net->xfrm.policy_hthresh.work);
}
EXPORT_SYMBOL(xfrm_policy_hash_rebuild);
/* Generate new index... KAME seems to generate them ordered by cost
 * of an absolute unpredictability of ordering of rules. This will not pass. */
static u32 xfrm_gen_index(struct net *net, int dir, u32 index)
{
        for (;;) {
                struct hlist_head *list;
                struct xfrm_policy *p;
                u32 idx;
                int found;

                if (!index) {
                        idx = (net->xfrm.idx_generator | dir);
                        net->xfrm.idx_generator += 8;
                } else {
                        idx = index;
                        index = 0;
                }

                if (idx == 0)
                        idx = 8;
                list = net->xfrm.policy_byidx + idx_hash(net, idx);
                found = 0;
                hlist_for_each_entry(p, list, byidx) {
                        if (p->index == idx) {
                                found = 1;
                                break;
                        }
                }
                if (!found)
                        return idx;
        }
}
static inline int selector_cmp(struct xfrm_selector *s1, struct xfrm_selector *s2)
{
        u32 *p1 = (u32 *) s1;
        u32 *p2 = (u32 *) s2;
        int len = sizeof(struct xfrm_selector) / sizeof(u32);
        int i;

        for (i = 0; i < len; i++) {
                if (p1[i] != p2[i])
                        return 1;
        }

        return 0;
}
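
/* When a policy is replaced, move any packets still sitting on the old
 * policy's hold queue over to the new policy and re-arm the new queue's
 * timer so they get another chance to be transmitted.
 */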
static void xfrm_policy_requeue(struct xfrm_policy *old,
                                struct xfrm_policy *new)
{
        struct xfrm_policy_queue *pq = &old->polq;
        struct sk_buff_head list;

        if (skb_queue_empty(&pq->hold_queue))
                return;

        __skb_queue_head_init(&list);

        spin_lock_bh(&pq->hold_queue.lock);
        skb_queue_splice_init(&pq->hold_queue, &list);
        if (del_timer(&pq->hold_timer))
                xfrm_pol_put(old);
        spin_unlock_bh(&pq->hold_queue.lock);

        pq = &new->polq;

        spin_lock_bh(&pq->hold_queue.lock);
        skb_queue_splice(&list, &pq->hold_queue);
        pq->timeout = XFRM_QUEUE_TMO_MIN;
        if (!mod_timer(&pq->hold_timer, jiffies))
                xfrm_pol_hold(new);
        spin_unlock_bh(&pq->hold_queue.lock);
}

static inline bool xfrm_policy_mark_match(const struct xfrm_mark *mark,
                                          struct xfrm_policy *pol)
{
        return mark->v == pol->mark.v && mark->m == pol->mark.m;
}
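
/* rhashtable glue for the inexact-policy bins: bins are keyed by
 * (net, type, direction, family, if_id), so the key hash, the object
 * hash and the compare function all fold the same fields.
 */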
static u32 xfrm_pol_bin_key(const void *data, u32 len, u32 seed)
{
        const struct xfrm_pol_inexact_key *k = data;
        u32 a = k->type << 24 | k->dir << 16 | k->family;

        return jhash_3words(a, k->if_id, net_hash_mix(read_pnet(&k->net)),
                            seed);
}

static u32 xfrm_pol_bin_obj(const void *data, u32 len, u32 seed)
{
        const struct xfrm_pol_inexact_bin *b = data;

        return xfrm_pol_bin_key(&b->k, 0, seed);
}

static int xfrm_pol_bin_cmp(struct rhashtable_compare_arg *arg,
                            const void *ptr)
{
        const struct xfrm_pol_inexact_key *key = arg->key;
        const struct xfrm_pol_inexact_bin *b = ptr;
        int ret;

        if (!net_eq(read_pnet(&b->k.net), read_pnet(&key->net)))
                return -1;

        ret = b->k.dir ^ key->dir;
        if (ret)
                return ret;

        ret = b->k.type ^ key->type;
        if (ret)
                return ret;

        ret = b->k.family ^ key->family;
        if (ret)
                return ret;

        return b->k.if_id ^ key->if_id;
}

static const struct rhashtable_params xfrm_pol_inexact_params = {
        .head_offset            = offsetof(struct xfrm_pol_inexact_bin, head),
        .hashfn                 = xfrm_pol_bin_key,
        .obj_hashfn             = xfrm_pol_bin_obj,
        .obj_cmpfn              = xfrm_pol_bin_cmp,
        .automatic_shrinking    = true,
};
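
/* Walk a bydst chain and insert @policy in priority order.  If an entry
 * with the same type/if_id/selector/mark/security context already
 * exists, it is the one being replaced: return it to the caller (or
 * -EEXIST when exclusive insertion was requested).
 */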
static struct xfrm_policy *xfrm_policy_insert_list(struct hlist_head *chain,
                                                   struct xfrm_policy *policy,
                                                   bool excl)
{
        struct xfrm_policy *pol, *newpos = NULL, *delpol = NULL;

        hlist_for_each_entry(pol, chain, bydst) {
                if (pol->type == policy->type &&
                    pol->if_id == policy->if_id &&
                    !selector_cmp(&pol->selector, &policy->selector) &&
                    xfrm_policy_mark_match(&policy->mark, pol) &&
                    xfrm_sec_ctx_match(pol->security, policy->security) &&
                    !WARN_ON(delpol)) {
                        if (excl)
                                return ERR_PTR(-EEXIST);
                        delpol = pol;
                        if (policy->priority > pol->priority)
                                continue;
                } else if (policy->priority >= pol->priority) {
                        newpos = pol;
                        continue;
                }
                if (delpol)
                        break;
        }

        if (newpos && policy->xdo.type != XFRM_DEV_OFFLOAD_PACKET)
                hlist_add_behind_rcu(&policy->bydst, &newpos->bydst);
        else
                /* Packet offload policies are inserted at the head
                 * to speed up lookups.
                 */
                hlist_add_head_rcu(&policy->bydst, chain);

        return delpol;
}
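
/* Main policy insertion entry point: use the exact bydst hash chain if
 * the selector is specific enough to be hashed, fall back to the inexact
 * lists otherwise, then link the policy, bump the route generation
 * counter, assign its index, and kill any policy it replaced.
 */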
int xfrm_policy_insert(int dir, struct xfrm_policy *policy, int excl)
{
        struct net *net = xp_net(policy);
        struct xfrm_policy *delpol;
        struct hlist_head *chain;

        spin_lock_bh(&net->xfrm.xfrm_policy_lock);
        chain = policy_hash_bysel(net, &policy->selector, policy->family, dir);
        if (chain)
                delpol = xfrm_policy_insert_list(chain, policy, excl);
        else
                delpol = xfrm_policy_inexact_insert(policy, dir, excl);

        if (IS_ERR(delpol)) {
                spin_unlock_bh(&net->xfrm.xfrm_policy_lock);
                return PTR_ERR(delpol);
        }

        __xfrm_policy_link(policy, dir);

        /* After previous checking, family can either be AF_INET or AF_INET6 */
        if (policy->family == AF_INET)
                rt_genid_bump_ipv4(net);
        else
                rt_genid_bump_ipv6(net);

        if (delpol) {
                xfrm_policy_requeue(delpol, policy);
                __xfrm_policy_unlink(delpol, dir);
        }
        policy->index = delpol ? delpol->index : xfrm_gen_index(net, dir, policy->index);
        hlist_add_head(&policy->byidx, net->xfrm.policy_byidx+idx_hash(net, policy->index));
        policy->curlft.add_time = ktime_get_real_seconds();
        policy->curlft.use_time = 0;
        if (!mod_timer(&policy->timer, jiffies + HZ))
                xfrm_pol_hold(policy);
        spin_unlock_bh(&net->xfrm.xfrm_policy_lock);

        if (delpol)
                xfrm_policy_kill(delpol);
        else if (xfrm_bydst_should_resize(net, dir, NULL))
                schedule_work(&net->xfrm.policy_hash_work);

        return 0;
}
EXPORT_SYMBOL(xfrm_policy_insert);
static struct xfrm_policy *
__xfrm_policy_bysel_ctx(struct hlist_head *chain, const struct xfrm_mark *mark,
                        u32 if_id, u8 type, int dir, struct xfrm_selector *sel,
                        struct xfrm_sec_ctx *ctx)
{
        struct xfrm_policy *pol;

        if (!chain)
                return NULL;

        hlist_for_each_entry(pol, chain, bydst) {
                if (pol->type == type &&
                    pol->if_id == if_id &&
                    xfrm_policy_mark_match(mark, pol) &&
                    !selector_cmp(sel, &pol->selector) &&
                    xfrm_sec_ctx_match(ctx, pol->security))
                        return pol;
        }

        return NULL;
}
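
/* Find (and optionally delete) a policy by selector and security
 * context.  The exact bydst chain is checked first; if the selector is
 * too coarse to be hashed, all inexact candidate lists are scanned and
 * the oldest match (smallest ->pos) wins.
 */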
struct xfrm_policy *
xfrm_policy_bysel_ctx(struct net *net, const struct xfrm_mark *mark, u32 if_id,
                      u8 type, int dir, struct xfrm_selector *sel,
                      struct xfrm_sec_ctx *ctx, int delete, int *err)
{
        struct xfrm_pol_inexact_bin *bin = NULL;
        struct xfrm_policy *pol, *ret = NULL;
        struct hlist_head *chain;

        *err = 0;
        spin_lock_bh(&net->xfrm.xfrm_policy_lock);
        chain = policy_hash_bysel(net, sel, sel->family, dir);
        if (!chain) {
                struct xfrm_pol_inexact_candidates cand;
                int i;

                bin = xfrm_policy_inexact_lookup(net, type,
                                                 sel->family, dir, if_id);
                if (!bin) {
                        spin_unlock_bh(&net->xfrm.xfrm_policy_lock);
                        return NULL;
                }

                if (!xfrm_policy_find_inexact_candidates(&cand, bin,
                                                         &sel->saddr,
                                                         &sel->daddr)) {
                        spin_unlock_bh(&net->xfrm.xfrm_policy_lock);
                        return NULL;
                }

                pol = NULL;
                for (i = 0; i < ARRAY_SIZE(cand.res); i++) {
                        struct xfrm_policy *tmp;

                        tmp = __xfrm_policy_bysel_ctx(cand.res[i], mark,
                                                      if_id, type, dir,
                                                      sel, ctx);
                        if (!tmp)
                                continue;

                        if (!pol || tmp->pos < pol->pos)
                                pol = tmp;
                }
        } else {
                pol = __xfrm_policy_bysel_ctx(chain, mark, if_id, type, dir,
                                              sel, ctx);
        }

        if (pol) {
                xfrm_pol_hold(pol);
                if (delete) {
                        *err = security_xfrm_policy_delete(pol->security);
                        if (*err) {
                                spin_unlock_bh(&net->xfrm.xfrm_policy_lock);
                                return pol;
                        }
                        __xfrm_policy_unlink(pol, dir);
                }
                ret = pol;
        }
        spin_unlock_bh(&net->xfrm.xfrm_policy_lock);

        if (ret && delete)
                xfrm_policy_kill(ret);
        if (bin && delete)
                xfrm_policy_inexact_prune_bin(bin);
        return ret;
}
EXPORT_SYMBOL(xfrm_policy_bysel_ctx);
struct xfrm_policy *
xfrm_policy_byid(struct net *net, const struct xfrm_mark *mark, u32 if_id,
                 u8 type, int dir, u32 id, int delete, int *err)
{
        struct xfrm_policy *pol, *ret;
        struct hlist_head *chain;

        *err = -ENOENT;
        if (xfrm_policy_id2dir(id) != dir)
                return NULL;

        *err = 0;
        spin_lock_bh(&net->xfrm.xfrm_policy_lock);
        chain = net->xfrm.policy_byidx + idx_hash(net, id);
        ret = NULL;
        hlist_for_each_entry(pol, chain, byidx) {
                if (pol->type == type && pol->index == id &&
                    pol->if_id == if_id && xfrm_policy_mark_match(mark, pol)) {
                        xfrm_pol_hold(pol);
                        if (delete) {
                                *err = security_xfrm_policy_delete(
                                                                pol->security);
                                if (*err) {
                                        spin_unlock_bh(&net->xfrm.xfrm_policy_lock);
                                        return pol;
                                }
                                __xfrm_policy_unlink(pol, dir);
                        }
                        ret = pol;
                        break;
                }
        }
        spin_unlock_bh(&net->xfrm.xfrm_policy_lock);

        if (ret && delete)
                xfrm_policy_kill(ret);
        return ret;
}
EXPORT_SYMBOL(xfrm_policy_byid);
#ifdef CONFIG_SECURITY_NETWORK_XFRM
static inline int
xfrm_policy_flush_secctx_check(struct net *net, u8 type, bool task_valid)
{
        struct xfrm_policy *pol;
        int err = 0;

        list_for_each_entry(pol, &net->xfrm.policy_all, walk.all) {
                if (pol->walk.dead ||
                    xfrm_policy_id2dir(pol->index) >= XFRM_POLICY_MAX ||
                    pol->type != type)
                        continue;

                err = security_xfrm_policy_delete(pol->security);
                if (err) {
                        xfrm_audit_policy_delete(pol, 0, task_valid);
                        return err;
                }
        }
        return err;
}

static inline int xfrm_dev_policy_flush_secctx_check(struct net *net,
                                                     struct net_device *dev,
                                                     bool task_valid)
{
        struct xfrm_policy *pol;
        int err = 0;

        list_for_each_entry(pol, &net->xfrm.policy_all, walk.all) {
                if (pol->walk.dead ||
                    xfrm_policy_id2dir(pol->index) >= XFRM_POLICY_MAX ||
                    pol->xdo.dev != dev)
                        continue;

                err = security_xfrm_policy_delete(pol->security);
                if (err) {
                        xfrm_audit_policy_delete(pol, 0, task_valid);
                        return err;
                }
        }
        return err;
}
#else
static inline int
xfrm_policy_flush_secctx_check(struct net *net, u8 type, bool task_valid)
{
        return 0;
}

static inline int xfrm_dev_policy_flush_secctx_check(struct net *net,
                                                     struct net_device *dev,
                                                     bool task_valid)
{
        return 0;
}
#endif
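
/* Delete all policies of the given type.  The walk restarts from the
 * head of policy_all after each kill because the lock is dropped while
 * xfrm_policy_kill() runs, so the list may have changed under us.
 */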
int xfrm_policy_flush(struct net *net, u8 type, bool task_valid)
{
        int dir, err = 0, cnt = 0;
        struct xfrm_policy *pol;

        spin_lock_bh(&net->xfrm.xfrm_policy_lock);

        err = xfrm_policy_flush_secctx_check(net, type, task_valid);
        if (err)
                goto out;

again:
        list_for_each_entry(pol, &net->xfrm.policy_all, walk.all) {
                if (pol->walk.dead)
                        continue;

                dir = xfrm_policy_id2dir(pol->index);
                if (dir >= XFRM_POLICY_MAX ||
                    pol->type != type)
                        continue;

                __xfrm_policy_unlink(pol, dir);
                spin_unlock_bh(&net->xfrm.xfrm_policy_lock);
                cnt++;
                xfrm_audit_policy_delete(pol, 1, task_valid);
                xfrm_policy_kill(pol);
                spin_lock_bh(&net->xfrm.xfrm_policy_lock);
                goto again;
        }
        if (cnt)
                __xfrm_policy_inexact_flush(net);
        else
                err = -ESRCH;
out:
        spin_unlock_bh(&net->xfrm.xfrm_policy_lock);
        return err;
}
EXPORT_SYMBOL(xfrm_policy_flush);
int xfrm_dev_policy_flush(struct net *net, struct net_device *dev,
                          bool task_valid)
{
        int dir, err = 0, cnt = 0;
        struct xfrm_policy *pol;

        spin_lock_bh(&net->xfrm.xfrm_policy_lock);

        err = xfrm_dev_policy_flush_secctx_check(net, dev, task_valid);
        if (err)
                goto out;

again:
        list_for_each_entry(pol, &net->xfrm.policy_all, walk.all) {
                if (pol->walk.dead)
                        continue;

                dir = xfrm_policy_id2dir(pol->index);
                if (dir >= XFRM_POLICY_MAX ||
                    pol->xdo.dev != dev)
                        continue;

                __xfrm_policy_unlink(pol, dir);
                spin_unlock_bh(&net->xfrm.xfrm_policy_lock);
                cnt++;
                xfrm_audit_policy_delete(pol, 1, task_valid);
                xfrm_policy_kill(pol);
                spin_lock_bh(&net->xfrm.xfrm_policy_lock);
                goto again;
        }
        if (cnt)
                __xfrm_policy_inexact_flush(net);
        else
                err = -ESRCH;
out:
        spin_unlock_bh(&net->xfrm.xfrm_policy_lock);
        return err;
}
EXPORT_SYMBOL(xfrm_dev_policy_flush);
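
/* Resumable dump of all policies: the walker entry is linked into
 * policy_all so a partial dump can continue where it left off once
 * func() reports an error (typically a full netlink skb).
 */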
int xfrm_policy_walk(struct net *net, struct xfrm_policy_walk *walk,
                     int (*func)(struct xfrm_policy *, int, int, void*),
                     void *data)
{
        struct xfrm_policy *pol;
        struct xfrm_policy_walk_entry *x;
        int error = 0;

        if (walk->type >= XFRM_POLICY_TYPE_MAX &&
            walk->type != XFRM_POLICY_TYPE_ANY)
                return -EINVAL;

        if (list_empty(&walk->walk.all) && walk->seq != 0)
                return 0;

        spin_lock_bh(&net->xfrm.xfrm_policy_lock);
        if (list_empty(&walk->walk.all))
                x = list_first_entry(&net->xfrm.policy_all, struct xfrm_policy_walk_entry, all);
        else
                x = list_first_entry(&walk->walk.all,
                                     struct xfrm_policy_walk_entry, all);
        list_for_each_entry_from(x, &net->xfrm.policy_all, all) {
                if (x->dead)
                        continue;
                pol = container_of(x, struct xfrm_policy, walk);
                if (walk->type != XFRM_POLICY_TYPE_ANY &&
                    walk->type != pol->type)
                        continue;
                error = func(pol, xfrm_policy_id2dir(pol->index),
                             walk->seq, data);
                if (error) {
                        list_move_tail(&walk->walk.all, &x->all);
                        goto out;
                }
                walk->seq++;
        }
        if (walk->seq == 0) {
                error = -ENOENT;
                goto out;
        }
        list_del_init(&walk->walk.all);
out:
        spin_unlock_bh(&net->xfrm.xfrm_policy_lock);
        return error;
}
EXPORT_SYMBOL(xfrm_policy_walk);
void xfrm_policy_walk_init(struct xfrm_policy_walk *walk, u8 type)
{
        INIT_LIST_HEAD(&walk->walk.all);
        walk->walk.dead = 1;
        walk->type = type;
        walk->seq = 0;
}
EXPORT_SYMBOL(xfrm_policy_walk_init);

void xfrm_policy_walk_done(struct xfrm_policy_walk *walk, struct net *net)
{
        if (list_empty(&walk->walk.all))
                return;

        spin_lock_bh(&net->xfrm.xfrm_policy_lock); /*FIXME where is net? */
        list_del(&walk->walk.all);
        spin_unlock_bh(&net->xfrm.xfrm_policy_lock);
}
EXPORT_SYMBOL(xfrm_policy_walk_done);
/*
 * Find policy to apply to this flow.
 *
 * Returns 0 if policy found, else an -errno.
 */
static int xfrm_policy_match(const struct xfrm_policy *pol,
                             const struct flowi *fl,
                             u8 type, u16 family, u32 if_id)
{
        const struct xfrm_selector *sel = &pol->selector;
        int ret = -ESRCH;
        bool match;

        if (pol->family != family ||
            pol->if_id != if_id ||
            (fl->flowi_mark & pol->mark.m) != pol->mark.v ||
            pol->type != type)
                return ret;

        match = xfrm_selector_match(sel, fl, family);
        if (match)
                ret = security_xfrm_policy_lookup(pol->security, fl->flowi_secid);
        return ret;
}
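
/* Lockless rbtree lookup of the inexact node covering @addr.  Readers
 * run under RCU only; the seqcount detects a concurrent tree rebuild,
 * in which case the descent simply restarts.
 */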
static struct xfrm_pol_inexact_node *
xfrm_policy_lookup_inexact_addr(const struct rb_root *r,
                                seqcount_spinlock_t *count,
                                const xfrm_address_t *addr, u16 family)
{
        const struct rb_node *parent;
        int seq;

again:
        seq = read_seqcount_begin(count);

        parent = rcu_dereference_raw(r->rb_node);
        while (parent) {
                struct xfrm_pol_inexact_node *node;
                int delta;

                node = rb_entry(parent, struct xfrm_pol_inexact_node, node);
                delta = xfrm_policy_addr_delta(addr, &node->addr,
                                               node->prefixlen, family);
                if (delta < 0) {
                        parent = rcu_dereference_raw(parent->rb_left);
                        continue;
                } else if (delta > 0) {
                        parent = rcu_dereference_raw(parent->rb_right);
                        continue;
                }

                return node;
        }

        if (read_seqcount_retry(count, seq))
                goto again;

        return NULL;
}
static bool
xfrm_policy_find_inexact_candidates(struct xfrm_pol_inexact_candidates *cand,
                                    struct xfrm_pol_inexact_bin *b,
                                    const xfrm_address_t *saddr,
                                    const xfrm_address_t *daddr)
{
        struct xfrm_pol_inexact_node *n;
        u16 family;

        if (!b)
                return false;

        family = b->k.family;
        memset(cand, 0, sizeof(*cand));
        cand->res[XFRM_POL_CAND_ANY] = &b->hhead;

        n = xfrm_policy_lookup_inexact_addr(&b->root_d, &b->count, daddr,
                                            family);
        if (n) {
                cand->res[XFRM_POL_CAND_DADDR] = &n->hhead;
                n = xfrm_policy_lookup_inexact_addr(&n->root, &b->count, saddr,
                                                    family);
                if (n)
                        cand->res[XFRM_POL_CAND_BOTH] = &n->hhead;
        }

        n = xfrm_policy_lookup_inexact_addr(&b->root_s, &b->count, saddr,
                                            family);
        if (n)
                cand->res[XFRM_POL_CAND_SADDR] = &n->hhead;

        return true;
}
static struct xfrm_pol_inexact_bin *
xfrm_policy_inexact_lookup_rcu(struct net *net, u8 type, u16 family,
                               u8 dir, u32 if_id)
{
        struct xfrm_pol_inexact_key k = {
                .family = family,
                .type = type,
                .dir = dir,
                .if_id = if_id,
        };

        write_pnet(&k.net, net);

        return rhashtable_lookup(&xfrm_policy_inexact_table, &k,
                                 xfrm_pol_inexact_params);
}

static struct xfrm_pol_inexact_bin *
xfrm_policy_inexact_lookup(struct net *net, u8 type, u16 family,
                           u8 dir, u32 if_id)
{
        struct xfrm_pol_inexact_bin *bin;

        lockdep_assert_held(&net->xfrm.xfrm_policy_lock);

        rcu_read_lock();
        bin = xfrm_policy_inexact_lookup_rcu(net, type, family, dir, if_id);
        rcu_read_unlock();

        return bin;
}
static struct xfrm_policy *
__xfrm_policy_eval_candidates(struct hlist_head *chain,
                              struct xfrm_policy *prefer,
                              const struct flowi *fl,
                              u8 type, u16 family, u32 if_id)
{
        u32 priority = prefer ? prefer->priority : ~0u;
        struct xfrm_policy *pol;

        if (!chain)
                return NULL;

        hlist_for_each_entry_rcu(pol, chain, bydst) {
                int err;

                if (pol->priority > priority)
                        break;

                err = xfrm_policy_match(pol, fl, type, family, if_id);
                if (err) {
                        if (err != -ESRCH)
                                return ERR_PTR(err);

                        continue;
                }

                if (prefer) {
                        /* matches.  Is it older than *prefer? */
                        if (pol->priority == priority &&
                            prefer->pos < pol->pos)
                                return prefer;
                }

                return pol;
        }

        return NULL;
}
static struct xfrm_policy *
xfrm_policy_eval_candidates(struct xfrm_pol_inexact_candidates *cand,
                            struct xfrm_policy *prefer,
                            const struct flowi *fl,
                            u8 type, u16 family, u32 if_id)
{
        struct xfrm_policy *tmp;
        int i;

        for (i = 0; i < ARRAY_SIZE(cand->res); i++) {
                tmp = __xfrm_policy_eval_candidates(cand->res[i],
                                                    prefer,
                                                    fl, type, family, if_id);
                if (!tmp)
                        continue;

                if (IS_ERR(tmp))
                        return tmp;
                prefer = tmp;
        }

        return prefer;
}
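
/* Core lookup for a flow: scan the exact bydst hash chain first, then
 * let the inexact candidate lists compete with that result.  The whole
 * lookup runs under RCU and is retried if the hash generation count
 * changed underneath us (a resize/rebuild happened) or the chosen
 * policy's refcount is already dying.
 */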
static struct xfrm_policy *xfrm_policy_lookup_bytype(struct net *net, u8 type,
                                                     const struct flowi *fl,
                                                     u16 family, u8 dir,
                                                     u32 if_id)
{
        struct xfrm_pol_inexact_candidates cand;
        const xfrm_address_t *daddr, *saddr;
        struct xfrm_pol_inexact_bin *bin;
        struct xfrm_policy *pol, *ret;
        struct hlist_head *chain;
        unsigned int sequence;
        int err;

        daddr = xfrm_flowi_daddr(fl, family);
        saddr = xfrm_flowi_saddr(fl, family);
        if (unlikely(!daddr || !saddr))
                return NULL;

        rcu_read_lock();
retry:
        do {
                sequence = read_seqcount_begin(&net->xfrm.xfrm_policy_hash_generation);
                chain = policy_hash_direct(net, daddr, saddr, family, dir);
        } while (read_seqcount_retry(&net->xfrm.xfrm_policy_hash_generation, sequence));

        ret = NULL;
        hlist_for_each_entry_rcu(pol, chain, bydst) {
                err = xfrm_policy_match(pol, fl, type, family, if_id);
                if (err) {
                        if (err == -ESRCH)
                                continue;
                        else {
                                ret = ERR_PTR(err);
                                goto fail;
                        }
                } else {
                        ret = pol;
                        break;
                }
        }
        if (ret && ret->xdo.type == XFRM_DEV_OFFLOAD_PACKET)
                goto skip_inexact;

        bin = xfrm_policy_inexact_lookup_rcu(net, type, family, dir, if_id);
        if (!bin || !xfrm_policy_find_inexact_candidates(&cand, bin, saddr,
                                                         daddr))
                goto skip_inexact;

        pol = xfrm_policy_eval_candidates(&cand, ret, fl, type,
                                          family, if_id);
        if (pol) {
                ret = pol;
                if (IS_ERR(pol))
                        goto fail;
        }

skip_inexact:
        if (read_seqcount_retry(&net->xfrm.xfrm_policy_hash_generation, sequence))
                goto retry;

        if (ret && !xfrm_pol_hold_rcu(ret))
                goto retry;
fail:
        rcu_read_unlock();

        return ret;
}
static struct xfrm_policy *xfrm_policy_lookup(struct net *net,
                                              const struct flowi *fl,
                                              u16 family, u8 dir, u32 if_id)
{
#ifdef CONFIG_XFRM_SUB_POLICY
        struct xfrm_policy *pol;

        pol = xfrm_policy_lookup_bytype(net, XFRM_POLICY_TYPE_SUB, fl, family,
                                        dir, if_id);
        if (pol != NULL)
                return pol;
#endif
        return xfrm_policy_lookup_bytype(net, XFRM_POLICY_TYPE_MAIN, fl, family,
                                         dir, if_id);
}
static struct xfrm_policy *xfrm_sk_policy_lookup(const struct sock *sk, int dir,
                                                 const struct flowi *fl,
                                                 u16 family, u32 if_id)
{
        struct xfrm_policy *pol;

        rcu_read_lock();
again:
        pol = rcu_dereference(sk->sk_policy[dir]);
        if (pol != NULL) {
                bool match;
                int err = 0;

                if (pol->family != family) {
                        pol = NULL;
                        goto out;
                }

                match = xfrm_selector_match(&pol->selector, fl, family);
                if (match) {
                        if ((READ_ONCE(sk->sk_mark) & pol->mark.m) != pol->mark.v ||
                            pol->if_id != if_id) {
                                pol = NULL;
                                goto out;
                        }
                        err = security_xfrm_policy_lookup(pol->security,
                                                          fl->flowi_secid);
                        if (!err) {
                                if (!xfrm_pol_hold_rcu(pol))
                                        goto again;
                        } else if (err == -ESRCH) {
                                pol = NULL;
                        } else {
                                pol = ERR_PTR(err);
                        }
                } else
                        pol = NULL;
        }
out:
        rcu_read_unlock();
        return pol;
}
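
/* Policies carry a monotonically increasing ->pos that breaks priority
 * ties by age.  The fast path just takes the newest live entry's
 * pos + 1; once the counter would overflow, the whole list is
 * renumbered from the oldest entry upward.
 */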
static u32 xfrm_gen_pos_slow(struct net *net)
{
        struct xfrm_policy *policy;
        u32 i = 0;

        /* oldest entry is last in list */
        list_for_each_entry_reverse(policy, &net->xfrm.policy_all, walk.all) {
                if (!xfrm_policy_is_dead_or_sk(policy))
                        policy->pos = ++i;
        }

        return i;
}

static u32 xfrm_gen_pos(struct net *net)
{
        const struct xfrm_policy *policy;
        u32 i = 0;

        /* most recently added policy is at the head of the list */
        list_for_each_entry(policy, &net->xfrm.policy_all, walk.all) {
                if (xfrm_policy_is_dead_or_sk(policy))
                        continue;

                if (policy->pos == UINT_MAX)
                        return xfrm_gen_pos_slow(net);

                i = policy->pos + 1;
                break;
        }

        return i;
}
static void __xfrm_policy_link(struct xfrm_policy *pol, int dir)
{
        struct net *net = xp_net(pol);

        switch (dir) {
        case XFRM_POLICY_IN:
        case XFRM_POLICY_FWD:
        case XFRM_POLICY_OUT:
                pol->pos = xfrm_gen_pos(net);
                break;
        }

        list_add(&pol->walk.all, &net->xfrm.policy_all);
        net->xfrm.policy_count[dir]++;
        xfrm_pol_hold(pol);
}

static struct xfrm_policy *__xfrm_policy_unlink(struct xfrm_policy *pol,
                                                int dir)
{
        struct net *net = xp_net(pol);

        if (list_empty(&pol->walk.all))
                return NULL;

        /* Socket policies are not hashed. */
        if (!hlist_unhashed(&pol->bydst)) {
                hlist_del_rcu(&pol->bydst);
                hlist_del(&pol->byidx);
        }

        list_del_init(&pol->walk.all);
        net->xfrm.policy_count[dir]--;

        return pol;
}

static void xfrm_sk_policy_link(struct xfrm_policy *pol, int dir)
{
        __xfrm_policy_link(pol, XFRM_POLICY_MAX + dir);
}

static void xfrm_sk_policy_unlink(struct xfrm_policy *pol, int dir)
{
        __xfrm_policy_unlink(pol, XFRM_POLICY_MAX + dir);
}
int xfrm_policy_delete(struct xfrm_policy *pol, int dir)
{
        struct net *net = xp_net(pol);

        spin_lock_bh(&net->xfrm.xfrm_policy_lock);
        pol = __xfrm_policy_unlink(pol, dir);
        spin_unlock_bh(&net->xfrm.xfrm_policy_lock);
        if (pol) {
                xfrm_policy_kill(pol);
                return 0;
        }
        return -ENOENT;
}
EXPORT_SYMBOL(xfrm_policy_delete);
int xfrm_sk_policy_insert(struct sock *sk, int dir, struct xfrm_policy *pol)
{
        struct net *net = sock_net(sk);
        struct xfrm_policy *old_pol;

#ifdef CONFIG_XFRM_SUB_POLICY
        if (pol && pol->type != XFRM_POLICY_TYPE_MAIN)
                return -EINVAL;
#endif

        spin_lock_bh(&net->xfrm.xfrm_policy_lock);
        old_pol = rcu_dereference_protected(sk->sk_policy[dir],
                                lockdep_is_held(&net->xfrm.xfrm_policy_lock));
        if (pol) {
                pol->curlft.add_time = ktime_get_real_seconds();
                pol->index = xfrm_gen_index(net, XFRM_POLICY_MAX+dir, 0);
                xfrm_sk_policy_link(pol, dir);
        }
        rcu_assign_pointer(sk->sk_policy[dir], pol);
        if (old_pol) {
                if (pol)
                        xfrm_policy_requeue(old_pol, pol);

                /* Unlinking succeeds always. This is the only function
                 * allowed to delete or replace socket policy.
                 */
                xfrm_sk_policy_unlink(old_pol, dir);
        }
        spin_unlock_bh(&net->xfrm.xfrm_policy_lock);

        if (old_pol) {
                xfrm_policy_kill(old_pol);
        }
        return 0;
}
static struct xfrm_policy *clone_policy(const struct xfrm_policy *old, int dir)
{
        struct xfrm_policy *newp = xfrm_policy_alloc(xp_net(old), GFP_ATOMIC);
        struct net *net = xp_net(old);

        if (newp) {
                newp->selector = old->selector;
                if (security_xfrm_policy_clone(old->security,
                                               &newp->security)) {
                        kfree(newp);
                        return NULL;  /* ENOMEM */
                }
                newp->lft = old->lft;
                newp->curlft = old->curlft;
                newp->mark = old->mark;
                newp->if_id = old->if_id;
                newp->action = old->action;
                newp->flags = old->flags;
                newp->xfrm_nr = old->xfrm_nr;
                newp->index = old->index;
                newp->type = old->type;
                newp->family = old->family;
                memcpy(newp->xfrm_vec, old->xfrm_vec,
                       newp->xfrm_nr*sizeof(struct xfrm_tmpl));
                spin_lock_bh(&net->xfrm.xfrm_policy_lock);
                xfrm_sk_policy_link(newp, dir);
                spin_unlock_bh(&net->xfrm.xfrm_policy_lock);
                xfrm_pol_put(newp);
        }
        return newp;
}
int __xfrm_sk_clone_policy(struct sock *sk, const struct sock *osk)
{
        const struct xfrm_policy *p;
        struct xfrm_policy *np;
        int i, ret = 0;

        rcu_read_lock();
        for (i = 0; i < 2; i++) {
                p = rcu_dereference(osk->sk_policy[i]);
                if (p) {
                        np = clone_policy(p, i);
                        if (unlikely(!np)) {
                                ret = -ENOMEM;
                                break;
                        }
                        rcu_assign_pointer(sk->sk_policy[i], np);
                }
        }
        rcu_read_unlock();
        return ret;
}
static int
xfrm_get_saddr(unsigned short family, xfrm_address_t *saddr,
               const struct xfrm_dst_lookup_params *params)
{
        int err;
        const struct xfrm_policy_afinfo *afinfo = xfrm_policy_get_afinfo(family);

        if (unlikely(afinfo == NULL))
                return -EINVAL;
        err = afinfo->get_saddr(saddr, params);
        rcu_read_unlock();
        return err;
}
/* Resolve list of templates for the flow, given policy. */
static int
xfrm_tmpl_resolve_one(struct xfrm_policy *policy, const struct flowi *fl,
                      struct xfrm_state **xfrm, unsigned short family)
{
        struct net *net = xp_net(policy);
        int nx;
        int i, error;
        xfrm_address_t *daddr = xfrm_flowi_daddr(fl, family);
        xfrm_address_t *saddr = xfrm_flowi_saddr(fl, family);
        xfrm_address_t tmp;

        for (nx = 0, i = 0; i < policy->xfrm_nr; i++) {
                struct xfrm_state *x;
                xfrm_address_t *remote = daddr;
                xfrm_address_t *local = saddr;
                struct xfrm_tmpl *tmpl = &policy->xfrm_vec[i];

                if (tmpl->mode == XFRM_MODE_TUNNEL ||
                    tmpl->mode == XFRM_MODE_BEET) {
                        remote = &tmpl->id.daddr;
                        local = &tmpl->saddr;
                        if (xfrm_addr_any(local, tmpl->encap_family)) {
                                struct xfrm_dst_lookup_params params;

                                memset(&params, 0, sizeof(params));
                                params.net = net;
                                params.oif = fl->flowi_oif;
                                params.daddr = remote;
                                error = xfrm_get_saddr(tmpl->encap_family, &tmp,
                                                       &params);
                                if (error)
                                        goto fail;
                                local = &tmp;
                        }
                }

                x = xfrm_state_find(remote, local, fl, tmpl, policy, &error,
                                    family, policy->if_id);
                if (x && x->dir && x->dir != XFRM_SA_DIR_OUT) {
                        XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTSTATEDIRERROR);
                        xfrm_state_put(x);
                        error = -EINVAL;
                        goto fail;
                }

                if (x && x->km.state == XFRM_STATE_VALID) {
                        xfrm[nx++] = x;
                        daddr = remote;
                        saddr = local;
                        continue;
                }
                if (x) {
                        error = (x->km.state == XFRM_STATE_ERROR ?
                                 -EINVAL : -EAGAIN);
                        xfrm_state_put(x);
                } else if (error == -ESRCH) {
                        error = -EAGAIN;
                }

                if (!tmpl->optional)
                        goto fail;
        }
        return nx;

fail:
        for (nx--; nx >= 0; nx--)
                xfrm_state_put(xfrm[nx]);
        return error;
}
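
/* Resolve the templates of all given policies into one array of states.
 * With more than one policy the states are gathered in a scratch array
 * first and then sorted into outbound processing order.
 */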
static int
xfrm_tmpl_resolve(struct xfrm_policy **pols, int npols, const struct flowi *fl,
                  struct xfrm_state **xfrm, unsigned short family)
{
        struct xfrm_state *tp[XFRM_MAX_DEPTH];
        struct xfrm_state **tpp = (npols > 1) ? tp : xfrm;
        int cnx = 0;
        int error;
        int ret;
        int i;

        for (i = 0; i < npols; i++) {
                if (cnx + pols[i]->xfrm_nr >= XFRM_MAX_DEPTH) {
                        error = -ENOBUFS;
                        goto fail;
                }

                ret = xfrm_tmpl_resolve_one(pols[i], fl, &tpp[cnx], family);
                if (ret < 0) {
                        error = ret;
                        goto fail;
                } else
                        cnx += ret;
        }

        /* found states are sorted for outbound processing */
        if (npols > 1)
                xfrm_state_sort(xfrm, tpp, cnx, family);

        return cnx;

fail:
        for (cnx--; cnx >= 0; cnx--)
                xfrm_state_put(tpp[cnx]);
        return error;
}
static int xfrm_get_tos(const struct flowi *fl, int family)
{
        if (family == AF_INET)
                return fl->u.ip4.flowi4_tos & INET_DSCP_MASK;

        return 0;
}

static inline struct xfrm_dst *xfrm_alloc_dst(struct net *net, int family)
{
        const struct xfrm_policy_afinfo *afinfo = xfrm_policy_get_afinfo(family);
        struct dst_ops *dst_ops;
        struct xfrm_dst *xdst;

        if (!afinfo)
                return ERR_PTR(-EINVAL);

        switch (family) {
        case AF_INET:
                dst_ops = &net->xfrm.xfrm4_dst_ops;
                break;
#if IS_ENABLED(CONFIG_IPV6)
        case AF_INET6:
                dst_ops = &net->xfrm.xfrm6_dst_ops;
                break;
#endif
        default:
                BUG();
        }
        xdst = dst_alloc(dst_ops, NULL, DST_OBSOLETE_NONE, 0);

        if (likely(xdst)) {
                memset_after(xdst, 0, u.dst);
        } else
                xdst = ERR_PTR(-ENOBUFS);

        rcu_read_unlock();

        return xdst;
}
static void xfrm_init_path(struct xfrm_dst *path, struct dst_entry *dst,
                           int nfheader_len)
{
        if (dst->ops->family == AF_INET6) {
                path->path_cookie = rt6_get_cookie(dst_rt6_info(dst));
                path->u.rt6.rt6i_nfheader_len = nfheader_len;
        }
}

static inline int xfrm_fill_dst(struct xfrm_dst *xdst, struct net_device *dev,
                                const struct flowi *fl)
{
        const struct xfrm_policy_afinfo *afinfo =
                xfrm_policy_get_afinfo(xdst->u.dst.ops->family);
        int err;

        if (!afinfo)
                return -EINVAL;

        err = afinfo->fill_dst(xdst, dev, fl);

        rcu_read_unlock();

        return err;
}
/* Allocate chain of dst_entry's, attach known xfrm's, calculate
 * all the metrics... Shortly, bundle a bundle.
 */
static struct dst_entry *xfrm_bundle_create(struct xfrm_policy *policy,
                                            struct xfrm_state **xfrm,
                                            struct xfrm_dst **bundle,
                                            int nx,
                                            const struct flowi *fl,
                                            struct dst_entry *dst)
{
        const struct xfrm_state_afinfo *afinfo;
        const struct xfrm_mode *inner_mode;
        struct net *net = xp_net(policy);
        unsigned long now = jiffies;
        struct net_device *dev;
        struct xfrm_dst *xdst_prev = NULL;
        struct xfrm_dst *xdst0 = NULL;
        int i = 0;
        int err;
        int header_len = 0;
        int nfheader_len = 0;
        int trailer_len = 0;
        int tos;
        int family = policy->selector.family;
        xfrm_address_t saddr, daddr;

        xfrm_flowi_addr_get(fl, &saddr, &daddr, family);

        tos = xfrm_get_tos(fl, family);

        dst_hold(dst);

        for (; i < nx; i++) {
                struct xfrm_dst *xdst = xfrm_alloc_dst(net, family);
                struct dst_entry *dst1 = &xdst->u.dst;

                err = PTR_ERR(xdst);
                if (IS_ERR(xdst)) {
                        dst_release(dst);
                        goto put_states;
                }

                bundle[i] = xdst;
                if (!xdst_prev)
                        xdst0 = xdst;
                else
                        /* Ref count is taken during xfrm_alloc_dst()
                         * No need to do dst_clone() on dst1
                         */
                        xfrm_dst_set_child(xdst_prev, &xdst->u.dst);

                if (xfrm[i]->sel.family == AF_UNSPEC) {
                        inner_mode = xfrm_ip2inner_mode(xfrm[i],
                                                        xfrm_af2proto(family));
                        if (!inner_mode) {
                                err = -EAFNOSUPPORT;
                                dst_release(dst);
                                goto put_states;
                        }
                } else
                        inner_mode = &xfrm[i]->inner_mode;

                xdst->route = dst;
                dst_copy_metrics(dst1, dst);

                if (xfrm[i]->props.mode != XFRM_MODE_TRANSPORT) {
                        __u32 mark = 0;
                        int oif;

                        if (xfrm[i]->props.smark.v || xfrm[i]->props.smark.m)
                                mark = xfrm_smark_get(fl->flowi_mark, xfrm[i]);

                        if (xfrm[i]->xso.type != XFRM_DEV_OFFLOAD_PACKET)
                                family = xfrm[i]->props.family;

                        oif = fl->flowi_oif ? : fl->flowi_l3mdev;
                        dst = xfrm_dst_lookup(xfrm[i], tos, oif,
                                              &saddr, &daddr, family, mark);
                        err = PTR_ERR(dst);
                        if (IS_ERR(dst))
                                goto put_states;
                } else
                        dst_hold(dst);

                dst1->xfrm = xfrm[i];
                xdst->xfrm_genid = xfrm[i]->genid;

                dst1->obsolete = DST_OBSOLETE_FORCE_CHK;
                dst1->lastuse = now;

                dst1->input = dst_discard;

                rcu_read_lock();
                afinfo = xfrm_state_afinfo_get_rcu(inner_mode->family);
                if (likely(afinfo))
                        dst1->output = afinfo->output;
                else
                        dst1->output = dst_discard_out;
                rcu_read_unlock();

                xdst_prev = xdst;

                header_len += xfrm[i]->props.header_len;
                if (xfrm[i]->type->flags & XFRM_TYPE_NON_FRAGMENT)
                        nfheader_len += xfrm[i]->props.header_len;
                trailer_len += xfrm[i]->props.trailer_len;
        }

        xfrm_dst_set_child(xdst_prev, dst);
        xdst0->path = dst;

        err = -ENODEV;
        dev = dst->dev;
        if (!dev)
                goto free_dst;

        xfrm_init_path(xdst0, dst, nfheader_len);
        xfrm_init_pmtu(bundle, nx);

        for (xdst_prev = xdst0; xdst_prev != (struct xfrm_dst *)dst;
             xdst_prev = (struct xfrm_dst *) xfrm_dst_child(&xdst_prev->u.dst)) {
                err = xfrm_fill_dst(xdst_prev, dev, fl);
                if (err)
                        goto free_dst;

                xdst_prev->u.dst.header_len = header_len;
                xdst_prev->u.dst.trailer_len = trailer_len;
                header_len -= xdst_prev->u.dst.xfrm->props.header_len;
                trailer_len -= xdst_prev->u.dst.xfrm->props.trailer_len;
        }

        return &xdst0->u.dst;

put_states:
        for (; i < nx; i++)
                xfrm_state_put(xfrm[i]);
free_dst:
        if (xdst0)
                dst_release_immediate(&xdst0->u.dst);

        return ERR_PTR(err);
}
static int xfrm_expand_policies(const struct flowi *fl, u16 family,
                                struct xfrm_policy **pols,
                                int *num_pols, int *num_xfrms)
{
        int i;

        if (*num_pols == 0 || !pols[0]) {
                *num_pols = 0;
                *num_xfrms = 0;
                return 0;
        }
        if (IS_ERR(pols[0])) {
                *num_pols = 0;
                return PTR_ERR(pols[0]);
        }

        *num_xfrms = pols[0]->xfrm_nr;

#ifdef CONFIG_XFRM_SUB_POLICY
        if (pols[0]->action == XFRM_POLICY_ALLOW &&
            pols[0]->type != XFRM_POLICY_TYPE_MAIN) {
                pols[1] = xfrm_policy_lookup_bytype(xp_net(pols[0]),
                                                    XFRM_POLICY_TYPE_MAIN,
                                                    fl, family,
                                                    XFRM_POLICY_OUT,
                                                    pols[0]->if_id);
                if (pols[1]) {
                        if (IS_ERR(pols[1])) {
                                xfrm_pols_put(pols, *num_pols);
                                *num_pols = 0;
                                return PTR_ERR(pols[1]);
                        }
                        (*num_pols)++;
                        (*num_xfrms) += pols[1]->xfrm_nr;
                }
        }
#endif
        for (i = 0; i < *num_pols; i++) {
                if (pols[i]->action != XFRM_POLICY_ALLOW) {
                        *num_xfrms = -1;
                        break;
                }
        }

        return 0;
}
static struct xfrm_dst *
xfrm_resolve_and_create_bundle(struct xfrm_policy **pols, int num_pols,
                               const struct flowi *fl, u16 family,
                               struct dst_entry *dst_orig)
{
        struct net *net = xp_net(pols[0]);
        struct xfrm_state *xfrm[XFRM_MAX_DEPTH];
        struct xfrm_dst *bundle[XFRM_MAX_DEPTH];
        struct xfrm_dst *xdst;
        struct dst_entry *dst;
        int err;

        /* Try to instantiate a bundle */
        err = xfrm_tmpl_resolve(pols, num_pols, fl, xfrm, family);
        if (err <= 0) {
                if (err == 0)
                        return NULL;

                if (err != -EAGAIN)
                        XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTPOLERROR);
                return ERR_PTR(err);
        }

        dst = xfrm_bundle_create(pols[0], xfrm, bundle, err, fl, dst_orig);
        if (IS_ERR(dst)) {
                XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTBUNDLEGENERROR);
                return ERR_CAST(dst);
        }

        xdst = (struct xfrm_dst *)dst;
        xdst->num_xfrms = err;
        xdst->num_pols = num_pols;
        memcpy(xdst->pols, pols, sizeof(struct xfrm_policy *) * num_pols);
        xdst->policy_genid = atomic_read(&pols[0]->genid);

        return xdst;
}
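
/* Timer callback for a policy's hold queue: re-run the lookup for the
 * head packet; if the bundle is still a queueing placeholder, back off
 * exponentially and re-arm the timer (up to XFRM_QUEUE_TMO_MAX),
 * otherwise flush every held packet through the resolved route.
 */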
static void xfrm_policy_queue_process(struct timer_list *t)
{
        struct sk_buff *skb;
        struct sock *sk;
        struct dst_entry *dst;
        struct xfrm_policy *pol = from_timer(pol, t, polq.hold_timer);
        struct net *net = xp_net(pol);
        struct xfrm_policy_queue *pq = &pol->polq;
        struct flowi fl;
        struct sk_buff_head list;
        __u32 skb_mark;

        spin_lock(&pq->hold_queue.lock);
        skb = skb_peek(&pq->hold_queue);
        if (!skb) {
                spin_unlock(&pq->hold_queue.lock);
                goto out;
        }
        dst = skb_dst(skb);
        sk = skb->sk;

        /* Fixup the mark to support VTI. */
        skb_mark = skb->mark;
        skb->mark = pol->mark.v;
        xfrm_decode_session(net, skb, &fl, dst->ops->family);
        skb->mark = skb_mark;
        spin_unlock(&pq->hold_queue.lock);

        dst_hold(xfrm_dst_path(dst));
        dst = xfrm_lookup(net, xfrm_dst_path(dst), &fl, sk, XFRM_LOOKUP_QUEUE);
        if (IS_ERR(dst))
                goto purge_queue;

        if (dst->flags & DST_XFRM_QUEUE) {
                dst_release(dst);

                if (pq->timeout >= XFRM_QUEUE_TMO_MAX)
                        goto purge_queue;

                pq->timeout = pq->timeout << 1;
                if (!mod_timer(&pq->hold_timer, jiffies + pq->timeout))
                        xfrm_pol_hold(pol);
                goto out;
        }

        dst_release(dst);

        __skb_queue_head_init(&list);

        spin_lock(&pq->hold_queue.lock);
        pq->timeout = 0;
        skb_queue_splice_init(&pq->hold_queue, &list);
        spin_unlock(&pq->hold_queue.lock);

        while (!skb_queue_empty(&list)) {
                skb = __skb_dequeue(&list);

                /* Fixup the mark to support VTI. */
                skb_mark = skb->mark;
                skb->mark = pol->mark.v;
                xfrm_decode_session(net, skb, &fl, skb_dst(skb)->ops->family);
                skb->mark = skb_mark;

                dst_hold(xfrm_dst_path(skb_dst(skb)));
                dst = xfrm_lookup(net, xfrm_dst_path(skb_dst(skb)), &fl, skb->sk, 0);
                if (IS_ERR(dst)) {
                        kfree_skb(skb);
                        continue;
                }

                nf_reset_ct(skb);
                skb_dst_drop(skb);
                skb_dst_set(skb, dst);

                dst_output(net, skb->sk, skb);
        }

out:
        xfrm_pol_put(pol);
        return;

purge_queue:
        pq->timeout = 0;
        skb_queue_purge(&pq->hold_queue);
        xfrm_pol_put(pol);
}
static int xdst_queue_output(struct net *net, struct sock *sk, struct sk_buff *skb)
{
        unsigned long sched_next;
        struct dst_entry *dst = skb_dst(skb);
        struct xfrm_dst *xdst = (struct xfrm_dst *) dst;
        struct xfrm_policy *pol = xdst->pols[0];
        struct xfrm_policy_queue *pq = &pol->polq;

        if (unlikely(skb_fclone_busy(sk, skb))) {
                kfree_skb(skb);
                return 0;
        }

        if (pq->hold_queue.qlen > XFRM_MAX_QUEUE_LEN) {
                kfree_skb(skb);
                return -EAGAIN;
        }

        skb_dst_force(skb);

        spin_lock_bh(&pq->hold_queue.lock);

        if (!pq->timeout)
                pq->timeout = XFRM_QUEUE_TMO_MIN;

        sched_next = jiffies + pq->timeout;

        if (del_timer(&pq->hold_timer)) {
                if (time_before(pq->hold_timer.expires, sched_next))
                        sched_next = pq->hold_timer.expires;
                xfrm_pol_put(pol);
        }

        __skb_queue_tail(&pq->hold_queue, skb);
        if (!mod_timer(&pq->hold_timer, sched_next))
                xfrm_pol_hold(pol);

        spin_unlock_bh(&pq->hold_queue.lock);

        return 0;
}
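
/* Build a placeholder bundle (DST_XFRM_QUEUE) whose output function
 * parks packets on the policy's hold queue while the needed SAs are
 * still being negotiated by the key manager.
 */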
static struct xfrm_dst *xfrm_create_dummy_bundle(struct net *net,
                                                 struct xfrm_flo *xflo,
                                                 const struct flowi *fl,
                                                 int num_xfrms,
                                                 u16 family)
{
        int err;
        struct net_device *dev;
        struct dst_entry *dst;
        struct dst_entry *dst1;
        struct xfrm_dst *xdst;

        xdst = xfrm_alloc_dst(net, family);
        if (IS_ERR(xdst))
                return xdst;

        if (!(xflo->flags & XFRM_LOOKUP_QUEUE) ||
            net->xfrm.sysctl_larval_drop ||
            num_xfrms <= 0)
                return xdst;

        dst = xflo->dst_orig;
        dst1 = &xdst->u.dst;
        dst_hold(dst);
        xdst->route = dst;

        dst_copy_metrics(dst1, dst);

        dst1->obsolete = DST_OBSOLETE_FORCE_CHK;
        dst1->flags |= DST_XFRM_QUEUE;
        dst1->lastuse = jiffies;

        dst1->input = dst_discard;
        dst1->output = xdst_queue_output;

        dst_hold(dst);
        xfrm_dst_set_child(xdst, dst);
        xdst->path = dst;

        xfrm_init_path((struct xfrm_dst *)dst1, dst, 0);

        err = -ENODEV;
        dev = dst->dev;
        if (!dev)
                goto free_dst;

        err = xfrm_fill_dst(xdst, dev, fl);
        if (err)
                goto free_dst;

out:
        return xdst;

free_dst:
        dst_release(dst1);
        xdst = ERR_PTR(err);
        goto out;
}
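
/* Look up the policies for a flow and turn them into a bundle.  When
 * policies exist but no usable states do yet, fall back to a dummy
 * (queueing) bundle instead of failing outright.
 */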
static struct xfrm_dst *xfrm_bundle_lookup(struct net *net,
                                           const struct flowi *fl,
                                           u16 family, u8 dir,
                                           struct xfrm_flo *xflo, u32 if_id)
{
        struct xfrm_policy *pols[XFRM_POLICY_TYPE_MAX];
        int num_pols = 0, num_xfrms = 0, err;
        struct xfrm_dst *xdst;

        /* Resolve policies to use if we couldn't get them from
         * previous cache entry */
        num_pols = 1;
        pols[0] = xfrm_policy_lookup(net, fl, family, dir, if_id);
        err = xfrm_expand_policies(fl, family, pols,
                                   &num_pols, &num_xfrms);
        if (err < 0)
                goto inc_error;
        if (num_pols == 0)
                return NULL;
        if (num_xfrms <= 0)
                goto make_dummy_bundle;

        xdst = xfrm_resolve_and_create_bundle(pols, num_pols, fl, family,
                                              xflo->dst_orig);
        if (IS_ERR(xdst)) {
                err = PTR_ERR(xdst);
                if (err == -EREMOTE) {
                        xfrm_pols_put(pols, num_pols);
                        return NULL;
                }

                if (err != -EAGAIN)
                        goto error;
                goto make_dummy_bundle;
        } else if (xdst == NULL) {
                num_xfrms = 0;
                goto make_dummy_bundle;
        }

        return xdst;

make_dummy_bundle:
        /* We found policies, but there are no bundles to instantiate:
         * either because the policy blocks, has no transformations, or
         * we could not build a template (no xfrm_states).
         */
        xdst = xfrm_create_dummy_bundle(net, xflo, fl, num_xfrms, family);
        if (IS_ERR(xdst)) {
                xfrm_pols_put(pols, num_pols);
                return ERR_CAST(xdst);
        }
        xdst->num_pols = num_pols;
        xdst->num_xfrms = num_xfrms;
        memcpy(xdst->pols, pols, sizeof(struct xfrm_policy *) * num_pols);

        return xdst;

inc_error:
        XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTPOLERROR);
error:
        xfrm_pols_put(pols, num_pols);
        return ERR_PTR(err);
}
static struct dst_entry *make_blackhole(struct net *net, u16 family,
                                        struct dst_entry *dst_orig)
{
        const struct xfrm_policy_afinfo *afinfo = xfrm_policy_get_afinfo(family);
        struct dst_entry *ret;

        if (!afinfo) {
                dst_release(dst_orig);
                return ERR_PTR(-EINVAL);
        } else {
                ret = afinfo->blackhole_route(net, dst_orig);
        }
        rcu_read_unlock();

        return ret;
}
/* Finds/creates a bundle for given flow and if_id
 *
 * At the moment we eat a raw IP route. Mostly to speed up lookups
 * on interfaces with disabled IPsec.
 *
 * xfrm_lookup uses an if_id of 0 by default, and is provided for
 * compatibility
 */
struct dst_entry *xfrm_lookup_with_ifid(struct net *net,
                                        struct dst_entry *dst_orig,
                                        const struct flowi *fl,
                                        const struct sock *sk,
                                        int flags, u32 if_id)
{
        struct xfrm_policy *pols[XFRM_POLICY_TYPE_MAX];
        struct xfrm_dst *xdst;
        struct dst_entry *dst, *route;
        u16 family = dst_orig->ops->family;
        u8 dir = XFRM_POLICY_OUT;
        int i, err, num_pols, num_xfrms = 0, drop_pols = 0;

        dst = NULL;
        xdst = NULL;
        route = NULL;

        sk = sk_const_to_full_sk(sk);
        if (sk && sk->sk_policy[XFRM_POLICY_OUT]) {
                num_pols = 1;
                pols[0] = xfrm_sk_policy_lookup(sk, XFRM_POLICY_OUT, fl, family,
                                                if_id);
                err = xfrm_expand_policies(fl, family, pols,
                                           &num_pols, &num_xfrms);
                if (err < 0)
                        goto dropdst;

                if (num_pols) {
                        if (num_xfrms <= 0) {
                                drop_pols = num_pols;
                                goto no_transform;
                        }

                        xdst = xfrm_resolve_and_create_bundle(
                                        pols, num_pols, fl,
                                        family, dst_orig);

                        if (IS_ERR(xdst)) {
                                xfrm_pols_put(pols, num_pols);
                                err = PTR_ERR(xdst);
                                if (err == -EREMOTE)
                                        goto nopol;

                                goto dropdst;
                        } else if (xdst == NULL) {
                                num_xfrms = 0;
                                drop_pols = num_pols;
                                goto no_transform;
                        }

                        route = xdst->route;
                }
        }

        if (xdst == NULL) {
                struct xfrm_flo xflo;

                xflo.dst_orig = dst_orig;
                xflo.flags = flags;

                /* To accelerate a bit... */
                if (!if_id && ((dst_orig->flags & DST_NOXFRM) ||
                               !net->xfrm.policy_count[XFRM_POLICY_OUT]))
                        goto nopol;

                xdst = xfrm_bundle_lookup(net, fl, family, dir, &xflo, if_id);
                if (xdst == NULL)
                        goto nopol;
                if (IS_ERR(xdst)) {
                        err = PTR_ERR(xdst);
                        goto dropdst;
                }

                num_pols = xdst->num_pols;
                num_xfrms = xdst->num_xfrms;
                memcpy(pols, xdst->pols, sizeof(struct xfrm_policy *) * num_pols);
                route = xdst->route;
        }

        dst = &xdst->u.dst;
        if (route == NULL && num_xfrms > 0) {
                /* The only case when xfrm_bundle_lookup() returns a
                 * bundle with null route is when the template could
                 * not be resolved. It means policies are there, but
                 * bundle could not be created, since we don't yet
                 * have the xfrm_state's. We need to wait for KM to
                 * negotiate new SA's or bail out with error.
                 */
                if (net->xfrm.sysctl_larval_drop) {
                        XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTNOSTATES);
                        err = -EREMOTE;
                        goto error;
                }

                err = -EAGAIN;

                XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTNOSTATES);
                goto error;
        }

no_transform:
        if (num_pols == 0)
                goto nopol;

        if ((flags & XFRM_LOOKUP_ICMP) &&
            !(pols[0]->flags & XFRM_POLICY_ICMP)) {
                err = -ENOENT;
                goto error;
        }

        for (i = 0; i < num_pols; i++)
                WRITE_ONCE(pols[i]->curlft.use_time, ktime_get_real_seconds());

        if (num_xfrms < 0) {
                /* Prohibit the flow */
                XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTPOLBLOCK);
                err = -EPERM;
                goto error;
        } else if (num_xfrms > 0) {
                /* Flow transformed */
                dst_release(dst_orig);
        } else {
                /* Flow passes untransformed */
                dst_release(dst);
                dst = dst_orig;
        }
ok:
        xfrm_pols_put(pols, drop_pols);
        if (dst && dst->xfrm &&
            dst->xfrm->props.mode == XFRM_MODE_TUNNEL)
                dst->flags |= DST_XFRM_TUNNEL;
        return dst;

nopol:
        if ((!dst_orig->dev || !(dst_orig->dev->flags & IFF_LOOPBACK)) &&
            net->xfrm.policy_default[dir] == XFRM_USERPOLICY_BLOCK) {
                err = -EPERM;
                goto error;
        }
        if (!(flags & XFRM_LOOKUP_ICMP)) {
                dst = dst_orig;
                goto ok;
        }
        err = -ENOENT;
error:
        dst_release(dst);
dropdst:
        if (!(flags & XFRM_LOOKUP_KEEP_DST_REF))
                dst_release(dst_orig);
        xfrm_pols_put(pols, drop_pols);
        return ERR_PTR(err);
}
EXPORT_SYMBOL(xfrm_lookup_with_ifid);
/* Main function: finds/creates a bundle for given flow.
 *
 * At the moment we eat a raw IP route. Mostly to speed up lookups
 * on interfaces with disabled IPsec.
 */
struct dst_entry *xfrm_lookup(struct net *net, struct dst_entry *dst_orig,
                              const struct flowi *fl, const struct sock *sk,
                              int flags)
{
        return xfrm_lookup_with_ifid(net, dst_orig, fl, sk, flags, 0);
}
EXPORT_SYMBOL(xfrm_lookup);

/* Callers of xfrm_lookup_route() must ensure a call to dst_output().
 * Otherwise we may send out blackholed packets.
 */
struct dst_entry *xfrm_lookup_route(struct net *net, struct dst_entry *dst_orig,
                                    const struct flowi *fl,
                                    const struct sock *sk, int flags)
{
        struct dst_entry *dst = xfrm_lookup(net, dst_orig, fl, sk,
                                            flags | XFRM_LOOKUP_QUEUE |
                                            XFRM_LOOKUP_KEEP_DST_REF);

        if (PTR_ERR(dst) == -EREMOTE)
                return make_blackhole(net, dst_orig->ops->family, dst_orig);

        if (IS_ERR(dst))
                dst_release(dst_orig);

        return dst;
}
EXPORT_SYMBOL(xfrm_lookup_route);
static inline int
xfrm_secpath_reject(int idx, struct sk_buff *skb, const struct flowi *fl)
{
        struct sec_path *sp = skb_sec_path(skb);
        struct xfrm_state *x;

        if (!sp || idx < 0 || idx >= sp->len)
                return 0;
        x = sp->xvec[idx];
        if (!x->type->reject)
                return 0;
        return x->type->reject(x, skb, fl);
}
  2815. /* When skb is transformed back to its "native" form, we have to
  2816. * check policy restrictions. At the moment we make this in maximally
  2817. * stupid way. Shame on me. :-) Of course, connected sockets must
  2818. * have policy cached at them.
  2819. */
static inline int
xfrm_state_ok(const struct xfrm_tmpl *tmpl, const struct xfrm_state *x,
	      unsigned short family, u32 if_id)
{
	if (xfrm_state_kern(x))
		return tmpl->optional && !xfrm_state_addr_cmp(tmpl, x, tmpl->encap_family);
	return	x->id.proto == tmpl->id.proto &&
		(x->id.spi == tmpl->id.spi || !tmpl->id.spi) &&
		(x->props.reqid == tmpl->reqid || !tmpl->reqid) &&
		x->props.mode == tmpl->mode &&
		(tmpl->allalgs || (tmpl->aalgos & (1<<x->props.aalgo)) ||
		 !(xfrm_id_proto_match(tmpl->id.proto, IPSEC_PROTO_ANY))) &&
		!(x->props.mode != XFRM_MODE_TRANSPORT &&
		  xfrm_state_addr_cmp(tmpl, x, family)) &&
		(if_id == 0 || if_id == x->if_id);
}

/*
 * Zero or a positive value is returned when validation succeeds (either
 * the bypass index because of optional transport mode, or the next index
 * after the secpath state that matched the template).
 * -1 is returned when no matching template is found.
 * Otherwise "-2 - errored_index" is returned.
 */
static inline int
xfrm_policy_ok(const struct xfrm_tmpl *tmpl, const struct sec_path *sp, int start,
	       unsigned short family, u32 if_id)
{
	int idx = start;

	if (tmpl->optional) {
		if (tmpl->mode == XFRM_MODE_TRANSPORT)
			return start;
	} else
		start = -1;
	for (; idx < sp->len; idx++) {
		if (xfrm_state_ok(tmpl, sp->xvec[idx], family, if_id))
			return ++idx;
		if (sp->xvec[idx]->props.mode != XFRM_MODE_TRANSPORT) {
			if (idx < sp->verified_cnt) {
				/* Secpath entry previously verified,
				 * consider optional and continue searching.
				 */
				continue;
			}

			if (start == -1)
				start = -2-idx;
			break;
		}
	}
	return start;
}
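
/* Fill a struct flowi from the dissected flow keys. With @reverse, the
 * source/destination addresses and ports are swapped so the flow
 * describes the opposite direction (used for input policy checks).
 */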
static void
decode_session4(const struct xfrm_flow_keys *flkeys, struct flowi *fl, bool reverse)
{
	struct flowi4 *fl4 = &fl->u.ip4;

	memset(fl4, 0, sizeof(struct flowi4));

	if (reverse) {
		fl4->saddr = flkeys->addrs.ipv4.dst;
		fl4->daddr = flkeys->addrs.ipv4.src;
		fl4->fl4_sport = flkeys->ports.dst;
		fl4->fl4_dport = flkeys->ports.src;
	} else {
		fl4->saddr = flkeys->addrs.ipv4.src;
		fl4->daddr = flkeys->addrs.ipv4.dst;
		fl4->fl4_sport = flkeys->ports.src;
		fl4->fl4_dport = flkeys->ports.dst;
	}

	switch (flkeys->basic.ip_proto) {
	case IPPROTO_GRE:
		fl4->fl4_gre_key = flkeys->gre.keyid;
		break;
	case IPPROTO_ICMP:
		fl4->fl4_icmp_type = flkeys->icmp.type;
		fl4->fl4_icmp_code = flkeys->icmp.code;
		break;
	}

	fl4->flowi4_proto = flkeys->basic.ip_proto;
	fl4->flowi4_tos = flkeys->ip.tos & ~INET_ECN_MASK;
}

#if IS_ENABLED(CONFIG_IPV6)
static void
decode_session6(const struct xfrm_flow_keys *flkeys, struct flowi *fl, bool reverse)
{
	struct flowi6 *fl6 = &fl->u.ip6;

	memset(fl6, 0, sizeof(struct flowi6));

	if (reverse) {
		fl6->saddr = flkeys->addrs.ipv6.dst;
		fl6->daddr = flkeys->addrs.ipv6.src;
		fl6->fl6_sport = flkeys->ports.dst;
		fl6->fl6_dport = flkeys->ports.src;
	} else {
		fl6->saddr = flkeys->addrs.ipv6.src;
		fl6->daddr = flkeys->addrs.ipv6.dst;
		fl6->fl6_sport = flkeys->ports.src;
		fl6->fl6_dport = flkeys->ports.dst;
	}

	switch (flkeys->basic.ip_proto) {
	case IPPROTO_GRE:
		fl6->fl6_gre_key = flkeys->gre.keyid;
		break;
	case IPPROTO_ICMPV6:
		fl6->fl6_icmp_type = flkeys->icmp.type;
		fl6->fl6_icmp_code = flkeys->icmp.code;
		break;
	}

	fl6->flowi6_proto = flkeys->basic.ip_proto;
}
#endif
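
/* Decode the flow of @skb (addresses, ports, protocol, mark, oif) into
 * @fl for policy matching. With @reverse, decode as if the packet were
 * travelling in the opposite direction. Returns 0 on success,
 * -EAFNOSUPPORT for unhandled families, or the security hook's error.
 */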
int __xfrm_decode_session(struct net *net, struct sk_buff *skb, struct flowi *fl,
			  unsigned int family, int reverse)
{
	struct xfrm_flow_keys flkeys;

	memset(&flkeys, 0, sizeof(flkeys));
	__skb_flow_dissect(net, skb, &xfrm_session_dissector, &flkeys,
			   NULL, 0, 0, 0, FLOW_DISSECTOR_F_STOP_AT_ENCAP);

	switch (family) {
	case AF_INET:
		decode_session4(&flkeys, fl, reverse);
		break;
#if IS_ENABLED(CONFIG_IPV6)
	case AF_INET6:
		decode_session6(&flkeys, fl, reverse);
		break;
#endif
	default:
		return -EAFNOSUPPORT;
	}

	fl->flowi_mark = skb->mark;
	if (reverse) {
		fl->flowi_oif = skb->skb_iif;
	} else {
		int oif = 0;

		if (skb_dst(skb) && skb_dst(skb)->dev)
			oif = skb_dst(skb)->dev->ifindex;

		fl->flowi_oif = oif;
	}

	return security_xfrm_decode_session(skb, &fl->flowi_secid);
}
EXPORT_SYMBOL(__xfrm_decode_session);
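
/* Scan the secpath from index @k for an entry that is not transport
 * mode; store its index in *idxp and return 1 if found, 0 otherwise.
 */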
static inline int secpath_has_nontransport(const struct sec_path *sp, int k, int *idxp)
{
	for (; k < sp->len; k++) {
		if (sp->xvec[k]->props.mode != XFRM_MODE_TRANSPORT) {
			*idxp = k;
			return 1;
		}
	}

	return 0;
}
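
/* True if @fl describes an ICMP(v6) error that quotes an inner packet we
 * may need to map back to its original flow: dest-unreachable and
 * time-exceeded for IPv4, plus packet-too-big for IPv6.
 */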
static bool icmp_err_packet(const struct flowi *fl, unsigned short family)
{
	const struct flowi4 *fl4 = &fl->u.ip4;

	if (family == AF_INET &&
	    fl4->flowi4_proto == IPPROTO_ICMP &&
	    (fl4->fl4_icmp_type == ICMP_DEST_UNREACH ||
	     fl4->fl4_icmp_type == ICMP_TIME_EXCEEDED))
		return true;

#if IS_ENABLED(CONFIG_IPV6)
	if (family == AF_INET6) {
		const struct flowi6 *fl6 = &fl->u.ip6;

		if (fl6->flowi6_proto == IPPROTO_ICMPV6 &&
		    (fl6->fl6_icmp_type == ICMPV6_DEST_UNREACH ||
		     fl6->fl6_icmp_type == ICMPV6_PKT_TOOBIG ||
		     fl6->fl6_icmp_type == ICMPV6_TIME_EXCEED))
			return true;
	}
#endif
	return false;
}
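
/* Decode the flow of the packet quoted inside an ICMP error into @fl1,
 * reversed so it matches the original direction. Note the inverted
 * return convention: false on success, true on any failure (clone,
 * pull or decode), letting callers bail out with their default result.
 */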
static bool xfrm_icmp_flow_decode(struct sk_buff *skb, unsigned short family,
				  const struct flowi *fl, struct flowi *fl1)
{
	bool ret = true;
	struct sk_buff *newskb = skb_clone(skb, GFP_ATOMIC);
	int hl = family == AF_INET ? (sizeof(struct iphdr) + sizeof(struct icmphdr)) :
		 (sizeof(struct ipv6hdr) + sizeof(struct icmp6hdr));

	if (!newskb)
		return true;

	if (!pskb_pull(newskb, hl))
		goto out;

	skb_reset_network_header(newskb);

	if (xfrm_decode_session_reverse(dev_net(skb->dev), newskb, fl1, family) < 0)
		goto out;

	fl1->flowi_oif = fl->flowi_oif;
	fl1->flowi_mark = fl->flowi_mark;
	fl1->flowi_tos = fl->flowi_tos;
	nf_nat_decode_session(newskb, fl1, family);
	ret = false;

out:
	consume_skb(newskb);
	return ret;
}
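
/* For ICMP errors, match @sel against the flow of the quoted inner
 * packet rather than against the error packet itself.
 */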
static bool xfrm_selector_inner_icmp_match(struct sk_buff *skb, unsigned short family,
					   const struct xfrm_selector *sel,
					   const struct flowi *fl)
{
	bool ret = false;

	if (icmp_err_packet(fl, family)) {
		struct flowi fl1;

		if (xfrm_icmp_flow_decode(skb, family, fl, &fl1))
			return ret;

		ret = xfrm_selector_match(sel, &fl1, family);
	}

	return ret;
}
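
/* On input, look up the FWD policy that covers the inner flow quoted in
 * an ICMP error; returns NULL when not applicable or on lookup error.
 */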
static inline struct
xfrm_policy *xfrm_in_fwd_icmp(struct sk_buff *skb,
			      const struct flowi *fl, unsigned short family,
			      u32 if_id)
{
	struct xfrm_policy *pol = NULL;

	if (icmp_err_packet(fl, family)) {
		struct flowi fl1;
		struct net *net = dev_net(skb->dev);

		if (xfrm_icmp_flow_decode(skb, family, fl, &fl1))
			return pol;

		pol = xfrm_policy_lookup(net, &fl1, family, XFRM_POLICY_FWD, if_id);
		if (IS_ERR(pol))
			pol = NULL;
	}
	return pol;
}
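
/* When forwarding an ICMP error, prefer the transform bundle of the
 * inner flow it quotes (if one exists), so the error travels back
 * through the same tunnel as the packet that triggered it.
 */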
static inline struct
dst_entry *xfrm_out_fwd_icmp(struct sk_buff *skb, struct flowi *fl,
			     unsigned short family, struct dst_entry *dst)
{
	if (icmp_err_packet(fl, family)) {
		struct net *net = dev_net(skb->dev);
		struct dst_entry *dst2;
		struct flowi fl1;

		if (xfrm_icmp_flow_decode(skb, family, fl, &fl1))
			return dst;

		dst_hold(dst);

		dst2 = xfrm_lookup(net, dst, &fl1, NULL, (XFRM_LOOKUP_QUEUE | XFRM_LOOKUP_ICMP));

		if (IS_ERR(dst2))
			return dst;

		if (dst2->xfrm) {
			dst_release(dst);
			dst = dst2;
		} else {
			dst_release(dst2);
		}
	}
	return dst;
}
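
/* Check @skb against inbound (or forward) policy: verify the used SAs
 * against their selectors, find the applicable policy (socket policy
 * first, then the SPD, then the ICMP-error fallback on the forward
 * path), and validate the secpath against the policy's templates.
 * Returns 1 if the packet is acceptable, 0 if it must be dropped.
 */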
int __xfrm_policy_check(struct sock *sk, int dir, struct sk_buff *skb,
			unsigned short family)
{
	struct net *net = dev_net(skb->dev);
	struct xfrm_policy *pol;
	struct xfrm_policy *pols[XFRM_POLICY_TYPE_MAX];
	int npols = 0;
	int xfrm_nr;
	int pi;
	int reverse;
	struct flowi fl;
	int xerr_idx = -1;
	const struct xfrm_if_cb *ifcb;
	struct sec_path *sp;
	u32 if_id = 0;

	rcu_read_lock();
	ifcb = xfrm_if_get_cb();

	if (ifcb) {
		struct xfrm_if_decode_session_result r;

		if (ifcb->decode_session(skb, family, &r)) {
			if_id = r.if_id;
			net = r.net;
		}
	}
	rcu_read_unlock();

	reverse = dir & ~XFRM_POLICY_MASK;
	dir &= XFRM_POLICY_MASK;

	if (__xfrm_decode_session(net, skb, &fl, family, reverse) < 0) {
		XFRM_INC_STATS(net, LINUX_MIB_XFRMINHDRERROR);
		return 0;
	}

	nf_nat_decode_session(skb, &fl, family);

	/* First, check used SA against their selectors. */
	sp = skb_sec_path(skb);
	if (sp) {
		int i;

		for (i = sp->len - 1; i >= 0; i--) {
			struct xfrm_state *x = sp->xvec[i];
			int ret = 0;

			if (!xfrm_selector_match(&x->sel, &fl, family)) {
				ret = 1;
				if (x->props.flags & XFRM_STATE_ICMP &&
				    xfrm_selector_inner_icmp_match(skb, family, &x->sel, &fl))
					ret = 0;
				if (ret) {
					XFRM_INC_STATS(net, LINUX_MIB_XFRMINSTATEMISMATCH);
					return 0;
				}
			}
		}
	}

	pol = NULL;
	sk = sk_to_full_sk(sk);
	if (sk && sk->sk_policy[dir]) {
		pol = xfrm_sk_policy_lookup(sk, dir, &fl, family, if_id);
		if (IS_ERR(pol)) {
			XFRM_INC_STATS(net, LINUX_MIB_XFRMINPOLERROR);
			return 0;
		}
	}

	if (!pol)
		pol = xfrm_policy_lookup(net, &fl, family, dir, if_id);

	if (IS_ERR(pol)) {
		XFRM_INC_STATS(net, LINUX_MIB_XFRMINPOLERROR);
		return 0;
	}

	if (!pol && dir == XFRM_POLICY_FWD)
		pol = xfrm_in_fwd_icmp(skb, &fl, family, if_id);

	if (!pol) {
		const bool is_crypto_offload = sp &&
			(xfrm_input_state(skb)->xso.type == XFRM_DEV_OFFLOAD_CRYPTO);

		if (net->xfrm.policy_default[dir] == XFRM_USERPOLICY_BLOCK) {
			XFRM_INC_STATS(net, LINUX_MIB_XFRMINNOPOLS);
			return 0;
		}

		if (sp && secpath_has_nontransport(sp, 0, &xerr_idx) && !is_crypto_offload) {
			xfrm_secpath_reject(xerr_idx, skb, &fl);
			XFRM_INC_STATS(net, LINUX_MIB_XFRMINNOPOLS);
			return 0;
		}
		return 1;
	}

	/* This lockless write can happen from different cpus. */
	WRITE_ONCE(pol->curlft.use_time, ktime_get_real_seconds());

	pols[0] = pol;
	npols++;
#ifdef CONFIG_XFRM_SUB_POLICY
	if (pols[0]->type != XFRM_POLICY_TYPE_MAIN) {
		pols[1] = xfrm_policy_lookup_bytype(net, XFRM_POLICY_TYPE_MAIN,
						    &fl, family,
						    XFRM_POLICY_IN, if_id);
		if (pols[1]) {
			if (IS_ERR(pols[1])) {
				XFRM_INC_STATS(net, LINUX_MIB_XFRMINPOLERROR);
				xfrm_pol_put(pols[0]);
				return 0;
			}
			/* This write can happen from different cpus. */
			WRITE_ONCE(pols[1]->curlft.use_time,
				   ktime_get_real_seconds());
			npols++;
		}
	}
#endif

	if (pol->action == XFRM_POLICY_ALLOW) {
		static struct sec_path dummy;
		struct xfrm_tmpl *tp[XFRM_MAX_DEPTH];
		struct xfrm_tmpl *stp[XFRM_MAX_DEPTH];
		struct xfrm_tmpl **tpp = tp;
		int ti = 0;
		int i, k;

		sp = skb_sec_path(skb);
		if (!sp)
			sp = &dummy;

		for (pi = 0; pi < npols; pi++) {
			if (pols[pi] != pol &&
			    pols[pi]->action != XFRM_POLICY_ALLOW) {
				XFRM_INC_STATS(net, LINUX_MIB_XFRMINPOLBLOCK);
				goto reject;
			}
			if (ti + pols[pi]->xfrm_nr >= XFRM_MAX_DEPTH) {
				XFRM_INC_STATS(net, LINUX_MIB_XFRMINBUFFERERROR);
				goto reject_error;
			}
			for (i = 0; i < pols[pi]->xfrm_nr; i++)
				tpp[ti++] = &pols[pi]->xfrm_vec[i];
		}
		xfrm_nr = ti;

		if (npols > 1) {
			xfrm_tmpl_sort(stp, tpp, xfrm_nr, family);
			tpp = stp;
		}

		/* For each tunnel xfrm, find the first matching tmpl.
		 * For each tmpl before that, find corresponding xfrm.
		 * Order is _important_. Later we will implement some
		 * barriers, but at the moment barriers are implied
		 * between each two transformations.
		 * Upon success, marks secpath entries as having been
		 * verified to allow them to be skipped in future policy
		 * checks (e.g. nested tunnels).
		 */
		for (i = xfrm_nr-1, k = 0; i >= 0; i--) {
			k = xfrm_policy_ok(tpp[i], sp, k, family, if_id);
			if (k < 0) {
				if (k < -1)
					/* "-2 - errored_index" returned */
					xerr_idx = -(2+k);
				XFRM_INC_STATS(net, LINUX_MIB_XFRMINTMPLMISMATCH);
				goto reject;
			}
		}

		if (secpath_has_nontransport(sp, k, &xerr_idx)) {
			XFRM_INC_STATS(net, LINUX_MIB_XFRMINTMPLMISMATCH);
			goto reject;
		}

		xfrm_pols_put(pols, npols);
		sp->verified_cnt = k;

		return 1;
	}

	XFRM_INC_STATS(net, LINUX_MIB_XFRMINPOLBLOCK);

reject:
	xfrm_secpath_reject(xerr_idx, skb, &fl);
reject_error:
	xfrm_pols_put(pols, npols);
	return 0;
}
EXPORT_SYMBOL(__xfrm_policy_check);
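
/* Re-route a forwarded packet through IPsec if forward policy says so:
 * replace skb's dst with the transform bundle (or a queueing dst while
 * the needed SAs are still being negotiated). Returns 1 on success,
 * 0 to drop the packet.
 */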
int __xfrm_route_forward(struct sk_buff *skb, unsigned short family)
{
	struct net *net = dev_net(skb->dev);
	struct flowi fl;
	struct dst_entry *dst;
	int res = 1;

	if (xfrm_decode_session(net, skb, &fl, family) < 0) {
		XFRM_INC_STATS(net, LINUX_MIB_XFRMFWDHDRERROR);
		return 0;
	}

	skb_dst_force(skb);
	if (!skb_dst(skb)) {
		XFRM_INC_STATS(net, LINUX_MIB_XFRMFWDHDRERROR);
		return 0;
	}

	dst = xfrm_lookup(net, skb_dst(skb), &fl, NULL, XFRM_LOOKUP_QUEUE);
	if (IS_ERR(dst)) {
		res = 0;
		dst = NULL;
	}

	if (dst && !dst->xfrm)
		dst = xfrm_out_fwd_icmp(skb, &fl, family, dst);

	skb_dst_set(skb, dst);
	return res;
}
EXPORT_SYMBOL(__xfrm_route_forward);

/* Optimize later using cookies and generation ids. */
static struct dst_entry *xfrm_dst_check(struct dst_entry *dst, u32 cookie)
{
	/* Code (such as __xfrm4_bundle_create()) sets dst->obsolete
	 * to DST_OBSOLETE_FORCE_CHK to force all XFRM destinations to
	 * get validated by dst_ops->check on every use. We do this
	 * because when a normal route referenced by an XFRM dst is
	 * obsoleted we do not go looking around for all parent
	 * referencing XFRM dsts so that we can invalidate them. It
	 * is just too much work. Instead we make the checks here on
	 * every use. For example:
	 *
	 *	XFRM dst A --> IPv4 dst X
	 *
	 * X is the "xdst->route" of A (X is also the "dst->path" of A
	 * in this example). If X is marked obsolete, "A" will not
	 * notice. That's what we are validating here via the
	 * stale_bundle() check.
	 *
	 * When a dst is removed from the fib tree, DST_OBSOLETE_DEAD
	 * will be marked on it. This will force stale_bundle() to fail
	 * on any xdst bundle with this dst linked in it.
	 */
	if (dst->obsolete < 0 && !stale_bundle(dst))
		return dst;

	return NULL;
}

static int stale_bundle(struct dst_entry *dst)
{
	return !xfrm_bundle_ok((struct xfrm_dst *)dst);
}

void xfrm_dst_ifdown(struct dst_entry *dst, struct net_device *dev)
{
	while ((dst = xfrm_dst_child(dst)) && dst->xfrm && dst->dev == dev) {
		dst->dev = blackhole_netdev;
		dev_hold(dst->dev);
		dev_put(dev);
	}
}
EXPORT_SYMBOL(xfrm_dst_ifdown);

static void xfrm_link_failure(struct sk_buff *skb)
{
	/* Impossible. Such dst must be popped before it reaches the
	 * point of failure.
	 */
}

static void xfrm_negative_advice(struct sock *sk, struct dst_entry *dst)
{
	if (dst->obsolete)
		sk_dst_reset(sk);
}
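
/* For each xfrm_dst in the bundle, cache the child and route MTUs and
 * set the dst's MTU metric to the smaller of the transformed child MTU
 * and the route MTU.
 */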
static void xfrm_init_pmtu(struct xfrm_dst **bundle, int nr)
{
	while (nr--) {
		struct xfrm_dst *xdst = bundle[nr];
		u32 pmtu, route_mtu_cached;
		struct dst_entry *dst;

		dst = &xdst->u.dst;
		pmtu = dst_mtu(xfrm_dst_child(dst));
		xdst->child_mtu_cached = pmtu;

		pmtu = xfrm_state_mtu(dst->xfrm, pmtu);

		route_mtu_cached = dst_mtu(xdst->route);
		xdst->route_mtu_cached = route_mtu_cached;

		if (pmtu > route_mtu_cached)
			pmtu = route_mtu_cached;

		dst_metric_set(dst, RTAX_MTU, pmtu);
	}
}

/* Check that the bundle accepts the flow and its components are
 * still valid.
 */
static int xfrm_bundle_ok(struct xfrm_dst *first)
{
	struct xfrm_dst *bundle[XFRM_MAX_DEPTH];
	struct dst_entry *dst = &first->u.dst;
	struct xfrm_dst *xdst;
	int start_from, nr;
	u32 mtu;

	if (!dst_check(xfrm_dst_path(dst), ((struct xfrm_dst *)dst)->path_cookie) ||
	    (dst->dev && !netif_running(dst->dev)))
		return 0;

	if (dst->flags & DST_XFRM_QUEUE)
		return 1;

	start_from = nr = 0;
	do {
		struct xfrm_dst *xdst = (struct xfrm_dst *)dst;

		if (dst->xfrm->km.state != XFRM_STATE_VALID)
			return 0;
		if (xdst->xfrm_genid != dst->xfrm->genid)
			return 0;
		if (xdst->num_pols > 0 &&
		    xdst->policy_genid != atomic_read(&xdst->pols[0]->genid))
			return 0;

		bundle[nr++] = xdst;

		mtu = dst_mtu(xfrm_dst_child(dst));
		if (xdst->child_mtu_cached != mtu) {
			start_from = nr;
			xdst->child_mtu_cached = mtu;
		}

		if (!dst_check(xdst->route, xdst->route_cookie))
			return 0;
		mtu = dst_mtu(xdst->route);
		if (xdst->route_mtu_cached != mtu) {
			start_from = nr;
			xdst->route_mtu_cached = mtu;
		}

		dst = xfrm_dst_child(dst);
	} while (dst->xfrm);

	if (likely(!start_from))
		return 1;

	xdst = bundle[start_from - 1];
	mtu = xdst->child_mtu_cached;
	while (start_from--) {
		dst = &xdst->u.dst;

		mtu = xfrm_state_mtu(dst->xfrm, mtu);
		if (mtu > xdst->route_mtu_cached)
			mtu = xdst->route_mtu_cached;
		dst_metric_set(dst, RTAX_MTU, mtu);
		if (!start_from)
			break;

		xdst = bundle[start_from - 1];
		xdst->child_mtu_cached = mtu;
	}

	return 1;
}

static unsigned int xfrm_default_advmss(const struct dst_entry *dst)
{
	return dst_metric_advmss(xfrm_dst_path(dst));
}

static unsigned int xfrm_mtu(const struct dst_entry *dst)
{
	unsigned int mtu = dst_metric_raw(dst, RTAX_MTU);

	return mtu ? : dst_mtu(xfrm_dst_path(dst));
}
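
/* Follow the chain of states and substitute the destination (or care-of)
 * address of the tunnel-mode states for @daddr, so that neighbour
 * resolution and confirmation use the address packets are really sent to.
 */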
static const void *xfrm_get_dst_nexthop(const struct dst_entry *dst,
					const void *daddr)
{
	while (dst->xfrm) {
		const struct xfrm_state *xfrm = dst->xfrm;

		dst = xfrm_dst_child(dst);

		if (xfrm->props.mode == XFRM_MODE_TRANSPORT)
			continue;
		if (xfrm->type->flags & XFRM_TYPE_REMOTE_COADDR)
			daddr = xfrm->coaddr;
		else if (!(xfrm->type->flags & XFRM_TYPE_LOCAL_COADDR))
			daddr = &xfrm->id.daddr;
	}
	return daddr;
}

static struct neighbour *xfrm_neigh_lookup(const struct dst_entry *dst,
					   struct sk_buff *skb,
					   const void *daddr)
{
	const struct dst_entry *path = xfrm_dst_path(dst);

	if (!skb)
		daddr = xfrm_get_dst_nexthop(dst, daddr);
	return path->ops->neigh_lookup(path, skb, daddr);
}

static void xfrm_confirm_neigh(const struct dst_entry *dst, const void *daddr)
{
	const struct dst_entry *path = xfrm_dst_path(dst);

	daddr = xfrm_get_dst_nexthop(dst, daddr);
	path->ops->confirm_neigh(path, daddr);
}
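
/* Register per-family policy ops and plug the xfrm defaults into any
 * dst_ops hooks the family left unset. Returns -EEXIST if the family
 * is already registered.
 */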
int xfrm_policy_register_afinfo(const struct xfrm_policy_afinfo *afinfo, int family)
{
	int err = 0;

	if (WARN_ON(family >= ARRAY_SIZE(xfrm_policy_afinfo)))
		return -EAFNOSUPPORT;

	spin_lock(&xfrm_policy_afinfo_lock);
	if (unlikely(xfrm_policy_afinfo[family] != NULL))
		err = -EEXIST;
	else {
		struct dst_ops *dst_ops = afinfo->dst_ops;

		if (likely(dst_ops->kmem_cachep == NULL))
			dst_ops->kmem_cachep = xfrm_dst_cache;
		if (likely(dst_ops->check == NULL))
			dst_ops->check = xfrm_dst_check;
		if (likely(dst_ops->default_advmss == NULL))
			dst_ops->default_advmss = xfrm_default_advmss;
		if (likely(dst_ops->mtu == NULL))
			dst_ops->mtu = xfrm_mtu;
		if (likely(dst_ops->negative_advice == NULL))
			dst_ops->negative_advice = xfrm_negative_advice;
		if (likely(dst_ops->link_failure == NULL))
			dst_ops->link_failure = xfrm_link_failure;
		if (likely(dst_ops->neigh_lookup == NULL))
			dst_ops->neigh_lookup = xfrm_neigh_lookup;
		if (likely(!dst_ops->confirm_neigh))
			dst_ops->confirm_neigh = xfrm_confirm_neigh;
		rcu_assign_pointer(xfrm_policy_afinfo[family], afinfo);
	}
	spin_unlock(&xfrm_policy_afinfo_lock);

	return err;
}
EXPORT_SYMBOL(xfrm_policy_register_afinfo);

void xfrm_policy_unregister_afinfo(const struct xfrm_policy_afinfo *afinfo)
{
	struct dst_ops *dst_ops = afinfo->dst_ops;
	int i;

	for (i = 0; i < ARRAY_SIZE(xfrm_policy_afinfo); i++) {
		if (xfrm_policy_afinfo[i] != afinfo)
			continue;
		RCU_INIT_POINTER(xfrm_policy_afinfo[i], NULL);
		break;
	}

	synchronize_rcu();

	dst_ops->kmem_cachep = NULL;
	dst_ops->check = NULL;
	dst_ops->negative_advice = NULL;
	dst_ops->link_failure = NULL;
}
EXPORT_SYMBOL(xfrm_policy_unregister_afinfo);

void xfrm_if_register_cb(const struct xfrm_if_cb *ifcb)
{
	spin_lock(&xfrm_if_cb_lock);
	rcu_assign_pointer(xfrm_if_cb, ifcb);
	spin_unlock(&xfrm_if_cb_lock);
}
EXPORT_SYMBOL(xfrm_if_register_cb);

void xfrm_if_unregister_cb(void)
{
	RCU_INIT_POINTER(xfrm_if_cb, NULL);
	synchronize_rcu();
}
EXPORT_SYMBOL(xfrm_if_unregister_cb);

#ifdef CONFIG_XFRM_STATISTICS
static int __net_init xfrm_statistics_init(struct net *net)
{
	int rv;

	net->mib.xfrm_statistics = alloc_percpu(struct linux_xfrm_mib);
	if (!net->mib.xfrm_statistics)
		return -ENOMEM;
	rv = xfrm_proc_init(net);
	if (rv < 0)
		free_percpu(net->mib.xfrm_statistics);
	return rv;
}

static void xfrm_statistics_fini(struct net *net)
{
	xfrm_proc_fini(net);
	free_percpu(net->mib.xfrm_statistics);
}
#else
static int __net_init xfrm_statistics_init(struct net *net)
{
	return 0;
}

static void xfrm_statistics_fini(struct net *net)
{
}
#endif
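
/* Per-netns policy bootstrap: allocate the by-index and per-direction
 * by-destination hash tables (8 buckets each to start; grown later by
 * the policy_hash_work resize worker) and set up the hash-threshold
 * rebuild machinery. The dst slab and the inexact-policy rhashtable
 * are global and created once, for init_net.
 */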
static int __net_init xfrm_policy_init(struct net *net)
{
	unsigned int hmask, sz;
	int dir, err;

	if (net_eq(net, &init_net)) {
		xfrm_dst_cache = KMEM_CACHE(xfrm_dst, SLAB_HWCACHE_ALIGN | SLAB_PANIC);
		err = rhashtable_init(&xfrm_policy_inexact_table,
				      &xfrm_pol_inexact_params);
		BUG_ON(err);
	}

	hmask = 8 - 1;
	sz = (hmask+1) * sizeof(struct hlist_head);

	net->xfrm.policy_byidx = xfrm_hash_alloc(sz);
	if (!net->xfrm.policy_byidx)
		goto out_byidx;
	net->xfrm.policy_idx_hmask = hmask;

	for (dir = 0; dir < XFRM_POLICY_MAX; dir++) {
		struct xfrm_policy_hash *htab;

		net->xfrm.policy_count[dir] = 0;
		net->xfrm.policy_count[XFRM_POLICY_MAX + dir] = 0;

		htab = &net->xfrm.policy_bydst[dir];
		htab->table = xfrm_hash_alloc(sz);
		if (!htab->table)
			goto out_bydst;
		htab->hmask = hmask;
		htab->dbits4 = 32;
		htab->sbits4 = 32;
		htab->dbits6 = 128;
		htab->sbits6 = 128;
	}
	net->xfrm.policy_hthresh.lbits4 = 32;
	net->xfrm.policy_hthresh.rbits4 = 32;
	net->xfrm.policy_hthresh.lbits6 = 128;
	net->xfrm.policy_hthresh.rbits6 = 128;

	seqlock_init(&net->xfrm.policy_hthresh.lock);

	INIT_LIST_HEAD(&net->xfrm.policy_all);
	INIT_LIST_HEAD(&net->xfrm.inexact_bins);
	INIT_WORK(&net->xfrm.policy_hash_work, xfrm_hash_resize);
	INIT_WORK(&net->xfrm.policy_hthresh.work, xfrm_hash_rebuild);
	return 0;

out_bydst:
	for (dir--; dir >= 0; dir--) {
		struct xfrm_policy_hash *htab;

		htab = &net->xfrm.policy_bydst[dir];
		xfrm_hash_free(htab->table, sz);
	}
	xfrm_hash_free(net->xfrm.policy_byidx, sz);
out_byidx:
	return -ENOMEM;
}

static void xfrm_policy_fini(struct net *net)
{
	struct xfrm_pol_inexact_bin *b, *t;
	unsigned int sz;
	int dir;

	flush_work(&net->xfrm.policy_hash_work);
#ifdef CONFIG_XFRM_SUB_POLICY
	xfrm_policy_flush(net, XFRM_POLICY_TYPE_SUB, false);
#endif
	xfrm_policy_flush(net, XFRM_POLICY_TYPE_MAIN, false);

	WARN_ON(!list_empty(&net->xfrm.policy_all));

	for (dir = 0; dir < XFRM_POLICY_MAX; dir++) {
		struct xfrm_policy_hash *htab;

		htab = &net->xfrm.policy_bydst[dir];
		sz = (htab->hmask + 1) * sizeof(struct hlist_head);
		WARN_ON(!hlist_empty(htab->table));
		xfrm_hash_free(htab->table, sz);
	}

	sz = (net->xfrm.policy_idx_hmask + 1) * sizeof(struct hlist_head);
	WARN_ON(!hlist_empty(net->xfrm.policy_byidx));
	xfrm_hash_free(net->xfrm.policy_byidx, sz);

	spin_lock_bh(&net->xfrm.xfrm_policy_lock);
	list_for_each_entry_safe(b, t, &net->xfrm.inexact_bins, inexact_bins)
		__xfrm_policy_inexact_prune_bin(b, true);
	spin_unlock_bh(&net->xfrm.xfrm_policy_lock);
}

static int __net_init xfrm_net_init(struct net *net)
{
	int rv;

	/* Initialize the per-net locks here */
	spin_lock_init(&net->xfrm.xfrm_state_lock);
	spin_lock_init(&net->xfrm.xfrm_policy_lock);
	seqcount_spinlock_init(&net->xfrm.xfrm_policy_hash_generation, &net->xfrm.xfrm_policy_lock);
	mutex_init(&net->xfrm.xfrm_cfg_mutex);
	net->xfrm.policy_default[XFRM_POLICY_IN] = XFRM_USERPOLICY_ACCEPT;
	net->xfrm.policy_default[XFRM_POLICY_FWD] = XFRM_USERPOLICY_ACCEPT;
	net->xfrm.policy_default[XFRM_POLICY_OUT] = XFRM_USERPOLICY_ACCEPT;

	rv = xfrm_statistics_init(net);
	if (rv < 0)
		goto out_statistics;
	rv = xfrm_state_init(net);
	if (rv < 0)
		goto out_state;
	rv = xfrm_policy_init(net);
	if (rv < 0)
		goto out_policy;
	rv = xfrm_sysctl_init(net);
	if (rv < 0)
		goto out_sysctl;
	rv = xfrm_nat_keepalive_net_init(net);
	if (rv < 0)
		goto out_nat_keepalive;

	return 0;

out_nat_keepalive:
	xfrm_sysctl_fini(net);
out_sysctl:
	xfrm_policy_fini(net);
out_policy:
	xfrm_state_fini(net);
out_state:
	xfrm_statistics_fini(net);
out_statistics:
	return rv;
}

static void __net_exit xfrm_net_exit(struct net *net)
{
	xfrm_nat_keepalive_net_fini(net);
	xfrm_sysctl_fini(net);
	xfrm_policy_fini(net);
	xfrm_state_fini(net);
	xfrm_statistics_fini(net);
}

static struct pernet_operations __net_initdata xfrm_net_ops = {
	.init = xfrm_net_init,
	.exit = xfrm_net_exit,
};

static const struct flow_dissector_key xfrm_flow_dissector_keys[] = {
	{
		.key_id = FLOW_DISSECTOR_KEY_CONTROL,
		.offset = offsetof(struct xfrm_flow_keys, control),
	},
	{
		.key_id = FLOW_DISSECTOR_KEY_BASIC,
		.offset = offsetof(struct xfrm_flow_keys, basic),
	},
	{
		.key_id = FLOW_DISSECTOR_KEY_IPV4_ADDRS,
		.offset = offsetof(struct xfrm_flow_keys, addrs.ipv4),
	},
	{
		.key_id = FLOW_DISSECTOR_KEY_IPV6_ADDRS,
		.offset = offsetof(struct xfrm_flow_keys, addrs.ipv6),
	},
	{
		.key_id = FLOW_DISSECTOR_KEY_PORTS,
		.offset = offsetof(struct xfrm_flow_keys, ports),
	},
	{
		.key_id = FLOW_DISSECTOR_KEY_GRE_KEYID,
		.offset = offsetof(struct xfrm_flow_keys, gre),
	},
	{
		.key_id = FLOW_DISSECTOR_KEY_IP,
		.offset = offsetof(struct xfrm_flow_keys, ip),
	},
	{
		.key_id = FLOW_DISSECTOR_KEY_ICMP,
		.offset = offsetof(struct xfrm_flow_keys, icmp),
	},
};

void __init xfrm_init(void)
{
	skb_flow_dissector_init(&xfrm_session_dissector,
				xfrm_flow_dissector_keys,
				ARRAY_SIZE(xfrm_flow_dissector_keys));

	register_pernet_subsys(&xfrm_net_ops);
	xfrm_dev_init();
	xfrm_input_init();

#ifdef CONFIG_XFRM_ESPINTCP
	espintcp_init();
#endif

	register_xfrm_state_bpf();
	xfrm_nat_keepalive_init(AF_INET);
}

#ifdef CONFIG_AUDITSYSCALL
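/* Append the policy's security context and selector (addresses plus any
 * non-host prefix lengths) to an audit record.
 */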
static void xfrm_audit_common_policyinfo(struct xfrm_policy *xp,
					 struct audit_buffer *audit_buf)
{
	struct xfrm_sec_ctx *ctx = xp->security;
	struct xfrm_selector *sel = &xp->selector;

	if (ctx)
		audit_log_format(audit_buf, " sec_alg=%u sec_doi=%u sec_obj=%s",
				 ctx->ctx_alg, ctx->ctx_doi, ctx->ctx_str);

	switch (sel->family) {
	case AF_INET:
		audit_log_format(audit_buf, " src=%pI4", &sel->saddr.a4);
		if (sel->prefixlen_s != 32)
			audit_log_format(audit_buf, " src_prefixlen=%d",
					 sel->prefixlen_s);
		audit_log_format(audit_buf, " dst=%pI4", &sel->daddr.a4);
		if (sel->prefixlen_d != 32)
			audit_log_format(audit_buf, " dst_prefixlen=%d",
					 sel->prefixlen_d);
		break;
	case AF_INET6:
		audit_log_format(audit_buf, " src=%pI6", sel->saddr.a6);
		if (sel->prefixlen_s != 128)
			audit_log_format(audit_buf, " src_prefixlen=%d",
					 sel->prefixlen_s);
		audit_log_format(audit_buf, " dst=%pI6", sel->daddr.a6);
		if (sel->prefixlen_d != 128)
			audit_log_format(audit_buf, " dst_prefixlen=%d",
					 sel->prefixlen_d);
		break;
	}
}

void xfrm_audit_policy_add(struct xfrm_policy *xp, int result, bool task_valid)
{
	struct audit_buffer *audit_buf;

	audit_buf = xfrm_audit_start("SPD-add");
	if (audit_buf == NULL)
		return;
	xfrm_audit_helper_usrinfo(task_valid, audit_buf);
	audit_log_format(audit_buf, " res=%u", result);
	xfrm_audit_common_policyinfo(xp, audit_buf);
	audit_log_end(audit_buf);
}
EXPORT_SYMBOL_GPL(xfrm_audit_policy_add);

void xfrm_audit_policy_delete(struct xfrm_policy *xp, int result,
			      bool task_valid)
{
	struct audit_buffer *audit_buf;

	audit_buf = xfrm_audit_start("SPD-delete");
	if (audit_buf == NULL)
		return;
	xfrm_audit_helper_usrinfo(task_valid, audit_buf);
	audit_log_format(audit_buf, " res=%u", result);
	xfrm_audit_common_policyinfo(xp, audit_buf);
	audit_log_end(audit_buf);
}
EXPORT_SYMBOL_GPL(xfrm_audit_policy_delete);
#endif

#ifdef CONFIG_XFRM_MIGRATE
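/* Find and take a reference on the policy that a MIGRATE request
 * targets, by building a flow from the selector and running a policy
 * lookup with it.
 */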
static struct xfrm_policy *xfrm_migrate_policy_find(const struct xfrm_selector *sel,
						    u8 dir, u8 type, struct net *net, u32 if_id)
{
	struct xfrm_policy *pol;
	struct flowi fl;

	memset(&fl, 0, sizeof(fl));

	fl.flowi_proto = sel->proto;

	switch (sel->family) {
	case AF_INET:
		fl.u.ip4.saddr = sel->saddr.a4;
		fl.u.ip4.daddr = sel->daddr.a4;
		if (sel->proto == IPSEC_ULPROTO_ANY)
			break;
		fl.u.flowi4_oif = sel->ifindex;
		fl.u.ip4.fl4_sport = sel->sport;
		fl.u.ip4.fl4_dport = sel->dport;
		break;
	case AF_INET6:
		fl.u.ip6.saddr = sel->saddr.in6;
		fl.u.ip6.daddr = sel->daddr.in6;
		if (sel->proto == IPSEC_ULPROTO_ANY)
			break;
		fl.u.flowi6_oif = sel->ifindex;
		fl.u.ip6.fl4_sport = sel->sport;
		fl.u.ip6.fl4_dport = sel->dport;
		break;
	default:
		return ERR_PTR(-EAFNOSUPPORT);
	}

	rcu_read_lock();

	pol = xfrm_policy_lookup_bytype(net, type, &fl, sel->family, dir, if_id);
	if (IS_ERR_OR_NULL(pol))
		goto out_unlock;

	if (!xfrm_pol_hold_rcu(pol))
		pol = NULL;
out_unlock:
	rcu_read_unlock();
	return pol;
}
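
/* Does template @t describe the SA that migrate entry @m wants to move?
 * Mode, protocol and (if set) reqid must match; tunnel/BEET templates
 * must also match the old endpoint addresses.
 */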
static int migrate_tmpl_match(const struct xfrm_migrate *m, const struct xfrm_tmpl *t)
{
	int match = 0;

	if (t->mode == m->mode && t->id.proto == m->proto &&
	    (m->reqid == 0 || t->reqid == m->reqid)) {
		switch (t->mode) {
		case XFRM_MODE_TUNNEL:
		case XFRM_MODE_BEET:
			if (xfrm_addr_equal(&t->id.daddr, &m->old_daddr,
					    m->old_family) &&
			    xfrm_addr_equal(&t->saddr, &m->old_saddr,
					    m->old_family)) {
				match = 1;
			}
			break;
		case XFRM_MODE_TRANSPORT:
			/* In transport mode the template stores no IP
			 * addresses, hence we just compare mode and
			 * protocol.
			 */
			match = 1;
			break;
		default:
			break;
		}
	}
	return match;
}

/* Update endpoint address(es) of template(s). */
static int xfrm_policy_migrate(struct xfrm_policy *pol,
			       struct xfrm_migrate *m, int num_migrate,
			       struct netlink_ext_ack *extack)
{
	struct xfrm_migrate *mp;
	int i, j, n = 0;

	write_lock_bh(&pol->lock);
	if (unlikely(pol->walk.dead)) {
		/* target policy has been deleted */
		NL_SET_ERR_MSG(extack, "Target policy not found");
		write_unlock_bh(&pol->lock);
		return -ENOENT;
	}

	for (i = 0; i < pol->xfrm_nr; i++) {
		for (j = 0, mp = m; j < num_migrate; j++, mp++) {
			if (!migrate_tmpl_match(mp, &pol->xfrm_vec[i]))
				continue;
			n++;
			if (pol->xfrm_vec[i].mode != XFRM_MODE_TUNNEL &&
			    pol->xfrm_vec[i].mode != XFRM_MODE_BEET)
				continue;
			/* update endpoints */
			memcpy(&pol->xfrm_vec[i].id.daddr, &mp->new_daddr,
			       sizeof(pol->xfrm_vec[i].id.daddr));
			memcpy(&pol->xfrm_vec[i].saddr, &mp->new_saddr,
			       sizeof(pol->xfrm_vec[i].saddr));
			pol->xfrm_vec[i].encap_family = mp->new_family;
			/* flush bundles */
			atomic_inc(&pol->genid);
		}
	}

	write_unlock_bh(&pol->lock);

	if (!n)
		return -ENODATA;

	return 0;
}

static int xfrm_migrate_check(const struct xfrm_migrate *m, int num_migrate,
			      struct netlink_ext_ack *extack)
{
	int i, j;

	if (num_migrate < 1 || num_migrate > XFRM_MAX_DEPTH) {
		NL_SET_ERR_MSG(extack, "Invalid number of SAs to migrate, must be 0 < num <= XFRM_MAX_DEPTH (6)");
		return -EINVAL;
	}

	for (i = 0; i < num_migrate; i++) {
		if (xfrm_addr_any(&m[i].new_daddr, m[i].new_family) ||
		    xfrm_addr_any(&m[i].new_saddr, m[i].new_family)) {
			NL_SET_ERR_MSG(extack, "Addresses in the MIGRATE attribute's list cannot be null");
			return -EINVAL;
		}

		/* check if there is any duplicated entry */
		for (j = i + 1; j < num_migrate; j++) {
			if (!memcmp(&m[i].old_daddr, &m[j].old_daddr,
				    sizeof(m[i].old_daddr)) &&
			    !memcmp(&m[i].old_saddr, &m[j].old_saddr,
				    sizeof(m[i].old_saddr)) &&
			    m[i].proto == m[j].proto &&
			    m[i].mode == m[j].mode &&
			    m[i].reqid == m[j].reqid &&
			    m[i].old_family == m[j].old_family) {
				NL_SET_ERR_MSG(extack, "Entries in the MIGRATE attribute's list must be unique");
				return -EINVAL;
			}
		}
	}

	return 0;
}
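
/* MIGRATE entry point: move SAs and the policy that uses them to new
 * endpoint addresses. Stages:
 *   0 - sanity-check the migrate list and direction;
 *   1 - find the target policy;
 *   2 - clone each matching state onto its new addresses;
 *   3 - rewrite the policy's templates;
 *   4 - delete the old states;
 *   5 - notify key managers.
 * On failure after stage 2, the freshly cloned states are deleted again.
 */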
int xfrm_migrate(const struct xfrm_selector *sel, u8 dir, u8 type,
		 struct xfrm_migrate *m, int num_migrate,
		 struct xfrm_kmaddress *k, struct net *net,
		 struct xfrm_encap_tmpl *encap, u32 if_id,
		 struct netlink_ext_ack *extack)
{
	int i, err, nx_cur = 0, nx_new = 0;
	struct xfrm_policy *pol = NULL;
	struct xfrm_state *x, *xc;
	struct xfrm_state *x_cur[XFRM_MAX_DEPTH];
	struct xfrm_state *x_new[XFRM_MAX_DEPTH];
	struct xfrm_migrate *mp;

	/* Stage 0 - sanity checks */
	err = xfrm_migrate_check(m, num_migrate, extack);
	if (err < 0)
		goto out;

	if (dir >= XFRM_POLICY_MAX) {
		NL_SET_ERR_MSG(extack, "Invalid policy direction");
		err = -EINVAL;
		goto out;
	}

	/* Stage 1 - find policy */
	pol = xfrm_migrate_policy_find(sel, dir, type, net, if_id);
	if (IS_ERR_OR_NULL(pol)) {
		NL_SET_ERR_MSG(extack, "Target policy not found");
		err = IS_ERR(pol) ? PTR_ERR(pol) : -ENOENT;
		goto out;
	}

	/* Stage 2 - find and update state(s) */
	for (i = 0, mp = m; i < num_migrate; i++, mp++) {
		if ((x = xfrm_migrate_state_find(mp, net, if_id))) {
			x_cur[nx_cur] = x;
			nx_cur++;
			xc = xfrm_state_migrate(x, mp, encap);
			if (xc) {
				x_new[nx_new] = xc;
				nx_new++;
			} else {
				err = -ENODATA;
				goto restore_state;
			}
		}
	}

	/* Stage 3 - update policy */
	err = xfrm_policy_migrate(pol, m, num_migrate, extack);
	if (err < 0)
		goto restore_state;

	/* Stage 4 - delete old state(s) */
	if (nx_cur) {
		xfrm_states_put(x_cur, nx_cur);
		xfrm_states_delete(x_cur, nx_cur);
	}

	/* Stage 5 - announce */
	km_migrate(sel, dir, type, m, num_migrate, k, encap);

	xfrm_pol_put(pol);

	return 0;
out:
	return err;

restore_state:
	if (pol)
		xfrm_pol_put(pol);
	if (nx_cur)
		xfrm_states_put(x_cur, nx_cur);
	if (nx_new)
		xfrm_states_delete(x_new, nx_new);

	return err;
}
EXPORT_SYMBOL(xfrm_migrate);
#endif