  1. // SPDX-License-Identifier: GPL-2.0-only
  2. /*
  3. drbd_nl.c
  4. This file is part of DRBD by Philipp Reisner and Lars Ellenberg.
  5. Copyright (C) 2001-2008, LINBIT Information Technologies GmbH.
  6. Copyright (C) 1999-2008, Philipp Reisner <philipp.reisner@linbit.com>.
  7. Copyright (C) 2002-2008, Lars Ellenberg <lars.ellenberg@linbit.com>.
  8. */
  9. #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
  10. #include <linux/module.h>
  11. #include <linux/drbd.h>
  12. #include <linux/in.h>
  13. #include <linux/fs.h>
  14. #include <linux/file.h>
  15. #include <linux/slab.h>
  16. #include <linux/blkpg.h>
  17. #include <linux/cpumask.h>
  18. #include "drbd_int.h"
  19. #include "drbd_protocol.h"
  20. #include "drbd_req.h"
  21. #include "drbd_state_change.h"
  22. #include <linux/unaligned.h>
  23. #include <linux/drbd_limits.h>
  24. #include <linux/kthread.h>
  25. #include <net/genetlink.h>
  26. /* .doit */
  27. // int drbd_adm_create_resource(struct sk_buff *skb, struct genl_info *info);
  28. // int drbd_adm_delete_resource(struct sk_buff *skb, struct genl_info *info);
  29. int drbd_adm_new_minor(struct sk_buff *skb, struct genl_info *info);
  30. int drbd_adm_del_minor(struct sk_buff *skb, struct genl_info *info);
  31. int drbd_adm_new_resource(struct sk_buff *skb, struct genl_info *info);
  32. int drbd_adm_del_resource(struct sk_buff *skb, struct genl_info *info);
  33. int drbd_adm_down(struct sk_buff *skb, struct genl_info *info);
  34. int drbd_adm_set_role(struct sk_buff *skb, struct genl_info *info);
  35. int drbd_adm_attach(struct sk_buff *skb, struct genl_info *info);
  36. int drbd_adm_disk_opts(struct sk_buff *skb, struct genl_info *info);
  37. int drbd_adm_detach(struct sk_buff *skb, struct genl_info *info);
  38. int drbd_adm_connect(struct sk_buff *skb, struct genl_info *info);
  39. int drbd_adm_net_opts(struct sk_buff *skb, struct genl_info *info);
  40. int drbd_adm_resize(struct sk_buff *skb, struct genl_info *info);
  41. int drbd_adm_start_ov(struct sk_buff *skb, struct genl_info *info);
  42. int drbd_adm_new_c_uuid(struct sk_buff *skb, struct genl_info *info);
  43. int drbd_adm_disconnect(struct sk_buff *skb, struct genl_info *info);
  44. int drbd_adm_invalidate(struct sk_buff *skb, struct genl_info *info);
  45. int drbd_adm_invalidate_peer(struct sk_buff *skb, struct genl_info *info);
  46. int drbd_adm_pause_sync(struct sk_buff *skb, struct genl_info *info);
  47. int drbd_adm_resume_sync(struct sk_buff *skb, struct genl_info *info);
  48. int drbd_adm_suspend_io(struct sk_buff *skb, struct genl_info *info);
  49. int drbd_adm_resume_io(struct sk_buff *skb, struct genl_info *info);
  50. int drbd_adm_outdate(struct sk_buff *skb, struct genl_info *info);
  51. int drbd_adm_resource_opts(struct sk_buff *skb, struct genl_info *info);
  52. int drbd_adm_get_status(struct sk_buff *skb, struct genl_info *info);
  53. int drbd_adm_get_timeout_type(struct sk_buff *skb, struct genl_info *info);
  54. /* .dumpit */
  55. int drbd_adm_get_status_all(struct sk_buff *skb, struct netlink_callback *cb);
  56. int drbd_adm_dump_resources(struct sk_buff *skb, struct netlink_callback *cb);
  57. int drbd_adm_dump_devices(struct sk_buff *skb, struct netlink_callback *cb);
  58. int drbd_adm_dump_devices_done(struct netlink_callback *cb);
  59. int drbd_adm_dump_connections(struct sk_buff *skb, struct netlink_callback *cb);
  60. int drbd_adm_dump_connections_done(struct netlink_callback *cb);
  61. int drbd_adm_dump_peer_devices(struct sk_buff *skb, struct netlink_callback *cb);
  62. int drbd_adm_dump_peer_devices_done(struct netlink_callback *cb);
  63. int drbd_adm_get_initial_state(struct sk_buff *skb, struct netlink_callback *cb);
  64. #include <linux/drbd_genl_api.h>
  65. #include "drbd_nla.h"
  66. #include <linux/genl_magic_func.h>
  67. static atomic_t drbd_genl_seq = ATOMIC_INIT(2); /* two. */
  68. static atomic_t notify_genl_seq = ATOMIC_INIT(2); /* two. */
  69. DEFINE_MUTEX(notification_mutex);
   70. /* used with bdev_open_by_path(), to claim our meta data device(s) */
  71. static char *drbd_m_holder = "Hands off! this is DRBD's meta data device.";
  72. static void drbd_adm_send_reply(struct sk_buff *skb, struct genl_info *info)
  73. {
  74. genlmsg_end(skb, genlmsg_data(nlmsg_data(nlmsg_hdr(skb))));
  75. if (genlmsg_reply(skb, info))
  76. pr_err("error sending genl reply\n");
  77. }
   78. /* Used on a fresh "drbd_adm_prepare"d reply_skb, this cannot fail: the only
   79. * reason it could fail is lack of space in the skb, and there are 4k available. */
  80. static int drbd_msg_put_info(struct sk_buff *skb, const char *info)
  81. {
  82. struct nlattr *nla;
  83. int err = -EMSGSIZE;
  84. if (!info || !info[0])
  85. return 0;
  86. nla = nla_nest_start_noflag(skb, DRBD_NLA_CFG_REPLY);
  87. if (!nla)
  88. return err;
  89. err = nla_put_string(skb, T_info_text, info);
  90. if (err) {
  91. nla_nest_cancel(skb, nla);
  92. return err;
  93. } else
  94. nla_nest_end(skb, nla);
  95. return 0;
  96. }
  97. __printf(2, 3)
  98. static int drbd_msg_sprintf_info(struct sk_buff *skb, const char *fmt, ...)
  99. {
  100. va_list args;
  101. struct nlattr *nla, *txt;
  102. int err = -EMSGSIZE;
  103. int len;
  104. nla = nla_nest_start_noflag(skb, DRBD_NLA_CFG_REPLY);
  105. if (!nla)
  106. return err;
  107. txt = nla_reserve(skb, T_info_text, 256);
  108. if (!txt) {
  109. nla_nest_cancel(skb, nla);
  110. return err;
  111. }
  112. va_start(args, fmt);
  113. len = vscnprintf(nla_data(txt), 256, fmt, args);
  114. va_end(args);
  115. /* maybe: retry with larger reserve, if truncated */
  116. txt->nla_len = nla_attr_size(len+1);
  117. nlmsg_trim(skb, (char*)txt + NLA_ALIGN(txt->nla_len));
  118. nla_nest_end(skb, nla);
  119. return 0;
  120. }
  121. /* This would be a good candidate for a "pre_doit" hook,
  122. * and per-family private info->pointers.
  123. * But we need to stay compatible with older kernels.
  124. * If it returns successfully, adm_ctx members are valid.
  125. *
  126. * At this point, we still rely on the global genl_lock().
  127. * If we want to avoid that, and allow "genl_family.parallel_ops", we may need
  128. * to add additional synchronization against object destruction/modification.
  129. */
  130. #define DRBD_ADM_NEED_MINOR 1
  131. #define DRBD_ADM_NEED_RESOURCE 2
  132. #define DRBD_ADM_NEED_CONNECTION 4
  133. static int drbd_adm_prepare(struct drbd_config_context *adm_ctx,
  134. struct sk_buff *skb, struct genl_info *info, unsigned flags)
  135. {
  136. struct drbd_genlmsghdr *d_in = genl_info_userhdr(info);
  137. const u8 cmd = info->genlhdr->cmd;
  138. int err;
  139. memset(adm_ctx, 0, sizeof(*adm_ctx));
  140. /* genl_rcv_msg only checks for CAP_NET_ADMIN on "GENL_ADMIN_PERM" :( */
  141. if (cmd != DRBD_ADM_GET_STATUS && !capable(CAP_NET_ADMIN))
  142. return -EPERM;
  143. adm_ctx->reply_skb = genlmsg_new(NLMSG_GOODSIZE, GFP_KERNEL);
  144. if (!adm_ctx->reply_skb) {
  145. err = -ENOMEM;
  146. goto fail;
  147. }
  148. adm_ctx->reply_dh = genlmsg_put_reply(adm_ctx->reply_skb,
  149. info, &drbd_genl_family, 0, cmd);
   150. /* putting a few bytes into a fresh skb of >= 4k will always succeed,
   151. * but check anyway */
  152. if (!adm_ctx->reply_dh) {
  153. err = -ENOMEM;
  154. goto fail;
  155. }
  156. adm_ctx->reply_dh->minor = d_in->minor;
  157. adm_ctx->reply_dh->ret_code = NO_ERROR;
  158. adm_ctx->volume = VOLUME_UNSPECIFIED;
  159. if (info->attrs[DRBD_NLA_CFG_CONTEXT]) {
  160. struct nlattr *nla;
  161. /* parse and validate only */
  162. err = drbd_cfg_context_from_attrs(NULL, info);
  163. if (err)
  164. goto fail;
  165. /* It was present, and valid,
  166. * copy it over to the reply skb. */
  167. err = nla_put_nohdr(adm_ctx->reply_skb,
  168. info->attrs[DRBD_NLA_CFG_CONTEXT]->nla_len,
  169. info->attrs[DRBD_NLA_CFG_CONTEXT]);
  170. if (err)
  171. goto fail;
  172. /* and assign stuff to the adm_ctx */
  173. nla = nested_attr_tb[__nla_type(T_ctx_volume)];
  174. if (nla)
  175. adm_ctx->volume = nla_get_u32(nla);
  176. nla = nested_attr_tb[__nla_type(T_ctx_resource_name)];
  177. if (nla)
  178. adm_ctx->resource_name = nla_data(nla);
  179. adm_ctx->my_addr = nested_attr_tb[__nla_type(T_ctx_my_addr)];
  180. adm_ctx->peer_addr = nested_attr_tb[__nla_type(T_ctx_peer_addr)];
  181. if ((adm_ctx->my_addr &&
  182. nla_len(adm_ctx->my_addr) > sizeof(adm_ctx->connection->my_addr)) ||
  183. (adm_ctx->peer_addr &&
  184. nla_len(adm_ctx->peer_addr) > sizeof(adm_ctx->connection->peer_addr))) {
  185. err = -EINVAL;
  186. goto fail;
  187. }
  188. }
  189. adm_ctx->minor = d_in->minor;
  190. adm_ctx->device = minor_to_device(d_in->minor);
  191. /* We are protected by the global genl_lock().
  192. * But we may explicitly drop it/retake it in drbd_adm_set_role(),
  193. * so make sure this object stays around. */
  194. if (adm_ctx->device)
  195. kref_get(&adm_ctx->device->kref);
  196. if (adm_ctx->resource_name) {
  197. adm_ctx->resource = drbd_find_resource(adm_ctx->resource_name);
  198. }
  199. if (!adm_ctx->device && (flags & DRBD_ADM_NEED_MINOR)) {
  200. drbd_msg_put_info(adm_ctx->reply_skb, "unknown minor");
  201. return ERR_MINOR_INVALID;
  202. }
  203. if (!adm_ctx->resource && (flags & DRBD_ADM_NEED_RESOURCE)) {
  204. drbd_msg_put_info(adm_ctx->reply_skb, "unknown resource");
  205. if (adm_ctx->resource_name)
  206. return ERR_RES_NOT_KNOWN;
  207. return ERR_INVALID_REQUEST;
  208. }
  209. if (flags & DRBD_ADM_NEED_CONNECTION) {
  210. if (adm_ctx->resource) {
  211. drbd_msg_put_info(adm_ctx->reply_skb, "no resource name expected");
  212. return ERR_INVALID_REQUEST;
  213. }
  214. if (adm_ctx->device) {
  215. drbd_msg_put_info(adm_ctx->reply_skb, "no minor number expected");
  216. return ERR_INVALID_REQUEST;
  217. }
  218. if (adm_ctx->my_addr && adm_ctx->peer_addr)
  219. adm_ctx->connection = conn_get_by_addrs(nla_data(adm_ctx->my_addr),
  220. nla_len(adm_ctx->my_addr),
  221. nla_data(adm_ctx->peer_addr),
  222. nla_len(adm_ctx->peer_addr));
  223. if (!adm_ctx->connection) {
  224. drbd_msg_put_info(adm_ctx->reply_skb, "unknown connection");
  225. return ERR_INVALID_REQUEST;
  226. }
  227. }
  228. /* some more paranoia, if the request was over-determined */
  229. if (adm_ctx->device && adm_ctx->resource &&
  230. adm_ctx->device->resource != adm_ctx->resource) {
  231. pr_warn("request: minor=%u, resource=%s; but that minor belongs to resource %s\n",
  232. adm_ctx->minor, adm_ctx->resource->name,
  233. adm_ctx->device->resource->name);
  234. drbd_msg_put_info(adm_ctx->reply_skb, "minor exists in different resource");
  235. return ERR_INVALID_REQUEST;
  236. }
  237. if (adm_ctx->device &&
  238. adm_ctx->volume != VOLUME_UNSPECIFIED &&
  239. adm_ctx->volume != adm_ctx->device->vnr) {
  240. pr_warn("request: minor=%u, volume=%u; but that minor is volume %u in %s\n",
  241. adm_ctx->minor, adm_ctx->volume,
  242. adm_ctx->device->vnr, adm_ctx->device->resource->name);
  243. drbd_msg_put_info(adm_ctx->reply_skb, "minor exists as different volume");
  244. return ERR_INVALID_REQUEST;
  245. }
  246. /* still, provide adm_ctx->resource always, if possible. */
  247. if (!adm_ctx->resource) {
  248. adm_ctx->resource = adm_ctx->device ? adm_ctx->device->resource
  249. : adm_ctx->connection ? adm_ctx->connection->resource : NULL;
  250. if (adm_ctx->resource)
  251. kref_get(&adm_ctx->resource->kref);
  252. }
  253. return NO_ERROR;
  254. fail:
  255. nlmsg_free(adm_ctx->reply_skb);
  256. adm_ctx->reply_skb = NULL;
  257. return err;
  258. }
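/* Drop the references taken in drbd_adm_prepare(), fill the return code into
 * the reply header, and send the reply skb back to the caller. */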
  259. static int drbd_adm_finish(struct drbd_config_context *adm_ctx,
  260. struct genl_info *info, int retcode)
  261. {
  262. if (adm_ctx->device) {
  263. kref_put(&adm_ctx->device->kref, drbd_destroy_device);
  264. adm_ctx->device = NULL;
  265. }
  266. if (adm_ctx->connection) {
  267. kref_put(&adm_ctx->connection->kref, &drbd_destroy_connection);
  268. adm_ctx->connection = NULL;
  269. }
  270. if (adm_ctx->resource) {
  271. kref_put(&adm_ctx->resource->kref, drbd_destroy_resource);
  272. adm_ctx->resource = NULL;
  273. }
  274. if (!adm_ctx->reply_skb)
  275. return -ENOMEM;
  276. adm_ctx->reply_dh->ret_code = retcode;
  277. drbd_adm_send_reply(adm_ctx->reply_skb, info);
  278. return 0;
  279. }
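/* Fill the DRBD_PEER_AF and DRBD_PEER_ADDRESS environment entries for the
 * usermode helper from the peer address stored in the connection. */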
  280. static void setup_khelper_env(struct drbd_connection *connection, char **envp)
  281. {
  282. char *afs;
  283. /* FIXME: A future version will not allow this case. */
  284. if (connection->my_addr_len == 0 || connection->peer_addr_len == 0)
  285. return;
  286. switch (((struct sockaddr *)&connection->peer_addr)->sa_family) {
  287. case AF_INET6:
  288. afs = "ipv6";
  289. snprintf(envp[4], 60, "DRBD_PEER_ADDRESS=%pI6",
  290. &((struct sockaddr_in6 *)&connection->peer_addr)->sin6_addr);
  291. break;
  292. case AF_INET:
  293. afs = "ipv4";
  294. snprintf(envp[4], 60, "DRBD_PEER_ADDRESS=%pI4",
  295. &((struct sockaddr_in *)&connection->peer_addr)->sin_addr);
  296. break;
  297. default:
  298. afs = "ssocks";
  299. snprintf(envp[4], 60, "DRBD_PEER_ADDRESS=%pI4",
  300. &((struct sockaddr_in *)&connection->peer_addr)->sin_addr);
  301. }
  302. snprintf(envp[3], 20, "DRBD_PEER_AF=%s", afs);
  303. }
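/* Invoke the configured usermode helper with @cmd and "minor-<n>" for this
 * device, broadcasting pre/post helper events; negative errnos from
 * call_usermodehelper() are ignored and reported as 0. */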
  304. int drbd_khelper(struct drbd_device *device, char *cmd)
  305. {
  306. char *envp[] = { "HOME=/",
  307. "TERM=linux",
  308. "PATH=/sbin:/usr/sbin:/bin:/usr/bin",
  309. (char[20]) { }, /* address family */
  310. (char[60]) { }, /* address */
  311. NULL };
  312. char mb[14];
  313. char *argv[] = {drbd_usermode_helper, cmd, mb, NULL };
  314. struct drbd_connection *connection = first_peer_device(device)->connection;
  315. struct sib_info sib;
  316. int ret;
  317. if (current == connection->worker.task)
  318. set_bit(CALLBACK_PENDING, &connection->flags);
  319. snprintf(mb, 14, "minor-%d", device_to_minor(device));
  320. setup_khelper_env(connection, envp);
  321. /* The helper may take some time.
  322. * write out any unsynced meta data changes now */
  323. drbd_md_sync(device);
  324. drbd_info(device, "helper command: %s %s %s\n", drbd_usermode_helper, cmd, mb);
  325. sib.sib_reason = SIB_HELPER_PRE;
  326. sib.helper_name = cmd;
  327. drbd_bcast_event(device, &sib);
  328. notify_helper(NOTIFY_CALL, device, connection, cmd, 0);
  329. ret = call_usermodehelper(drbd_usermode_helper, argv, envp, UMH_WAIT_PROC);
  330. if (ret)
  331. drbd_warn(device, "helper command: %s %s %s exit code %u (0x%x)\n",
  332. drbd_usermode_helper, cmd, mb,
  333. (ret >> 8) & 0xff, ret);
  334. else
  335. drbd_info(device, "helper command: %s %s %s exit code %u (0x%x)\n",
  336. drbd_usermode_helper, cmd, mb,
  337. (ret >> 8) & 0xff, ret);
  338. sib.sib_reason = SIB_HELPER_POST;
  339. sib.helper_exit_code = ret;
  340. drbd_bcast_event(device, &sib);
  341. notify_helper(NOTIFY_RESPONSE, device, connection, cmd, ret);
  342. if (current == connection->worker.task)
  343. clear_bit(CALLBACK_PENDING, &connection->flags);
  344. if (ret < 0) /* Ignore any ERRNOs we got. */
  345. ret = 0;
  346. return ret;
  347. }
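/* Like drbd_khelper(), but connection scoped: the helper is called with the
 * resource name instead of a minor number. */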
  348. enum drbd_peer_state conn_khelper(struct drbd_connection *connection, char *cmd)
  349. {
  350. char *envp[] = { "HOME=/",
  351. "TERM=linux",
  352. "PATH=/sbin:/usr/sbin:/bin:/usr/bin",
  353. (char[20]) { }, /* address family */
  354. (char[60]) { }, /* address */
  355. NULL };
  356. char *resource_name = connection->resource->name;
  357. char *argv[] = {drbd_usermode_helper, cmd, resource_name, NULL };
  358. int ret;
  359. setup_khelper_env(connection, envp);
  360. conn_md_sync(connection);
  361. drbd_info(connection, "helper command: %s %s %s\n", drbd_usermode_helper, cmd, resource_name);
  362. /* TODO: conn_bcast_event() ?? */
  363. notify_helper(NOTIFY_CALL, NULL, connection, cmd, 0);
  364. ret = call_usermodehelper(drbd_usermode_helper, argv, envp, UMH_WAIT_PROC);
  365. if (ret)
  366. drbd_warn(connection, "helper command: %s %s %s exit code %u (0x%x)\n",
  367. drbd_usermode_helper, cmd, resource_name,
  368. (ret >> 8) & 0xff, ret);
  369. else
  370. drbd_info(connection, "helper command: %s %s %s exit code %u (0x%x)\n",
  371. drbd_usermode_helper, cmd, resource_name,
  372. (ret >> 8) & 0xff, ret);
  373. /* TODO: conn_bcast_event() ?? */
  374. notify_helper(NOTIFY_RESPONSE, NULL, connection, cmd, ret);
  375. if (ret < 0) /* Ignore any ERRNOs we got. */
  376. ret = 0;
  377. return ret;
  378. }
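/* Return the strictest fencing policy configured on any peer device of this
 * connection whose local disk is at least D_CONSISTENT. */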
  379. static enum drbd_fencing_p highest_fencing_policy(struct drbd_connection *connection)
  380. {
  381. enum drbd_fencing_p fp = FP_NOT_AVAIL;
  382. struct drbd_peer_device *peer_device;
  383. int vnr;
  384. rcu_read_lock();
  385. idr_for_each_entry(&connection->peer_devices, peer_device, vnr) {
  386. struct drbd_device *device = peer_device->device;
  387. if (get_ldev_if_state(device, D_CONSISTENT)) {
  388. struct disk_conf *disk_conf =
  389. rcu_dereference(peer_device->device->ldev->disk_conf);
  390. fp = max_t(enum drbd_fencing_p, fp, disk_conf->fencing);
  391. put_ldev(device);
  392. }
  393. }
  394. rcu_read_unlock();
  395. return fp;
  396. }
   397. static bool resource_is_suspended(struct drbd_resource *resource)
  398. {
  399. return resource->susp || resource->susp_fen || resource->susp_nod;
  400. }
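/* Try to outdate the peer, running the fence-peer helper if a fencing policy
 * is configured; returns true if the peer's disk can be assumed to be
 * D_OUTDATED or worse afterwards. */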
  401. bool conn_try_outdate_peer(struct drbd_connection *connection)
  402. {
  403. struct drbd_resource * const resource = connection->resource;
  404. unsigned int connect_cnt;
  405. union drbd_state mask = { };
  406. union drbd_state val = { };
  407. enum drbd_fencing_p fp;
  408. char *ex_to_string;
  409. int r;
  410. spin_lock_irq(&resource->req_lock);
  411. if (connection->cstate >= C_WF_REPORT_PARAMS) {
  412. drbd_err(connection, "Expected cstate < C_WF_REPORT_PARAMS\n");
  413. spin_unlock_irq(&resource->req_lock);
  414. return false;
  415. }
  416. connect_cnt = connection->connect_cnt;
  417. spin_unlock_irq(&resource->req_lock);
  418. fp = highest_fencing_policy(connection);
  419. switch (fp) {
  420. case FP_NOT_AVAIL:
  421. drbd_warn(connection, "Not fencing peer, I'm not even Consistent myself.\n");
  422. spin_lock_irq(&resource->req_lock);
  423. if (connection->cstate < C_WF_REPORT_PARAMS) {
  424. _conn_request_state(connection,
  425. (union drbd_state) { { .susp_fen = 1 } },
  426. (union drbd_state) { { .susp_fen = 0 } },
  427. CS_VERBOSE | CS_HARD | CS_DC_SUSP);
  428. /* We are no longer suspended due to the fencing policy.
  429. * We may still be suspended due to the on-no-data-accessible policy.
  430. * If that was OND_IO_ERROR, fail pending requests. */
   431. if (!resource_is_suspended(resource))
  432. _tl_restart(connection, CONNECTION_LOST_WHILE_PENDING);
  433. }
  434. /* Else: in case we raced with a connection handshake,
  435. * let the handshake figure out if we maybe can RESEND,
  436. * and do not resume/fail pending requests here.
  437. * Worst case is we stay suspended for now, which may be
  438. * resolved by either re-establishing the replication link, or
  439. * the next link failure, or eventually the administrator. */
  440. spin_unlock_irq(&resource->req_lock);
  441. return false;
  442. case FP_DONT_CARE:
  443. return true;
  444. default: ;
  445. }
  446. r = conn_khelper(connection, "fence-peer");
  447. switch ((r>>8) & 0xff) {
  448. case P_INCONSISTENT: /* peer is inconsistent */
  449. ex_to_string = "peer is inconsistent or worse";
  450. mask.pdsk = D_MASK;
  451. val.pdsk = D_INCONSISTENT;
  452. break;
  453. case P_OUTDATED: /* peer got outdated, or was already outdated */
  454. ex_to_string = "peer was fenced";
  455. mask.pdsk = D_MASK;
  456. val.pdsk = D_OUTDATED;
  457. break;
  458. case P_DOWN: /* peer was down */
  459. if (conn_highest_disk(connection) == D_UP_TO_DATE) {
  460. /* we will(have) create(d) a new UUID anyways... */
  461. ex_to_string = "peer is unreachable, assumed to be dead";
  462. mask.pdsk = D_MASK;
  463. val.pdsk = D_OUTDATED;
  464. } else {
  465. ex_to_string = "peer unreachable, doing nothing since disk != UpToDate";
  466. }
  467. break;
  468. case P_PRIMARY: /* Peer is primary, voluntarily outdate myself.
  469. * This is useful when an unconnected R_SECONDARY is asked to
  470. * become R_PRIMARY, but finds the other peer being active. */
  471. ex_to_string = "peer is active";
  472. drbd_warn(connection, "Peer is primary, outdating myself.\n");
  473. mask.disk = D_MASK;
  474. val.disk = D_OUTDATED;
  475. break;
  476. case P_FENCING:
  477. /* THINK: do we need to handle this
  478. * like case 4, or more like case 5? */
  479. if (fp != FP_STONITH)
  480. drbd_err(connection, "fence-peer() = 7 && fencing != Stonith !!!\n");
  481. ex_to_string = "peer was stonithed";
  482. mask.pdsk = D_MASK;
  483. val.pdsk = D_OUTDATED;
  484. break;
  485. default:
  486. /* The script is broken ... */
  487. drbd_err(connection, "fence-peer helper broken, returned %d\n", (r>>8)&0xff);
  488. return false; /* Eventually leave IO frozen */
  489. }
  490. drbd_info(connection, "fence-peer helper returned %d (%s)\n",
  491. (r>>8) & 0xff, ex_to_string);
  492. /* Not using
  493. conn_request_state(connection, mask, val, CS_VERBOSE);
   494. here, because we might have been able to re-establish the connection in the
  495. meantime. */
  496. spin_lock_irq(&resource->req_lock);
  497. if (connection->cstate < C_WF_REPORT_PARAMS && !test_bit(STATE_SENT, &connection->flags)) {
  498. if (connection->connect_cnt != connect_cnt)
   499. /* In case the connection was established and dropped
  500. while the fence-peer handler was running, ignore it */
  501. drbd_info(connection, "Ignoring fence-peer exit code\n");
  502. else
  503. _conn_request_state(connection, mask, val, CS_VERBOSE);
  504. }
  505. spin_unlock_irq(&resource->req_lock);
  506. return conn_highest_pdsk(connection) <= D_OUTDATED;
  507. }
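/* kthread body for conn_try_outdate_peer_async(); drops the connection
 * reference taken by its caller. */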
  508. static int _try_outdate_peer_async(void *data)
  509. {
  510. struct drbd_connection *connection = (struct drbd_connection *)data;
  511. conn_try_outdate_peer(connection);
  512. kref_put(&connection->kref, drbd_destroy_connection);
  513. return 0;
  514. }
  515. void conn_try_outdate_peer_async(struct drbd_connection *connection)
  516. {
  517. struct task_struct *opa;
  518. kref_get(&connection->kref);
  519. /* We may have just sent a signal to this thread
  520. * to get it out of some blocking network function.
  521. * Clear signals; otherwise kthread_run(), which internally uses
  522. * wait_on_completion_killable(), will mistake our pending signal
  523. * for a new fatal signal and fail. */
  524. flush_signals(current);
  525. opa = kthread_run(_try_outdate_peer_async, connection, "drbd_async_h");
  526. if (IS_ERR(opa)) {
  527. drbd_err(connection, "out of mem, failed to invoke fence-peer helper\n");
  528. kref_put(&connection->kref, drbd_destroy_connection);
  529. }
  530. }
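/* Change the role of @device to @new_role, retrying the state change a few
 * times; with @force, a local disk that is Inconsistent or better may be
 * forced to D_UP_TO_DATE to allow promotion. */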
  531. enum drbd_state_rv
  532. drbd_set_role(struct drbd_device *const device, enum drbd_role new_role, int force)
  533. {
  534. struct drbd_peer_device *const peer_device = first_peer_device(device);
  535. struct drbd_connection *const connection = peer_device ? peer_device->connection : NULL;
  536. const int max_tries = 4;
  537. enum drbd_state_rv rv = SS_UNKNOWN_ERROR;
  538. struct net_conf *nc;
  539. int try = 0;
  540. int forced = 0;
  541. union drbd_state mask, val;
  542. if (new_role == R_PRIMARY) {
  543. struct drbd_connection *connection;
  544. /* Detect dead peers as soon as possible. */
  545. rcu_read_lock();
  546. for_each_connection(connection, device->resource)
  547. request_ping(connection);
  548. rcu_read_unlock();
  549. }
  550. mutex_lock(device->state_mutex);
  551. mask.i = 0; mask.role = R_MASK;
  552. val.i = 0; val.role = new_role;
  553. while (try++ < max_tries) {
  554. rv = _drbd_request_state_holding_state_mutex(device, mask, val, CS_WAIT_COMPLETE);
  555. /* in case we first succeeded to outdate,
  556. * but now suddenly could establish a connection */
  557. if (rv == SS_CW_FAILED_BY_PEER && mask.pdsk != 0) {
  558. val.pdsk = 0;
  559. mask.pdsk = 0;
  560. continue;
  561. }
  562. if (rv == SS_NO_UP_TO_DATE_DISK && force &&
  563. (device->state.disk < D_UP_TO_DATE &&
  564. device->state.disk >= D_INCONSISTENT)) {
  565. mask.disk = D_MASK;
  566. val.disk = D_UP_TO_DATE;
  567. forced = 1;
  568. continue;
  569. }
  570. if (rv == SS_NO_UP_TO_DATE_DISK &&
  571. device->state.disk == D_CONSISTENT && mask.pdsk == 0) {
  572. D_ASSERT(device, device->state.pdsk == D_UNKNOWN);
  573. if (conn_try_outdate_peer(connection)) {
  574. val.disk = D_UP_TO_DATE;
  575. mask.disk = D_MASK;
  576. }
  577. continue;
  578. }
  579. if (rv == SS_NOTHING_TO_DO)
  580. goto out;
  581. if (rv == SS_PRIMARY_NOP && mask.pdsk == 0) {
  582. if (!conn_try_outdate_peer(connection) && force) {
  583. drbd_warn(device, "Forced into split brain situation!\n");
  584. mask.pdsk = D_MASK;
  585. val.pdsk = D_OUTDATED;
  586. }
  587. continue;
  588. }
  589. if (rv == SS_TWO_PRIMARIES) {
  590. /* Maybe the peer is detected as dead very soon...
  591. retry at most once more in this case. */
  592. if (try < max_tries) {
  593. int timeo;
  594. try = max_tries - 1;
  595. rcu_read_lock();
  596. nc = rcu_dereference(connection->net_conf);
  597. timeo = nc ? (nc->ping_timeo + 1) * HZ / 10 : 1;
  598. rcu_read_unlock();
  599. schedule_timeout_interruptible(timeo);
  600. }
  601. continue;
  602. }
  603. if (rv < SS_SUCCESS) {
  604. rv = _drbd_request_state(device, mask, val,
  605. CS_VERBOSE + CS_WAIT_COMPLETE);
  606. if (rv < SS_SUCCESS)
  607. goto out;
  608. }
  609. break;
  610. }
  611. if (rv < SS_SUCCESS)
  612. goto out;
  613. if (forced)
  614. drbd_warn(device, "Forced to consider local data as UpToDate!\n");
  615. /* Wait until nothing is on the fly :) */
  616. wait_event(device->misc_wait, atomic_read(&device->ap_pending_cnt) == 0);
  617. /* FIXME also wait for all pending P_BARRIER_ACK? */
  618. if (new_role == R_SECONDARY) {
  619. if (get_ldev(device)) {
  620. device->ldev->md.uuid[UI_CURRENT] &= ~(u64)1;
  621. put_ldev(device);
  622. }
  623. } else {
  624. mutex_lock(&device->resource->conf_update);
  625. nc = connection->net_conf;
  626. if (nc)
  627. nc->discard_my_data = 0; /* without copy; single bit op is atomic */
  628. mutex_unlock(&device->resource->conf_update);
  629. if (get_ldev(device)) {
  630. if (((device->state.conn < C_CONNECTED ||
  631. device->state.pdsk <= D_FAILED)
  632. && device->ldev->md.uuid[UI_BITMAP] == 0) || forced)
  633. drbd_uuid_new_current(device);
  634. device->ldev->md.uuid[UI_CURRENT] |= (u64)1;
  635. put_ldev(device);
  636. }
  637. }
   638. /* writeout of activity log covered areas of the bitmap
   639. * to stable storage is already done in the after state change work */
  640. if (device->state.conn >= C_WF_REPORT_PARAMS) {
  641. /* if this was forced, we should consider sync */
  642. if (forced)
  643. drbd_send_uuids(peer_device);
  644. drbd_send_current_state(peer_device);
  645. }
  646. drbd_md_sync(device);
  647. set_disk_ro(device->vdisk, new_role == R_SECONDARY);
  648. kobject_uevent(&disk_to_dev(device->vdisk)->kobj, KOBJ_CHANGE);
  649. out:
  650. mutex_unlock(device->state_mutex);
  651. return rv;
  652. }
  653. static const char *from_attrs_err_to_txt(int err)
  654. {
  655. return err == -ENOMSG ? "required attribute missing" :
  656. err == -EOPNOTSUPP ? "unknown mandatory attribute" :
  657. err == -EEXIST ? "can not change invariant setting" :
  658. "invalid attribute value";
  659. }
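/* Genetlink doit handler for DRBD_ADM_PRIMARY and DRBD_ADM_SECONDARY:
 * parse the optional set_role parameters and call drbd_set_role(). */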
  660. int drbd_adm_set_role(struct sk_buff *skb, struct genl_info *info)
  661. {
  662. struct drbd_config_context adm_ctx;
  663. struct set_role_parms parms;
  664. int err;
  665. enum drbd_ret_code retcode;
  666. enum drbd_state_rv rv;
  667. retcode = drbd_adm_prepare(&adm_ctx, skb, info, DRBD_ADM_NEED_MINOR);
  668. if (!adm_ctx.reply_skb)
  669. return retcode;
  670. if (retcode != NO_ERROR)
  671. goto out;
  672. memset(&parms, 0, sizeof(parms));
  673. if (info->attrs[DRBD_NLA_SET_ROLE_PARMS]) {
  674. err = set_role_parms_from_attrs(&parms, info);
  675. if (err) {
  676. retcode = ERR_MANDATORY_TAG;
  677. drbd_msg_put_info(adm_ctx.reply_skb, from_attrs_err_to_txt(err));
  678. goto out;
  679. }
  680. }
  681. genl_unlock();
  682. mutex_lock(&adm_ctx.resource->adm_mutex);
  683. if (info->genlhdr->cmd == DRBD_ADM_PRIMARY)
  684. rv = drbd_set_role(adm_ctx.device, R_PRIMARY, parms.assume_uptodate);
  685. else
  686. rv = drbd_set_role(adm_ctx.device, R_SECONDARY, 0);
  687. mutex_unlock(&adm_ctx.resource->adm_mutex);
  688. genl_lock();
  689. drbd_adm_finish(&adm_ctx, info, rv);
  690. return 0;
  691. out:
  692. drbd_adm_finish(&adm_ctx, info, retcode);
  693. return 0;
  694. }
  695. /* Initializes the md.*_offset members, so we are able to find
  696. * the on disk meta data.
  697. *
  698. * We currently have two possible layouts:
  699. * external:
  700. * |----------- md_size_sect ------------------|
  701. * [ 4k superblock ][ activity log ][ Bitmap ]
  702. * | al_offset == 8 |
  703. * | bm_offset = al_offset + X |
  704. * ==> bitmap sectors = md_size_sect - bm_offset
  705. *
  706. * internal:
  707. * |----------- md_size_sect ------------------|
  708. * [data.....][ Bitmap ][ activity log ][ 4k superblock ]
  709. * | al_offset < 0 |
  710. * | bm_offset = al_offset - Y |
  711. * ==> bitmap sectors = Y = al_offset - bm_offset
  712. *
  713. * Activity log size used to be fixed 32kB,
  714. * but is about to become configurable.
  715. */
  716. static void drbd_md_set_sector_offsets(struct drbd_device *device,
  717. struct drbd_backing_dev *bdev)
  718. {
  719. sector_t md_size_sect = 0;
  720. unsigned int al_size_sect = bdev->md.al_size_4k * 8;
  721. bdev->md.md_offset = drbd_md_ss(bdev);
  722. switch (bdev->md.meta_dev_idx) {
  723. default:
  724. /* v07 style fixed size indexed meta data */
  725. bdev->md.md_size_sect = MD_128MB_SECT;
  726. bdev->md.al_offset = MD_4kB_SECT;
  727. bdev->md.bm_offset = MD_4kB_SECT + al_size_sect;
  728. break;
  729. case DRBD_MD_INDEX_FLEX_EXT:
  730. /* just occupy the full device; unit: sectors */
  731. bdev->md.md_size_sect = drbd_get_capacity(bdev->md_bdev);
  732. bdev->md.al_offset = MD_4kB_SECT;
  733. bdev->md.bm_offset = MD_4kB_SECT + al_size_sect;
  734. break;
  735. case DRBD_MD_INDEX_INTERNAL:
  736. case DRBD_MD_INDEX_FLEX_INT:
  737. /* al size is still fixed */
  738. bdev->md.al_offset = -al_size_sect;
   739. /* we need (slightly less than) ~ this many bitmap sectors: */
  740. md_size_sect = drbd_get_capacity(bdev->backing_bdev);
  741. md_size_sect = ALIGN(md_size_sect, BM_SECT_PER_EXT);
  742. md_size_sect = BM_SECT_TO_EXT(md_size_sect);
  743. md_size_sect = ALIGN(md_size_sect, 8);
  744. /* plus the "drbd meta data super block",
  745. * and the activity log; */
  746. md_size_sect += MD_4kB_SECT + al_size_sect;
  747. bdev->md.md_size_sect = md_size_sect;
  748. /* bitmap offset is adjusted by 'super' block size */
  749. bdev->md.bm_offset = -md_size_sect + MD_4kB_SECT;
  750. break;
  751. }
  752. }
  753. /* input size is expected to be in KB */
  754. char *ppsize(char *buf, unsigned long long size)
  755. {
  756. /* Needs 9 bytes at max including trailing NUL:
  757. * -1ULL ==> "16384 EB" */
  758. static char units[] = { 'K', 'M', 'G', 'T', 'P', 'E' };
  759. int base = 0;
  760. while (size >= 10000 && base < sizeof(units)-1) {
  761. /* shift + round */
  762. size = (size >> 10) + !!(size & (1<<9));
  763. base++;
  764. }
  765. sprintf(buf, "%u %cB", (unsigned)size, units[base]);
  766. return buf;
  767. }
  768. /* there is still a theoretical deadlock when called from receiver
   769. * on a D_INCONSISTENT R_PRIMARY:
  770. * remote READ does inc_ap_bio, receiver would need to receive answer
  771. * packet from remote to dec_ap_bio again.
  772. * receiver receive_sizes(), comes here,
  773. * waits for ap_bio_cnt == 0. -> deadlock.
  774. * but this cannot happen, actually, because:
  775. * R_PRIMARY D_INCONSISTENT, and peer's disk is unreachable
  776. * (not connected, or bad/no disk on peer):
  777. * see drbd_fail_request_early, ap_bio_cnt is zero.
  778. * R_PRIMARY D_INCONSISTENT, and C_SYNC_TARGET:
  779. * peer may not initiate a resize.
  780. */
  781. /* Note these are not to be confused with
  782. * drbd_adm_suspend_io/drbd_adm_resume_io,
  783. * which are (sub) state changes triggered by admin (drbdsetup),
  784. * and can be long lived.
   785. * This changes a device->flag, is triggered by drbd internals,
  786. * and should be short-lived. */
  787. /* It needs to be a counter, since multiple threads might
  788. independently suspend and resume IO. */
  789. void drbd_suspend_io(struct drbd_device *device)
  790. {
  791. atomic_inc(&device->suspend_cnt);
  792. if (drbd_suspended(device))
  793. return;
  794. wait_event(device->misc_wait, !atomic_read(&device->ap_bio_cnt));
  795. }
  796. void drbd_resume_io(struct drbd_device *device)
  797. {
  798. if (atomic_dec_and_test(&device->suspend_cnt))
  799. wake_up(&device->misc_wait);
  800. }
  801. /*
  802. * drbd_determine_dev_size() - Sets the right device size obeying all constraints
  803. * @device: DRBD device.
  804. *
   805. * Returns a determine_dev_size result; negative values indicate errors.
  806. * You should call drbd_md_sync() after calling this function.
  807. */
  808. enum determine_dev_size
  809. drbd_determine_dev_size(struct drbd_device *device, enum dds_flags flags, struct resize_parms *rs) __must_hold(local)
  810. {
  811. struct md_offsets_and_sizes {
  812. u64 last_agreed_sect;
  813. u64 md_offset;
  814. s32 al_offset;
  815. s32 bm_offset;
  816. u32 md_size_sect;
  817. u32 al_stripes;
  818. u32 al_stripe_size_4k;
  819. } prev;
  820. sector_t u_size, size;
  821. struct drbd_md *md = &device->ldev->md;
  822. void *buffer;
  823. int md_moved, la_size_changed;
  824. enum determine_dev_size rv = DS_UNCHANGED;
  825. /* We may change the on-disk offsets of our meta data below. Lock out
  826. * anything that may cause meta data IO, to avoid acting on incomplete
  827. * layout changes or scribbling over meta data that is in the process
  828. * of being moved.
  829. *
  830. * Move is not exactly correct, btw, currently we have all our meta
  831. * data in core memory, to "move" it we just write it all out, there
  832. * are no reads. */
  833. drbd_suspend_io(device);
  834. buffer = drbd_md_get_buffer(device, __func__); /* Lock meta-data IO */
  835. if (!buffer) {
  836. drbd_resume_io(device);
  837. return DS_ERROR;
  838. }
  839. /* remember current offset and sizes */
  840. prev.last_agreed_sect = md->la_size_sect;
  841. prev.md_offset = md->md_offset;
  842. prev.al_offset = md->al_offset;
  843. prev.bm_offset = md->bm_offset;
  844. prev.md_size_sect = md->md_size_sect;
  845. prev.al_stripes = md->al_stripes;
  846. prev.al_stripe_size_4k = md->al_stripe_size_4k;
  847. if (rs) {
  848. /* rs is non NULL if we should change the AL layout only */
  849. md->al_stripes = rs->al_stripes;
  850. md->al_stripe_size_4k = rs->al_stripe_size / 4;
  851. md->al_size_4k = (u64)rs->al_stripes * rs->al_stripe_size / 4;
  852. }
  853. drbd_md_set_sector_offsets(device, device->ldev);
  854. rcu_read_lock();
  855. u_size = rcu_dereference(device->ldev->disk_conf)->disk_size;
  856. rcu_read_unlock();
  857. size = drbd_new_dev_size(device, device->ldev, u_size, flags & DDSF_FORCED);
  858. if (size < prev.last_agreed_sect) {
  859. if (rs && u_size == 0) {
  860. /* Remove "rs &&" later. This check should always be active, but
  861. right now the receiver expects the permissive behavior */
  862. drbd_warn(device, "Implicit shrink not allowed. "
  863. "Use --size=%llus for explicit shrink.\n",
  864. (unsigned long long)size);
  865. rv = DS_ERROR_SHRINK;
  866. }
  867. if (u_size > size)
  868. rv = DS_ERROR_SPACE_MD;
  869. if (rv != DS_UNCHANGED)
  870. goto err_out;
  871. }
  872. if (get_capacity(device->vdisk) != size ||
  873. drbd_bm_capacity(device) != size) {
  874. int err;
  875. err = drbd_bm_resize(device, size, !(flags & DDSF_NO_RESYNC));
  876. if (unlikely(err)) {
  877. /* currently there is only one error: ENOMEM! */
  878. size = drbd_bm_capacity(device);
  879. if (size == 0) {
  880. drbd_err(device, "OUT OF MEMORY! "
  881. "Could not allocate bitmap!\n");
  882. } else {
  883. drbd_err(device, "BM resizing failed. "
  884. "Leaving size unchanged\n");
  885. }
  886. rv = DS_ERROR;
  887. }
  888. /* racy, see comments above. */
  889. drbd_set_my_capacity(device, size);
  890. md->la_size_sect = size;
  891. }
  892. if (rv <= DS_ERROR)
  893. goto err_out;
  894. la_size_changed = (prev.last_agreed_sect != md->la_size_sect);
  895. md_moved = prev.md_offset != md->md_offset
  896. || prev.md_size_sect != md->md_size_sect;
  897. if (la_size_changed || md_moved || rs) {
  898. u32 prev_flags;
  899. /* We do some synchronous IO below, which may take some time.
   900. * Clear the timer, to avoid scary "timer expired!" messages;
   901. * the "superblock" is written out at least twice below anyway. */
  902. del_timer(&device->md_sync_timer);
  903. /* We won't change the "al-extents" setting, we just may need
  904. * to move the on-disk location of the activity log ringbuffer.
  905. * Lock for transaction is good enough, it may well be "dirty"
  906. * or even "starving". */
  907. wait_event(device->al_wait, lc_try_lock_for_transaction(device->act_log));
  908. /* mark current on-disk bitmap and activity log as unreliable */
  909. prev_flags = md->flags;
  910. md->flags |= MDF_FULL_SYNC | MDF_AL_DISABLED;
  911. drbd_md_write(device, buffer);
  912. drbd_al_initialize(device, buffer);
  913. drbd_info(device, "Writing the whole bitmap, %s\n",
  914. la_size_changed && md_moved ? "size changed and md moved" :
  915. la_size_changed ? "size changed" : "md moved");
  916. /* next line implicitly does drbd_suspend_io()+drbd_resume_io() */
  917. drbd_bitmap_io(device, md_moved ? &drbd_bm_write_all : &drbd_bm_write,
  918. "size changed", BM_LOCKED_MASK, NULL);
/* on-disk bitmap and activity log are authoritative again
  920. * (unless there was an IO error meanwhile...) */
  921. md->flags = prev_flags;
  922. drbd_md_write(device, buffer);
  923. if (rs)
  924. drbd_info(device, "Changed AL layout to al-stripes = %d, al-stripe-size-kB = %d\n",
  925. md->al_stripes, md->al_stripe_size_4k * 4);
  926. }
  927. if (size > prev.last_agreed_sect)
  928. rv = prev.last_agreed_sect ? DS_GREW : DS_GREW_FROM_ZERO;
  929. if (size < prev.last_agreed_sect)
  930. rv = DS_SHRUNK;
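/* The "if (0)" block below is only reachable through the err_out label
 * (goto from the size/bitmap error paths above); it restores the
 * metadata offsets and AL geometry saved in "prev" before falling
 * through to the common unlock/resume cleanup. */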
  931. if (0) {
  932. err_out:
  933. /* restore previous offset and sizes */
  934. md->la_size_sect = prev.last_agreed_sect;
  935. md->md_offset = prev.md_offset;
  936. md->al_offset = prev.al_offset;
  937. md->bm_offset = prev.bm_offset;
  938. md->md_size_sect = prev.md_size_sect;
  939. md->al_stripes = prev.al_stripes;
  940. md->al_stripe_size_4k = prev.al_stripe_size_4k;
  941. md->al_size_4k = (u64)prev.al_stripes * prev.al_stripe_size_4k;
  942. }
  943. lc_unlock(device->act_log);
  944. wake_up(&device->al_wait);
  945. drbd_md_put_buffer(device);
  946. drbd_resume_io(device);
  947. return rv;
  948. }
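/* Roughly, the size decision below works like this: if both the local
 * maximum capacity (m_size) and the peer's reported size (p_size) are
 * known, take the smaller of the two; otherwise fall back to the last
 * agreed size, further limited by whichever of the two is known.  A
 * user-requested size (u_size) wins only if it does not exceed that
 * result.  Illustrative numbers: m_size = 1000000 sectors and
 * p_size = 800000 gives 800000; with u_size = 500000 the result is
 * 500000, while u_size = 900000 is reported as "too big" and the
 * computed 800000 is kept. */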
  949. sector_t
  950. drbd_new_dev_size(struct drbd_device *device, struct drbd_backing_dev *bdev,
  951. sector_t u_size, int assume_peer_has_space)
  952. {
  953. sector_t p_size = device->p_size; /* partner's disk size. */
  954. sector_t la_size_sect = bdev->md.la_size_sect; /* last agreed size. */
  955. sector_t m_size; /* my size */
  956. sector_t size = 0;
  957. m_size = drbd_get_max_capacity(bdev);
  958. if (device->state.conn < C_CONNECTED && assume_peer_has_space) {
  959. drbd_warn(device, "Resize while not connected was forced by the user!\n");
  960. p_size = m_size;
  961. }
  962. if (p_size && m_size) {
  963. size = min_t(sector_t, p_size, m_size);
  964. } else {
  965. if (la_size_sect) {
  966. size = la_size_sect;
  967. if (m_size && m_size < size)
  968. size = m_size;
  969. if (p_size && p_size < size)
  970. size = p_size;
  971. } else {
  972. if (m_size)
  973. size = m_size;
  974. if (p_size)
  975. size = p_size;
  976. }
  977. }
  978. if (size == 0)
  979. drbd_err(device, "Both nodes diskless!\n");
  980. if (u_size) {
  981. if (u_size > size)
  982. drbd_err(device, "Requested disk size is too big (%lu > %lu)\n",
  983. (unsigned long)u_size>>1, (unsigned long)size>>1);
  984. else
  985. size = u_size;
  986. }
  987. return size;
  988. }
  989. /*
  990. * drbd_check_al_size() - Ensures that the AL is of the right size
  991. * @device: DRBD device.
  992. *
  993. * Returns -EBUSY if current al lru is still used, -ENOMEM when allocation
  994. * failed, and 0 on success. You should call drbd_md_sync() after you called
  995. * this function.
  996. */
  997. static int drbd_check_al_size(struct drbd_device *device, struct disk_conf *dc)
  998. {
  999. struct lru_cache *n, *t;
  1000. struct lc_element *e;
  1001. unsigned int in_use;
  1002. int i;
  1003. if (device->act_log &&
  1004. device->act_log->nr_elements == dc->al_extents)
  1005. return 0;
  1006. in_use = 0;
  1007. t = device->act_log;
  1008. n = lc_create("act_log", drbd_al_ext_cache, AL_UPDATES_PER_TRANSACTION,
  1009. dc->al_extents, sizeof(struct lc_element), 0);
  1010. if (n == NULL) {
  1011. drbd_err(device, "Cannot allocate act_log lru!\n");
  1012. return -ENOMEM;
  1013. }
  1014. spin_lock_irq(&device->al_lock);
  1015. if (t) {
  1016. for (i = 0; i < t->nr_elements; i++) {
  1017. e = lc_element_by_index(t, i);
  1018. if (e->refcnt)
  1019. drbd_err(device, "refcnt(%d)==%d\n",
  1020. e->lc_number, e->refcnt);
  1021. in_use += e->refcnt;
  1022. }
  1023. }
  1024. if (!in_use)
  1025. device->act_log = n;
  1026. spin_unlock_irq(&device->al_lock);
  1027. if (in_use) {
  1028. drbd_err(device, "Activity log still in use!\n");
  1029. lc_destroy(n);
  1030. return -EBUSY;
  1031. } else {
  1032. lc_destroy(t);
  1033. }
drbd_md_mark_dirty(device); /* we changed device->act_log->nr_elements */
  1035. return 0;
  1036. }
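/* The mapping implemented below, by agreed protocol version:
 *   not yet C_WF_REPORT_PARAMS -> last known peer_max_bio_size
 *   < 94                       -> min(peer_max_bio_size, DRBD_MAX_SIZE_H80_PACKET)
 *   == 94                      -> DRBD_MAX_SIZE_H80_PACKET
 *   95 .. 99                   -> DRBD_MAX_BIO_SIZE_P95
 *   >= 100                     -> DRBD_MAX_BIO_SIZE
 */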
  1037. static unsigned int drbd_max_peer_bio_size(struct drbd_device *device)
  1038. {
  1039. /*
  1040. * We may ignore peer limits if the peer is modern enough. From 8.3.8
  1041. * onwards the peer can use multiple BIOs for a single peer_request.
  1042. */
  1043. if (device->state.conn < C_WF_REPORT_PARAMS)
  1044. return device->peer_max_bio_size;
  1045. if (first_peer_device(device)->connection->agreed_pro_version < 94)
  1046. return min(device->peer_max_bio_size, DRBD_MAX_SIZE_H80_PACKET);
  1047. /*
  1048. * Correct old drbd (up to 8.3.7) if it believes it can do more than
  1049. * 32KiB.
  1050. */
  1051. if (first_peer_device(device)->connection->agreed_pro_version == 94)
  1052. return DRBD_MAX_SIZE_H80_PACKET;
  1053. /*
  1054. * drbd 8.3.8 onwards, before 8.4.0
  1055. */
  1056. if (first_peer_device(device)->connection->agreed_pro_version < 100)
  1057. return DRBD_MAX_BIO_SIZE_P95;
  1058. return DRBD_MAX_BIO_SIZE;
  1059. }
  1060. static unsigned int drbd_max_discard_sectors(struct drbd_connection *connection)
  1061. {
  1062. /* when we introduced REQ_WRITE_SAME support, we also bumped
  1063. * our maximum supported batch bio size used for discards. */
  1064. if (connection->agreed_features & DRBD_FF_WSAME)
  1065. return DRBD_MAX_BBIO_SECTORS;
  1066. /* before, with DRBD <= 8.4.6, we only allowed up to one AL_EXTENT_SIZE. */
  1067. return AL_EXTENT_SIZE >> 9;
  1068. }
  1069. static bool drbd_discard_supported(struct drbd_connection *connection,
  1070. struct drbd_backing_dev *bdev)
  1071. {
  1072. if (bdev && !bdev_max_discard_sectors(bdev->backing_bdev))
  1073. return false;
  1074. if (connection->cstate >= C_CONNECTED &&
  1075. !(connection->agreed_features & DRBD_FF_TRIM)) {
  1076. drbd_info(connection,
  1077. "peer DRBD too old, does not support TRIM: disabling discards\n");
  1078. return false;
  1079. }
  1080. return true;
  1081. }
  1082. /* This is the workaround for "bio would need to, but cannot, be split" */
  1083. static unsigned int drbd_backing_dev_max_segments(struct drbd_device *device)
  1084. {
  1085. unsigned int max_segments;
  1086. rcu_read_lock();
  1087. max_segments = rcu_dereference(device->ldev->disk_conf)->max_bio_bvecs;
  1088. rcu_read_unlock();
  1089. if (!max_segments)
  1090. return BLK_MAX_SEGMENTS;
  1091. return max_segments;
  1092. }
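/* In short: the new max_hw_sectors is derived from
 * min(DRBD_MAX_BIO_SIZE, local backing limit, peer limit), discard and
 * write-zeroes limits are enabled only when the feature/peer checks
 * above allow them, and everything is applied through
 * queue_limits_start_update()/queue_limits_commit_update(). */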
  1093. void drbd_reconsider_queue_parameters(struct drbd_device *device,
  1094. struct drbd_backing_dev *bdev, struct o_qlim *o)
  1095. {
  1096. struct drbd_connection *connection =
  1097. first_peer_device(device)->connection;
  1098. struct request_queue * const q = device->rq_queue;
  1099. unsigned int now = queue_max_hw_sectors(q) << 9;
  1100. struct queue_limits lim;
  1101. struct request_queue *b = NULL;
  1102. unsigned int new;
  1103. if (bdev) {
  1104. b = bdev->backing_bdev->bd_disk->queue;
  1105. device->local_max_bio_size =
  1106. queue_max_hw_sectors(b) << SECTOR_SHIFT;
  1107. }
  1108. /*
  1109. * We may later detach and re-attach on a disconnected Primary. Avoid
  1110. * decreasing the value in this case.
  1111. *
  1112. * We want to store what we know the peer DRBD can handle, not what the
  1113. * peer IO backend can handle.
  1114. */
  1115. new = min3(DRBD_MAX_BIO_SIZE, device->local_max_bio_size,
  1116. max(drbd_max_peer_bio_size(device), device->peer_max_bio_size));
  1117. if (new != now) {
  1118. if (device->state.role == R_PRIMARY && new < now)
  1119. drbd_err(device, "ASSERT FAILED new < now; (%u < %u)\n",
  1120. new, now);
  1121. drbd_info(device, "max BIO size = %u\n", new);
  1122. }
  1123. lim = queue_limits_start_update(q);
  1124. if (bdev) {
  1125. blk_set_stacking_limits(&lim);
  1126. lim.max_segments = drbd_backing_dev_max_segments(device);
  1127. } else {
  1128. lim.max_segments = BLK_MAX_SEGMENTS;
  1129. }
  1130. lim.max_hw_sectors = new >> SECTOR_SHIFT;
  1131. lim.seg_boundary_mask = PAGE_SIZE - 1;
  1132. /*
  1133. * We don't care for the granularity, really.
  1134. *
  1135. * Stacking limits below should fix it for the local device. Whether or
  1136. * not it is a suitable granularity on the remote device is not our
  1137. * problem, really. If you care, you need to use devices with similar
  1138. * topology on all peers.
  1139. */
  1140. if (drbd_discard_supported(connection, bdev)) {
  1141. lim.discard_granularity = 512;
  1142. lim.max_hw_discard_sectors =
  1143. drbd_max_discard_sectors(connection);
  1144. } else {
  1145. lim.discard_granularity = 0;
  1146. lim.max_hw_discard_sectors = 0;
  1147. }
  1148. if (bdev)
  1149. blk_stack_limits(&lim, &b->limits, 0);
  1150. /*
  1151. * If we can handle "zeroes" efficiently on the protocol, we want to do
  1152. * that, even if our backend does not announce max_write_zeroes_sectors
  1153. * itself.
  1154. */
  1155. if (connection->agreed_features & DRBD_FF_WZEROES)
  1156. lim.max_write_zeroes_sectors = DRBD_MAX_BBIO_SECTORS;
  1157. else
  1158. lim.max_write_zeroes_sectors = 0;
  1159. if ((lim.discard_granularity >> SECTOR_SHIFT) >
  1160. lim.max_hw_discard_sectors) {
  1161. lim.discard_granularity = 0;
  1162. lim.max_hw_discard_sectors = 0;
  1163. }
  1164. if (queue_limits_commit_update(q, &lim))
  1165. drbd_err(device, "setting new queue limits failed\n");
  1166. }
  1167. /* Starts the worker thread */
  1168. static void conn_reconfig_start(struct drbd_connection *connection)
  1169. {
  1170. drbd_thread_start(&connection->worker);
  1171. drbd_flush_workqueue(&connection->sender_work);
  1172. }
  1173. /* if still unconfigured, stops worker again. */
  1174. static void conn_reconfig_done(struct drbd_connection *connection)
  1175. {
  1176. bool stop_threads;
  1177. spin_lock_irq(&connection->resource->req_lock);
  1178. stop_threads = conn_all_vols_unconf(connection) &&
  1179. connection->cstate == C_STANDALONE;
  1180. spin_unlock_irq(&connection->resource->req_lock);
  1181. if (stop_threads) {
  1182. /* ack_receiver thread and ack_sender workqueue are implicitly
  1183. * stopped by receiver in conn_disconnect() */
  1184. drbd_thread_stop(&connection->receiver);
  1185. drbd_thread_stop(&connection->worker);
  1186. }
  1187. }
/* Make sure IO is suspended before calling this function. */
  1189. static void drbd_suspend_al(struct drbd_device *device)
  1190. {
  1191. int s = 0;
  1192. if (!lc_try_lock(device->act_log)) {
  1193. drbd_warn(device, "Failed to lock al in drbd_suspend_al()\n");
  1194. return;
  1195. }
  1196. drbd_al_shrink(device);
  1197. spin_lock_irq(&device->resource->req_lock);
  1198. if (device->state.conn < C_CONNECTED)
  1199. s = !test_and_set_bit(AL_SUSPENDED, &device->flags);
  1200. spin_unlock_irq(&device->resource->req_lock);
  1201. lc_unlock(device->act_log);
  1202. if (s)
  1203. drbd_info(device, "Suspended AL updates\n");
  1204. }
  1205. static bool should_set_defaults(struct genl_info *info)
  1206. {
  1207. struct drbd_genlmsghdr *dh = genl_info_userhdr(info);
  1208. return 0 != (dh->flags & DRBD_GENL_F_SET_DEFAULTS);
  1209. }
  1210. static unsigned int drbd_al_extents_max(struct drbd_backing_dev *bdev)
  1211. {
  1212. /* This is limited by 16 bit "slot" numbers,
  1213. * and by available on-disk context storage.
  1214. *
  1215. * Also (u16)~0 is special (denotes a "free" extent).
  1216. *
  1217. * One transaction occupies one 4kB on-disk block,
  1218. * we have n such blocks in the on disk ring buffer,
  1219. * the "current" transaction may fail (n-1),
  1220. * and there is 919 slot numbers context information per transaction.
  1221. *
  1222. * 72 transaction blocks amounts to more than 2**16 context slots,
  1223. * so cap there first.
  1224. */
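/* Illustrative numbers: with AL_CONTEXT_PER_TRANSACTION == 919, an
 * on-disk AL of al_size_4k == 8 blocks allows at most
 * (8 - 1) * 919 == 6433 al-extents; anything larger than the
 * "sufficient_on_disk" block count is simply capped to
 * DRBD_AL_EXTENTS_MAX. */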
  1225. const unsigned int max_al_nr = DRBD_AL_EXTENTS_MAX;
  1226. const unsigned int sufficient_on_disk =
(max_al_nr + AL_CONTEXT_PER_TRANSACTION - 1)
/ AL_CONTEXT_PER_TRANSACTION;
  1229. unsigned int al_size_4k = bdev->md.al_size_4k;
  1230. if (al_size_4k > sufficient_on_disk)
  1231. return max_al_nr;
  1232. return (al_size_4k - 1) * AL_CONTEXT_PER_TRANSACTION;
  1233. }
  1234. static bool write_ordering_changed(struct disk_conf *a, struct disk_conf *b)
  1235. {
  1236. return a->disk_barrier != b->disk_barrier ||
  1237. a->disk_flushes != b->disk_flushes ||
  1238. a->disk_drain != b->disk_drain;
  1239. }
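/* Clamp al_extents into [DRBD_AL_EXTENTS_MIN, drbd_al_extents_max()],
 * and sanitize rs_discard_granularity: it is switched off if the
 * backing device cannot discard at all, otherwise raised to at least
 * the backing device's discard granularity and capped at its maximum
 * discard size. */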
  1240. static void sanitize_disk_conf(struct drbd_device *device, struct disk_conf *disk_conf,
  1241. struct drbd_backing_dev *nbc)
  1242. {
  1243. struct block_device *bdev = nbc->backing_bdev;
  1244. if (disk_conf->al_extents < DRBD_AL_EXTENTS_MIN)
  1245. disk_conf->al_extents = DRBD_AL_EXTENTS_MIN;
  1246. if (disk_conf->al_extents > drbd_al_extents_max(nbc))
  1247. disk_conf->al_extents = drbd_al_extents_max(nbc);
  1248. if (!bdev_max_discard_sectors(bdev)) {
  1249. if (disk_conf->rs_discard_granularity) {
  1250. disk_conf->rs_discard_granularity = 0; /* disable feature */
  1251. drbd_info(device, "rs_discard_granularity feature disabled\n");
  1252. }
  1253. }
  1254. if (disk_conf->rs_discard_granularity) {
  1255. int orig_value = disk_conf->rs_discard_granularity;
  1256. sector_t discard_size = bdev_max_discard_sectors(bdev) << 9;
  1257. unsigned int discard_granularity = bdev_discard_granularity(bdev);
  1258. int remainder;
  1259. if (discard_granularity > disk_conf->rs_discard_granularity)
  1260. disk_conf->rs_discard_granularity = discard_granularity;
  1261. remainder = disk_conf->rs_discard_granularity %
  1262. discard_granularity;
  1263. disk_conf->rs_discard_granularity += remainder;
  1264. if (disk_conf->rs_discard_granularity > discard_size)
  1265. disk_conf->rs_discard_granularity = discard_size;
  1266. if (disk_conf->rs_discard_granularity != orig_value)
  1267. drbd_info(device, "rs_discard_granularity changed to %d\n",
  1268. disk_conf->rs_discard_granularity);
  1269. }
  1270. }
  1271. static int disk_opts_check_al_size(struct drbd_device *device, struct disk_conf *dc)
  1272. {
  1273. int err = -EBUSY;
  1274. if (device->act_log &&
  1275. device->act_log->nr_elements == dc->al_extents)
  1276. return 0;
  1277. drbd_suspend_io(device);
  1278. /* If IO completion is currently blocked, we would likely wait
  1279. * "forever" for the activity log to become unused. So we don't. */
  1280. if (atomic_read(&device->ap_bio_cnt))
  1281. goto out;
  1282. wait_event(device->al_wait, lc_try_lock(device->act_log));
  1283. drbd_al_shrink(device);
  1284. err = drbd_check_al_size(device, dc);
  1285. lc_unlock(device->act_log);
  1286. wake_up(&device->al_wait);
  1287. out:
  1288. drbd_resume_io(device);
  1289. return err;
  1290. }
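/* Changing disk options at runtime, roughly: take a copy of the
 * current disk_conf, apply defaults/attributes from netlink, sanitize
 * it, possibly allocate a new resync fifo plan and resize the activity
 * log, then publish the new disk_conf via rcu_assign_pointer() under
 * conf_update and free the old one after a grace period. */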
  1291. int drbd_adm_disk_opts(struct sk_buff *skb, struct genl_info *info)
  1292. {
  1293. struct drbd_config_context adm_ctx;
  1294. enum drbd_ret_code retcode;
  1295. struct drbd_device *device;
  1296. struct disk_conf *new_disk_conf, *old_disk_conf;
  1297. struct fifo_buffer *old_plan = NULL, *new_plan = NULL;
  1298. int err;
  1299. unsigned int fifo_size;
  1300. retcode = drbd_adm_prepare(&adm_ctx, skb, info, DRBD_ADM_NEED_MINOR);
  1301. if (!adm_ctx.reply_skb)
  1302. return retcode;
  1303. if (retcode != NO_ERROR)
  1304. goto finish;
  1305. device = adm_ctx.device;
  1306. mutex_lock(&adm_ctx.resource->adm_mutex);
  1307. /* we also need a disk
  1308. * to change the options on */
  1309. if (!get_ldev(device)) {
  1310. retcode = ERR_NO_DISK;
  1311. goto out;
  1312. }
  1313. new_disk_conf = kmalloc(sizeof(struct disk_conf), GFP_KERNEL);
  1314. if (!new_disk_conf) {
  1315. retcode = ERR_NOMEM;
  1316. goto fail;
  1317. }
  1318. mutex_lock(&device->resource->conf_update);
  1319. old_disk_conf = device->ldev->disk_conf;
  1320. *new_disk_conf = *old_disk_conf;
  1321. if (should_set_defaults(info))
  1322. set_disk_conf_defaults(new_disk_conf);
  1323. err = disk_conf_from_attrs_for_change(new_disk_conf, info);
  1324. if (err && err != -ENOMSG) {
  1325. retcode = ERR_MANDATORY_TAG;
  1326. drbd_msg_put_info(adm_ctx.reply_skb, from_attrs_err_to_txt(err));
  1327. goto fail_unlock;
  1328. }
  1329. if (!expect(device, new_disk_conf->resync_rate >= 1))
  1330. new_disk_conf->resync_rate = 1;
  1331. sanitize_disk_conf(device, new_disk_conf, device->ldev);
  1332. if (new_disk_conf->c_plan_ahead > DRBD_C_PLAN_AHEAD_MAX)
  1333. new_disk_conf->c_plan_ahead = DRBD_C_PLAN_AHEAD_MAX;
  1334. fifo_size = (new_disk_conf->c_plan_ahead * 10 * SLEEP_TIME) / HZ;
  1335. if (fifo_size != device->rs_plan_s->size) {
  1336. new_plan = fifo_alloc(fifo_size);
  1337. if (!new_plan) {
drbd_err(device, "kmalloc of fifo_buffer failed\n");
  1339. retcode = ERR_NOMEM;
  1340. goto fail_unlock;
  1341. }
  1342. }
  1343. err = disk_opts_check_al_size(device, new_disk_conf);
  1344. if (err) {
  1345. /* Could be just "busy". Ignore?
  1346. * Introduce dedicated error code? */
  1347. drbd_msg_put_info(adm_ctx.reply_skb,
  1348. "Try again without changing current al-extents setting");
  1349. retcode = ERR_NOMEM;
  1350. goto fail_unlock;
  1351. }
  1352. lock_all_resources();
  1353. retcode = drbd_resync_after_valid(device, new_disk_conf->resync_after);
  1354. if (retcode == NO_ERROR) {
  1355. rcu_assign_pointer(device->ldev->disk_conf, new_disk_conf);
  1356. drbd_resync_after_changed(device);
  1357. }
  1358. unlock_all_resources();
  1359. if (retcode != NO_ERROR)
  1360. goto fail_unlock;
  1361. if (new_plan) {
  1362. old_plan = device->rs_plan_s;
  1363. rcu_assign_pointer(device->rs_plan_s, new_plan);
  1364. }
  1365. mutex_unlock(&device->resource->conf_update);
  1366. if (new_disk_conf->al_updates)
  1367. device->ldev->md.flags &= ~MDF_AL_DISABLED;
  1368. else
  1369. device->ldev->md.flags |= MDF_AL_DISABLED;
  1370. if (new_disk_conf->md_flushes)
  1371. clear_bit(MD_NO_FUA, &device->flags);
  1372. else
  1373. set_bit(MD_NO_FUA, &device->flags);
  1374. if (write_ordering_changed(old_disk_conf, new_disk_conf))
  1375. drbd_bump_write_ordering(device->resource, NULL, WO_BDEV_FLUSH);
  1376. if (old_disk_conf->discard_zeroes_if_aligned !=
  1377. new_disk_conf->discard_zeroes_if_aligned)
  1378. drbd_reconsider_queue_parameters(device, device->ldev, NULL);
  1379. drbd_md_sync(device);
  1380. if (device->state.conn >= C_CONNECTED) {
  1381. struct drbd_peer_device *peer_device;
  1382. for_each_peer_device(peer_device, device)
  1383. drbd_send_sync_param(peer_device);
  1384. }
  1385. kvfree_rcu_mightsleep(old_disk_conf);
  1386. kfree(old_plan);
  1387. mod_timer(&device->request_timer, jiffies + HZ);
  1388. goto success;
  1389. fail_unlock:
  1390. mutex_unlock(&device->resource->conf_update);
  1391. fail:
  1392. kfree(new_disk_conf);
  1393. kfree(new_plan);
  1394. success:
  1395. put_ldev(device);
  1396. out:
  1397. mutex_unlock(&adm_ctx.resource->adm_mutex);
  1398. finish:
  1399. drbd_adm_finish(&adm_ctx, info, retcode);
  1400. return 0;
  1401. }
  1402. static struct file *open_backing_dev(struct drbd_device *device,
  1403. const char *bdev_path, void *claim_ptr, bool do_bd_link)
  1404. {
  1405. struct file *file;
  1406. int err = 0;
  1407. file = bdev_file_open_by_path(bdev_path, BLK_OPEN_READ | BLK_OPEN_WRITE,
  1408. claim_ptr, NULL);
  1409. if (IS_ERR(file)) {
  1410. drbd_err(device, "open(\"%s\") failed with %ld\n",
  1411. bdev_path, PTR_ERR(file));
  1412. return file;
  1413. }
  1414. if (!do_bd_link)
  1415. return file;
  1416. err = bd_link_disk_holder(file_bdev(file), device->vdisk);
  1417. if (err) {
  1418. fput(file);
  1419. drbd_err(device, "bd_link_disk_holder(\"%s\", ...) failed with %d\n",
  1420. bdev_path, err);
  1421. file = ERR_PTR(err);
  1422. }
  1423. return file;
  1424. }
  1425. static int open_backing_devices(struct drbd_device *device,
  1426. struct disk_conf *new_disk_conf,
  1427. struct drbd_backing_dev *nbc)
  1428. {
  1429. struct file *file;
  1430. file = open_backing_dev(device, new_disk_conf->backing_dev, device,
  1431. true);
  1432. if (IS_ERR(file))
  1433. return ERR_OPEN_DISK;
  1434. nbc->backing_bdev = file_bdev(file);
  1435. nbc->backing_bdev_file = file;
  1436. /*
  1437. * meta_dev_idx >= 0: external fixed size, possibly multiple
  1438. * drbd sharing one meta device. TODO in that case, paranoia
  1439. * check that [md_bdev, meta_dev_idx] is not yet used by some
  1440. * other drbd minor! (if you use drbd.conf + drbdadm, that
  1441. * should check it for you already; but if you don't, or
  1442. * someone fooled it, we need to double check here)
  1443. */
  1444. file = open_backing_dev(device, new_disk_conf->meta_dev,
  1445. /* claim ptr: device, if claimed exclusively; shared drbd_m_holder,
  1446. * if potentially shared with other drbd minors */
  1447. (new_disk_conf->meta_dev_idx < 0) ? (void*)device : (void*)drbd_m_holder,
  1448. /* avoid double bd_claim_by_disk() for the same (source,target) tuple,
  1449. * as would happen with internal metadata. */
  1450. (new_disk_conf->meta_dev_idx != DRBD_MD_INDEX_FLEX_INT &&
  1451. new_disk_conf->meta_dev_idx != DRBD_MD_INDEX_INTERNAL));
  1452. if (IS_ERR(file))
  1453. return ERR_OPEN_MD_DISK;
  1454. nbc->md_bdev = file_bdev(file);
  1455. nbc->f_md_bdev = file;
  1456. return NO_ERROR;
  1457. }
  1458. static void close_backing_dev(struct drbd_device *device,
  1459. struct file *bdev_file, bool do_bd_unlink)
  1460. {
  1461. if (!bdev_file)
  1462. return;
  1463. if (do_bd_unlink)
  1464. bd_unlink_disk_holder(file_bdev(bdev_file), device->vdisk);
  1465. fput(bdev_file);
  1466. }
  1467. void drbd_backing_dev_free(struct drbd_device *device, struct drbd_backing_dev *ldev)
  1468. {
  1469. if (ldev == NULL)
  1470. return;
  1471. close_backing_dev(device, ldev->f_md_bdev,
  1472. ldev->md_bdev != ldev->backing_bdev);
  1473. close_backing_dev(device, ldev->backing_bdev_file, true);
  1474. kfree(ldev->disk_conf);
  1475. kfree(ldev);
  1476. }
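/* Attach, in broad strokes: allocate and fill a new drbd_backing_dev
 * from the netlink attributes, open the backing and meta-data devices,
 * read the on-disk meta data, move to D_ATTACHING, determine the
 * device size (possibly writing a fresh bitmap and AL), and finally
 * set the new disk state (D_INCONSISTENT, D_OUTDATED, D_CONSISTENT,
 * D_UP_TO_DATE, or D_NEGOTIATING while connected) based on the
 * meta-data flags.  Error paths fall back to D_DISKLESS and release
 * everything that was allocated here. */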
  1477. int drbd_adm_attach(struct sk_buff *skb, struct genl_info *info)
  1478. {
  1479. struct drbd_config_context adm_ctx;
  1480. struct drbd_device *device;
  1481. struct drbd_peer_device *peer_device;
  1482. struct drbd_connection *connection;
  1483. int err;
  1484. enum drbd_ret_code retcode;
  1485. enum determine_dev_size dd;
  1486. sector_t max_possible_sectors;
  1487. sector_t min_md_device_sectors;
  1488. struct drbd_backing_dev *nbc = NULL; /* new_backing_conf */
  1489. struct disk_conf *new_disk_conf = NULL;
  1490. struct lru_cache *resync_lru = NULL;
  1491. struct fifo_buffer *new_plan = NULL;
  1492. union drbd_state ns, os;
  1493. enum drbd_state_rv rv;
  1494. struct net_conf *nc;
  1495. retcode = drbd_adm_prepare(&adm_ctx, skb, info, DRBD_ADM_NEED_MINOR);
  1496. if (!adm_ctx.reply_skb)
  1497. return retcode;
  1498. if (retcode != NO_ERROR)
  1499. goto finish;
  1500. device = adm_ctx.device;
  1501. mutex_lock(&adm_ctx.resource->adm_mutex);
  1502. peer_device = first_peer_device(device);
  1503. connection = peer_device->connection;
  1504. conn_reconfig_start(connection);
  1505. /* if you want to reconfigure, please tear down first */
  1506. if (device->state.disk > D_DISKLESS) {
  1507. retcode = ERR_DISK_CONFIGURED;
  1508. goto fail;
  1509. }
  1510. /* It may just now have detached because of IO error. Make sure
  1511. * drbd_ldev_destroy is done already, we may end up here very fast,
  1512. * e.g. if someone calls attach from the on-io-error handler,
  1513. * to realize a "hot spare" feature (not that I'd recommend that) */
  1514. wait_event(device->misc_wait, !test_bit(GOING_DISKLESS, &device->flags));
  1515. /* make sure there is no leftover from previous force-detach attempts */
  1516. clear_bit(FORCE_DETACH, &device->flags);
  1517. clear_bit(WAS_IO_ERROR, &device->flags);
  1518. clear_bit(WAS_READ_ERROR, &device->flags);
  1519. /* and no leftover from previously aborted resync or verify, either */
  1520. device->rs_total = 0;
  1521. device->rs_failed = 0;
  1522. atomic_set(&device->rs_pending_cnt, 0);
  1523. /* allocation not in the IO path, drbdsetup context */
  1524. nbc = kzalloc(sizeof(struct drbd_backing_dev), GFP_KERNEL);
  1525. if (!nbc) {
  1526. retcode = ERR_NOMEM;
  1527. goto fail;
  1528. }
  1529. spin_lock_init(&nbc->md.uuid_lock);
  1530. new_disk_conf = kzalloc(sizeof(struct disk_conf), GFP_KERNEL);
  1531. if (!new_disk_conf) {
  1532. retcode = ERR_NOMEM;
  1533. goto fail;
  1534. }
  1535. nbc->disk_conf = new_disk_conf;
  1536. set_disk_conf_defaults(new_disk_conf);
  1537. err = disk_conf_from_attrs(new_disk_conf, info);
  1538. if (err) {
  1539. retcode = ERR_MANDATORY_TAG;
  1540. drbd_msg_put_info(adm_ctx.reply_skb, from_attrs_err_to_txt(err));
  1541. goto fail;
  1542. }
  1543. if (new_disk_conf->c_plan_ahead > DRBD_C_PLAN_AHEAD_MAX)
  1544. new_disk_conf->c_plan_ahead = DRBD_C_PLAN_AHEAD_MAX;
  1545. new_plan = fifo_alloc((new_disk_conf->c_plan_ahead * 10 * SLEEP_TIME) / HZ);
  1546. if (!new_plan) {
  1547. retcode = ERR_NOMEM;
  1548. goto fail;
  1549. }
  1550. if (new_disk_conf->meta_dev_idx < DRBD_MD_INDEX_FLEX_INT) {
  1551. retcode = ERR_MD_IDX_INVALID;
  1552. goto fail;
  1553. }
  1554. rcu_read_lock();
  1555. nc = rcu_dereference(connection->net_conf);
  1556. if (nc) {
  1557. if (new_disk_conf->fencing == FP_STONITH && nc->wire_protocol == DRBD_PROT_A) {
  1558. rcu_read_unlock();
  1559. retcode = ERR_STONITH_AND_PROT_A;
  1560. goto fail;
  1561. }
  1562. }
  1563. rcu_read_unlock();
  1564. retcode = open_backing_devices(device, new_disk_conf, nbc);
  1565. if (retcode != NO_ERROR)
  1566. goto fail;
  1567. if ((nbc->backing_bdev == nbc->md_bdev) !=
  1568. (new_disk_conf->meta_dev_idx == DRBD_MD_INDEX_INTERNAL ||
  1569. new_disk_conf->meta_dev_idx == DRBD_MD_INDEX_FLEX_INT)) {
  1570. retcode = ERR_MD_IDX_INVALID;
  1571. goto fail;
  1572. }
  1573. resync_lru = lc_create("resync", drbd_bm_ext_cache,
  1574. 1, 61, sizeof(struct bm_extent),
  1575. offsetof(struct bm_extent, lce));
  1576. if (!resync_lru) {
  1577. retcode = ERR_NOMEM;
  1578. goto fail;
  1579. }
  1580. /* Read our meta data super block early.
  1581. * This also sets other on-disk offsets. */
  1582. retcode = drbd_md_read(device, nbc);
  1583. if (retcode != NO_ERROR)
  1584. goto fail;
  1585. sanitize_disk_conf(device, new_disk_conf, nbc);
  1586. if (drbd_get_max_capacity(nbc) < new_disk_conf->disk_size) {
  1587. drbd_err(device, "max capacity %llu smaller than disk size %llu\n",
  1588. (unsigned long long) drbd_get_max_capacity(nbc),
  1589. (unsigned long long) new_disk_conf->disk_size);
  1590. retcode = ERR_DISK_TOO_SMALL;
  1591. goto fail;
  1592. }
  1593. if (new_disk_conf->meta_dev_idx < 0) {
  1594. max_possible_sectors = DRBD_MAX_SECTORS_FLEX;
  1595. /* at least one MB, otherwise it does not make sense */
  1596. min_md_device_sectors = (2<<10);
  1597. } else {
  1598. max_possible_sectors = DRBD_MAX_SECTORS;
  1599. min_md_device_sectors = MD_128MB_SECT * (new_disk_conf->meta_dev_idx + 1);
  1600. }
  1601. if (drbd_get_capacity(nbc->md_bdev) < min_md_device_sectors) {
  1602. retcode = ERR_MD_DISK_TOO_SMALL;
  1603. drbd_warn(device, "refusing attach: md-device too small, "
  1604. "at least %llu sectors needed for this meta-disk type\n",
  1605. (unsigned long long) min_md_device_sectors);
  1606. goto fail;
  1607. }
  1608. /* Make sure the new disk is big enough
  1609. * (we may currently be R_PRIMARY with no local disk...) */
  1610. if (drbd_get_max_capacity(nbc) < get_capacity(device->vdisk)) {
  1611. retcode = ERR_DISK_TOO_SMALL;
  1612. goto fail;
  1613. }
  1614. nbc->known_size = drbd_get_capacity(nbc->backing_bdev);
  1615. if (nbc->known_size > max_possible_sectors) {
  1616. drbd_warn(device, "==> truncating very big lower level device "
  1617. "to currently maximum possible %llu sectors <==\n",
  1618. (unsigned long long) max_possible_sectors);
  1619. if (new_disk_conf->meta_dev_idx >= 0)
  1620. drbd_warn(device, "==>> using internal or flexible "
  1621. "meta data may help <<==\n");
  1622. }
  1623. drbd_suspend_io(device);
  1624. /* also wait for the last barrier ack. */
  1625. /* FIXME see also https://daiquiri.linbit/cgi-bin/bugzilla/show_bug.cgi?id=171
  1626. * We need a way to either ignore barrier acks for barriers sent before a device
  1627. * was attached, or a way to wait for all pending barrier acks to come in.
  1628. * As barriers are counted per resource,
  1629. * we'd need to suspend io on all devices of a resource.
  1630. */
  1631. wait_event(device->misc_wait, !atomic_read(&device->ap_pending_cnt) || drbd_suspended(device));
  1632. /* and for any other previously queued work */
  1633. drbd_flush_workqueue(&connection->sender_work);
  1634. rv = _drbd_request_state(device, NS(disk, D_ATTACHING), CS_VERBOSE);
  1635. retcode = (enum drbd_ret_code)rv;
  1636. drbd_resume_io(device);
  1637. if (rv < SS_SUCCESS)
  1638. goto fail;
  1639. if (!get_ldev_if_state(device, D_ATTACHING))
  1640. goto force_diskless;
  1641. if (!device->bitmap) {
  1642. if (drbd_bm_init(device)) {
  1643. retcode = ERR_NOMEM;
  1644. goto force_diskless_dec;
  1645. }
  1646. }
  1647. if (device->state.pdsk != D_UP_TO_DATE && device->ed_uuid &&
  1648. (device->state.role == R_PRIMARY || device->state.peer == R_PRIMARY) &&
  1649. (device->ed_uuid & ~((u64)1)) != (nbc->md.uuid[UI_CURRENT] & ~((u64)1))) {
  1650. drbd_err(device, "Can only attach to data with current UUID=%016llX\n",
  1651. (unsigned long long)device->ed_uuid);
  1652. retcode = ERR_DATA_NOT_CURRENT;
  1653. goto force_diskless_dec;
  1654. }
  1655. /* Since we are diskless, fix the activity log first... */
  1656. if (drbd_check_al_size(device, new_disk_conf)) {
  1657. retcode = ERR_NOMEM;
  1658. goto force_diskless_dec;
  1659. }
  1660. /* Prevent shrinking of consistent devices ! */
  1661. {
  1662. unsigned long long nsz = drbd_new_dev_size(device, nbc, nbc->disk_conf->disk_size, 0);
  1663. unsigned long long eff = nbc->md.la_size_sect;
  1664. if (drbd_md_test_flag(nbc, MDF_CONSISTENT) && nsz < eff) {
  1665. if (nsz == nbc->disk_conf->disk_size) {
  1666. drbd_warn(device, "truncating a consistent device during attach (%llu < %llu)\n", nsz, eff);
  1667. } else {
  1668. drbd_warn(device, "refusing to truncate a consistent device (%llu < %llu)\n", nsz, eff);
  1669. drbd_msg_sprintf_info(adm_ctx.reply_skb,
  1670. "To-be-attached device has last effective > current size, and is consistent\n"
  1671. "(%llu > %llu sectors). Refusing to attach.", eff, nsz);
  1672. retcode = ERR_IMPLICIT_SHRINK;
  1673. goto force_diskless_dec;
  1674. }
  1675. }
  1676. }
  1677. lock_all_resources();
  1678. retcode = drbd_resync_after_valid(device, new_disk_conf->resync_after);
  1679. if (retcode != NO_ERROR) {
  1680. unlock_all_resources();
  1681. goto force_diskless_dec;
  1682. }
  1683. /* Reset the "barriers don't work" bits here, then force meta data to
  1684. * be written, to ensure we determine if barriers are supported. */
  1685. if (new_disk_conf->md_flushes)
  1686. clear_bit(MD_NO_FUA, &device->flags);
  1687. else
  1688. set_bit(MD_NO_FUA, &device->flags);
  1689. /* Point of no return reached.
  1690. * Devices and memory are no longer released by error cleanup below.
* now the device takes over responsibility, and the state engine should
  1692. * clean it up somewhere. */
  1693. D_ASSERT(device, device->ldev == NULL);
  1694. device->ldev = nbc;
  1695. device->resync = resync_lru;
  1696. device->rs_plan_s = new_plan;
  1697. nbc = NULL;
  1698. resync_lru = NULL;
  1699. new_disk_conf = NULL;
  1700. new_plan = NULL;
  1701. drbd_resync_after_changed(device);
  1702. drbd_bump_write_ordering(device->resource, device->ldev, WO_BDEV_FLUSH);
  1703. unlock_all_resources();
  1704. if (drbd_md_test_flag(device->ldev, MDF_CRASHED_PRIMARY))
  1705. set_bit(CRASHED_PRIMARY, &device->flags);
  1706. else
  1707. clear_bit(CRASHED_PRIMARY, &device->flags);
  1708. if (drbd_md_test_flag(device->ldev, MDF_PRIMARY_IND) &&
  1709. !(device->state.role == R_PRIMARY && device->resource->susp_nod))
  1710. set_bit(CRASHED_PRIMARY, &device->flags);
  1711. device->send_cnt = 0;
  1712. device->recv_cnt = 0;
  1713. device->read_cnt = 0;
  1714. device->writ_cnt = 0;
  1715. drbd_reconsider_queue_parameters(device, device->ldev, NULL);
  1716. /* If I am currently not R_PRIMARY,
  1717. * but meta data primary indicator is set,
  1718. * I just now recover from a hard crash,
  1719. * and have been R_PRIMARY before that crash.
  1720. *
  1721. * Now, if I had no connection before that crash
  1722. * (have been degraded R_PRIMARY), chances are that
  1723. * I won't find my peer now either.
  1724. *
  1725. * In that case, and _only_ in that case,
  1726. * we use the degr-wfc-timeout instead of the default,
  1727. * so we can automatically recover from a crash of a
  1728. * degraded but active "cluster" after a certain timeout.
  1729. */
  1730. clear_bit(USE_DEGR_WFC_T, &device->flags);
  1731. if (device->state.role != R_PRIMARY &&
  1732. drbd_md_test_flag(device->ldev, MDF_PRIMARY_IND) &&
  1733. !drbd_md_test_flag(device->ldev, MDF_CONNECTED_IND))
  1734. set_bit(USE_DEGR_WFC_T, &device->flags);
  1735. dd = drbd_determine_dev_size(device, 0, NULL);
  1736. if (dd <= DS_ERROR) {
  1737. retcode = ERR_NOMEM_BITMAP;
  1738. goto force_diskless_dec;
  1739. } else if (dd == DS_GREW)
  1740. set_bit(RESYNC_AFTER_NEG, &device->flags);
  1741. if (drbd_md_test_flag(device->ldev, MDF_FULL_SYNC) ||
  1742. (test_bit(CRASHED_PRIMARY, &device->flags) &&
  1743. drbd_md_test_flag(device->ldev, MDF_AL_DISABLED))) {
  1744. drbd_info(device, "Assuming that all blocks are out of sync "
  1745. "(aka FullSync)\n");
  1746. if (drbd_bitmap_io(device, &drbd_bmio_set_n_write,
  1747. "set_n_write from attaching", BM_LOCKED_MASK,
  1748. NULL)) {
  1749. retcode = ERR_IO_MD_DISK;
  1750. goto force_diskless_dec;
  1751. }
  1752. } else {
  1753. if (drbd_bitmap_io(device, &drbd_bm_read,
  1754. "read from attaching", BM_LOCKED_MASK,
  1755. NULL)) {
  1756. retcode = ERR_IO_MD_DISK;
  1757. goto force_diskless_dec;
  1758. }
  1759. }
  1760. if (_drbd_bm_total_weight(device) == drbd_bm_bits(device))
  1761. drbd_suspend_al(device); /* IO is still suspended here... */
  1762. spin_lock_irq(&device->resource->req_lock);
  1763. os = drbd_read_state(device);
  1764. ns = os;
  1765. /* If MDF_CONSISTENT is not set go into inconsistent state,
otherwise investigate MDF_WAS_UP_TO_DATE...
  1767. If MDF_WAS_UP_TO_DATE is not set go into D_OUTDATED disk state,
  1768. otherwise into D_CONSISTENT state.
  1769. */
  1770. if (drbd_md_test_flag(device->ldev, MDF_CONSISTENT)) {
  1771. if (drbd_md_test_flag(device->ldev, MDF_WAS_UP_TO_DATE))
  1772. ns.disk = D_CONSISTENT;
  1773. else
  1774. ns.disk = D_OUTDATED;
  1775. } else {
  1776. ns.disk = D_INCONSISTENT;
  1777. }
  1778. if (drbd_md_test_flag(device->ldev, MDF_PEER_OUT_DATED))
  1779. ns.pdsk = D_OUTDATED;
  1780. rcu_read_lock();
  1781. if (ns.disk == D_CONSISTENT &&
  1782. (ns.pdsk == D_OUTDATED || rcu_dereference(device->ldev->disk_conf)->fencing == FP_DONT_CARE))
  1783. ns.disk = D_UP_TO_DATE;
  1784. /* All tests on MDF_PRIMARY_IND, MDF_CONNECTED_IND,
  1785. MDF_CONSISTENT and MDF_WAS_UP_TO_DATE must happen before
  1786. this point, because drbd_request_state() modifies these
  1787. flags. */
  1788. if (rcu_dereference(device->ldev->disk_conf)->al_updates)
  1789. device->ldev->md.flags &= ~MDF_AL_DISABLED;
  1790. else
  1791. device->ldev->md.flags |= MDF_AL_DISABLED;
  1792. rcu_read_unlock();
/* In case we are C_CONNECTED, postpone any decision on the new disk
state until after the negotiation phase. */
  1795. if (device->state.conn == C_CONNECTED) {
  1796. device->new_state_tmp.i = ns.i;
  1797. ns.i = os.i;
  1798. ns.disk = D_NEGOTIATING;
  1799. /* We expect to receive up-to-date UUIDs soon.
  1800. To avoid a race in receive_state, free p_uuid while
  1801. holding req_lock. I.e. atomic with the state change */
  1802. kfree(device->p_uuid);
  1803. device->p_uuid = NULL;
  1804. }
  1805. rv = _drbd_set_state(device, ns, CS_VERBOSE, NULL);
  1806. spin_unlock_irq(&device->resource->req_lock);
  1807. if (rv < SS_SUCCESS)
  1808. goto force_diskless_dec;
  1809. mod_timer(&device->request_timer, jiffies + HZ);
  1810. if (device->state.role == R_PRIMARY)
  1811. device->ldev->md.uuid[UI_CURRENT] |= (u64)1;
  1812. else
  1813. device->ldev->md.uuid[UI_CURRENT] &= ~(u64)1;
  1814. drbd_md_mark_dirty(device);
  1815. drbd_md_sync(device);
  1816. kobject_uevent(&disk_to_dev(device->vdisk)->kobj, KOBJ_CHANGE);
  1817. put_ldev(device);
  1818. conn_reconfig_done(connection);
  1819. mutex_unlock(&adm_ctx.resource->adm_mutex);
  1820. drbd_adm_finish(&adm_ctx, info, retcode);
  1821. return 0;
  1822. force_diskless_dec:
  1823. put_ldev(device);
  1824. force_diskless:
  1825. drbd_force_state(device, NS(disk, D_DISKLESS));
  1826. drbd_md_sync(device);
  1827. fail:
  1828. conn_reconfig_done(connection);
  1829. if (nbc) {
  1830. close_backing_dev(device, nbc->f_md_bdev,
  1831. nbc->md_bdev != nbc->backing_bdev);
  1832. close_backing_dev(device, nbc->backing_bdev_file, true);
  1833. kfree(nbc);
  1834. }
  1835. kfree(new_disk_conf);
  1836. lc_destroy(resync_lru);
  1837. kfree(new_plan);
  1838. mutex_unlock(&adm_ctx.resource->adm_mutex);
  1839. finish:
  1840. drbd_adm_finish(&adm_ctx, info, retcode);
  1841. return 0;
  1842. }
  1843. static int adm_detach(struct drbd_device *device, int force)
  1844. {
  1845. if (force) {
  1846. set_bit(FORCE_DETACH, &device->flags);
  1847. drbd_force_state(device, NS(disk, D_FAILED));
  1848. return SS_SUCCESS;
  1849. }
  1850. return drbd_request_detach_interruptible(device);
  1851. }
  1852. /* Detaching the disk is a process in multiple stages. First we need to lock
  1853. * out application IO, in-flight IO, IO stuck in drbd_al_begin_io.
  1854. * Then we transition to D_DISKLESS, and wait for put_ldev() to return all
  1855. * internal references as well.
  1856. * Only then we have finally detached. */
  1857. int drbd_adm_detach(struct sk_buff *skb, struct genl_info *info)
  1858. {
  1859. struct drbd_config_context adm_ctx;
  1860. enum drbd_ret_code retcode;
  1861. struct detach_parms parms = { };
  1862. int err;
  1863. retcode = drbd_adm_prepare(&adm_ctx, skb, info, DRBD_ADM_NEED_MINOR);
  1864. if (!adm_ctx.reply_skb)
  1865. return retcode;
  1866. if (retcode != NO_ERROR)
  1867. goto out;
  1868. if (info->attrs[DRBD_NLA_DETACH_PARMS]) {
  1869. err = detach_parms_from_attrs(&parms, info);
  1870. if (err) {
  1871. retcode = ERR_MANDATORY_TAG;
  1872. drbd_msg_put_info(adm_ctx.reply_skb, from_attrs_err_to_txt(err));
  1873. goto out;
  1874. }
  1875. }
  1876. mutex_lock(&adm_ctx.resource->adm_mutex);
  1877. retcode = adm_detach(adm_ctx.device, parms.force_detach);
  1878. mutex_unlock(&adm_ctx.resource->adm_mutex);
  1879. out:
  1880. drbd_adm_finish(&adm_ctx, info, retcode);
  1881. return 0;
  1882. }
  1883. static bool conn_resync_running(struct drbd_connection *connection)
  1884. {
  1885. struct drbd_peer_device *peer_device;
  1886. bool rv = false;
  1887. int vnr;
  1888. rcu_read_lock();
  1889. idr_for_each_entry(&connection->peer_devices, peer_device, vnr) {
  1890. struct drbd_device *device = peer_device->device;
  1891. if (device->state.conn == C_SYNC_SOURCE ||
  1892. device->state.conn == C_SYNC_TARGET ||
  1893. device->state.conn == C_PAUSED_SYNC_S ||
  1894. device->state.conn == C_PAUSED_SYNC_T) {
  1895. rv = true;
  1896. break;
  1897. }
  1898. }
  1899. rcu_read_unlock();
  1900. return rv;
  1901. }
  1902. static bool conn_ov_running(struct drbd_connection *connection)
  1903. {
  1904. struct drbd_peer_device *peer_device;
  1905. bool rv = false;
  1906. int vnr;
  1907. rcu_read_lock();
  1908. idr_for_each_entry(&connection->peer_devices, peer_device, vnr) {
  1909. struct drbd_device *device = peer_device->device;
  1910. if (device->state.conn == C_VERIFY_S ||
  1911. device->state.conn == C_VERIFY_T) {
  1912. rv = true;
  1913. break;
  1914. }
  1915. }
  1916. rcu_read_unlock();
  1917. return rv;
  1918. }
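/* Consistency rules enforced below: wire protocol, two-primaries and
 * the integrity algorithm cannot be changed while connected to a peer
 * with agreed_pro_version < 100; both nodes Primary requires
 * allow-two-primaries; two primaries require protocol C; protocol A
 * cannot be combined with FP_STONITH fencing; discard-my-data is
 * refused on a Primary; and an on-congestion policy other than
 * "block" requires protocol A. */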
  1919. static enum drbd_ret_code
  1920. _check_net_options(struct drbd_connection *connection, struct net_conf *old_net_conf, struct net_conf *new_net_conf)
  1921. {
  1922. struct drbd_peer_device *peer_device;
  1923. int i;
  1924. if (old_net_conf && connection->cstate == C_WF_REPORT_PARAMS && connection->agreed_pro_version < 100) {
  1925. if (new_net_conf->wire_protocol != old_net_conf->wire_protocol)
  1926. return ERR_NEED_APV_100;
  1927. if (new_net_conf->two_primaries != old_net_conf->two_primaries)
  1928. return ERR_NEED_APV_100;
  1929. if (strcmp(new_net_conf->integrity_alg, old_net_conf->integrity_alg))
  1930. return ERR_NEED_APV_100;
  1931. }
  1932. if (!new_net_conf->two_primaries &&
  1933. conn_highest_role(connection) == R_PRIMARY &&
  1934. conn_highest_peer(connection) == R_PRIMARY)
  1935. return ERR_NEED_ALLOW_TWO_PRI;
  1936. if (new_net_conf->two_primaries &&
  1937. (new_net_conf->wire_protocol != DRBD_PROT_C))
  1938. return ERR_NOT_PROTO_C;
  1939. idr_for_each_entry(&connection->peer_devices, peer_device, i) {
  1940. struct drbd_device *device = peer_device->device;
  1941. if (get_ldev(device)) {
  1942. enum drbd_fencing_p fp = rcu_dereference(device->ldev->disk_conf)->fencing;
  1943. put_ldev(device);
  1944. if (new_net_conf->wire_protocol == DRBD_PROT_A && fp == FP_STONITH)
  1945. return ERR_STONITH_AND_PROT_A;
  1946. }
  1947. if (device->state.role == R_PRIMARY && new_net_conf->discard_my_data)
  1948. return ERR_DISCARD_IMPOSSIBLE;
  1949. }
  1950. if (new_net_conf->on_congestion != OC_BLOCK && new_net_conf->wire_protocol != DRBD_PROT_A)
  1951. return ERR_CONG_NOT_PROTO_A;
  1952. return NO_ERROR;
  1953. }
  1954. static enum drbd_ret_code
  1955. check_net_options(struct drbd_connection *connection, struct net_conf *new_net_conf)
  1956. {
  1957. enum drbd_ret_code rv;
  1958. struct drbd_peer_device *peer_device;
  1959. int i;
  1960. rcu_read_lock();
  1961. rv = _check_net_options(connection, rcu_dereference(connection->net_conf), new_net_conf);
  1962. rcu_read_unlock();
  1963. /* connection->peer_devices protected by genl_lock() here */
  1964. idr_for_each_entry(&connection->peer_devices, peer_device, i) {
  1965. struct drbd_device *device = peer_device->device;
  1966. if (!device->bitmap) {
  1967. if (drbd_bm_init(device))
  1968. return ERR_NOMEM;
  1969. }
  1970. }
  1971. return rv;
  1972. }
  1973. struct crypto {
  1974. struct crypto_shash *verify_tfm;
  1975. struct crypto_shash *csums_tfm;
  1976. struct crypto_shash *cram_hmac_tfm;
  1977. struct crypto_shash *integrity_tfm;
  1978. };
  1979. static int
  1980. alloc_shash(struct crypto_shash **tfm, char *tfm_name, int err_alg)
  1981. {
  1982. if (!tfm_name[0])
  1983. return NO_ERROR;
  1984. *tfm = crypto_alloc_shash(tfm_name, 0, 0);
  1985. if (IS_ERR(*tfm)) {
  1986. *tfm = NULL;
  1987. return err_alg;
  1988. }
  1989. return NO_ERROR;
  1990. }
  1991. static enum drbd_ret_code
  1992. alloc_crypto(struct crypto *crypto, struct net_conf *new_net_conf)
  1993. {
  1994. char hmac_name[CRYPTO_MAX_ALG_NAME];
  1995. enum drbd_ret_code rv;
  1996. rv = alloc_shash(&crypto->csums_tfm, new_net_conf->csums_alg,
  1997. ERR_CSUMS_ALG);
  1998. if (rv != NO_ERROR)
  1999. return rv;
  2000. rv = alloc_shash(&crypto->verify_tfm, new_net_conf->verify_alg,
  2001. ERR_VERIFY_ALG);
  2002. if (rv != NO_ERROR)
  2003. return rv;
  2004. rv = alloc_shash(&crypto->integrity_tfm, new_net_conf->integrity_alg,
  2005. ERR_INTEGRITY_ALG);
  2006. if (rv != NO_ERROR)
  2007. return rv;
  2008. if (new_net_conf->cram_hmac_alg[0] != 0) {
  2009. snprintf(hmac_name, CRYPTO_MAX_ALG_NAME, "hmac(%s)",
  2010. new_net_conf->cram_hmac_alg);
  2011. rv = alloc_shash(&crypto->cram_hmac_tfm, hmac_name,
  2012. ERR_AUTH_ALG);
  2013. }
  2014. return rv;
  2015. }
  2016. static void free_crypto(struct crypto *crypto)
  2017. {
  2018. crypto_free_shash(crypto->cram_hmac_tfm);
  2019. crypto_free_shash(crypto->integrity_tfm);
  2020. crypto_free_shash(crypto->csums_tfm);
  2021. crypto_free_shash(crypto->verify_tfm);
  2022. }
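/* Changing net options on the fly: the csums algorithm may not be
 * changed while a resync is running, nor the verify algorithm while
 * online verify is running; otherwise new crypto transforms are
 * allocated up front, the new net_conf is published with
 * rcu_assign_pointer() under conf_update, and the old one is freed
 * after a grace period. */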
  2023. int drbd_adm_net_opts(struct sk_buff *skb, struct genl_info *info)
  2024. {
  2025. struct drbd_config_context adm_ctx;
  2026. enum drbd_ret_code retcode;
  2027. struct drbd_connection *connection;
  2028. struct net_conf *old_net_conf, *new_net_conf = NULL;
  2029. int err;
  2030. int ovr; /* online verify running */
  2031. int rsr; /* re-sync running */
  2032. struct crypto crypto = { };
  2033. retcode = drbd_adm_prepare(&adm_ctx, skb, info, DRBD_ADM_NEED_CONNECTION);
  2034. if (!adm_ctx.reply_skb)
  2035. return retcode;
  2036. if (retcode != NO_ERROR)
  2037. goto finish;
  2038. connection = adm_ctx.connection;
  2039. mutex_lock(&adm_ctx.resource->adm_mutex);
  2040. new_net_conf = kzalloc(sizeof(struct net_conf), GFP_KERNEL);
  2041. if (!new_net_conf) {
  2042. retcode = ERR_NOMEM;
  2043. goto out;
  2044. }
  2045. conn_reconfig_start(connection);
  2046. mutex_lock(&connection->data.mutex);
  2047. mutex_lock(&connection->resource->conf_update);
  2048. old_net_conf = connection->net_conf;
  2049. if (!old_net_conf) {
  2050. drbd_msg_put_info(adm_ctx.reply_skb, "net conf missing, try connect");
  2051. retcode = ERR_INVALID_REQUEST;
  2052. goto fail;
  2053. }
  2054. *new_net_conf = *old_net_conf;
  2055. if (should_set_defaults(info))
  2056. set_net_conf_defaults(new_net_conf);
  2057. err = net_conf_from_attrs_for_change(new_net_conf, info);
  2058. if (err && err != -ENOMSG) {
  2059. retcode = ERR_MANDATORY_TAG;
  2060. drbd_msg_put_info(adm_ctx.reply_skb, from_attrs_err_to_txt(err));
  2061. goto fail;
  2062. }
  2063. retcode = check_net_options(connection, new_net_conf);
  2064. if (retcode != NO_ERROR)
  2065. goto fail;
  2066. /* re-sync running */
  2067. rsr = conn_resync_running(connection);
  2068. if (rsr && strcmp(new_net_conf->csums_alg, old_net_conf->csums_alg)) {
  2069. retcode = ERR_CSUMS_RESYNC_RUNNING;
  2070. goto fail;
  2071. }
  2072. /* online verify running */
  2073. ovr = conn_ov_running(connection);
  2074. if (ovr && strcmp(new_net_conf->verify_alg, old_net_conf->verify_alg)) {
  2075. retcode = ERR_VERIFY_RUNNING;
  2076. goto fail;
  2077. }
  2078. retcode = alloc_crypto(&crypto, new_net_conf);
  2079. if (retcode != NO_ERROR)
  2080. goto fail;
  2081. rcu_assign_pointer(connection->net_conf, new_net_conf);
  2082. if (!rsr) {
  2083. crypto_free_shash(connection->csums_tfm);
  2084. connection->csums_tfm = crypto.csums_tfm;
  2085. crypto.csums_tfm = NULL;
  2086. }
  2087. if (!ovr) {
  2088. crypto_free_shash(connection->verify_tfm);
  2089. connection->verify_tfm = crypto.verify_tfm;
  2090. crypto.verify_tfm = NULL;
  2091. }
  2092. crypto_free_shash(connection->integrity_tfm);
  2093. connection->integrity_tfm = crypto.integrity_tfm;
  2094. if (connection->cstate >= C_WF_REPORT_PARAMS && connection->agreed_pro_version >= 100)
  2095. /* Do this without trying to take connection->data.mutex again. */
  2096. __drbd_send_protocol(connection, P_PROTOCOL_UPDATE);
  2097. crypto_free_shash(connection->cram_hmac_tfm);
  2098. connection->cram_hmac_tfm = crypto.cram_hmac_tfm;
  2099. mutex_unlock(&connection->resource->conf_update);
  2100. mutex_unlock(&connection->data.mutex);
  2101. kvfree_rcu_mightsleep(old_net_conf);
  2102. if (connection->cstate >= C_WF_REPORT_PARAMS) {
  2103. struct drbd_peer_device *peer_device;
  2104. int vnr;
  2105. idr_for_each_entry(&connection->peer_devices, peer_device, vnr)
  2106. drbd_send_sync_param(peer_device);
  2107. }
  2108. goto done;
  2109. fail:
  2110. mutex_unlock(&connection->resource->conf_update);
  2111. mutex_unlock(&connection->data.mutex);
  2112. free_crypto(&crypto);
  2113. kfree(new_net_conf);
  2114. done:
  2115. conn_reconfig_done(connection);
  2116. out:
  2117. mutex_unlock(&adm_ctx.resource->adm_mutex);
  2118. finish:
  2119. drbd_adm_finish(&adm_ctx, info, retcode);
  2120. return 0;
  2121. }
  2122. static void connection_to_info(struct connection_info *info,
  2123. struct drbd_connection *connection)
  2124. {
  2125. info->conn_connection_state = connection->cstate;
  2126. info->conn_role = conn_highest_peer(connection);
  2127. }
  2128. static void peer_device_to_info(struct peer_device_info *info,
  2129. struct drbd_peer_device *peer_device)
  2130. {
  2131. struct drbd_device *device = peer_device->device;
  2132. info->peer_repl_state =
  2133. max_t(enum drbd_conns, C_WF_REPORT_PARAMS, device->state.conn);
  2134. info->peer_disk_state = device->state.pdsk;
  2135. info->peer_resync_susp_user = device->state.user_isp;
  2136. info->peer_resync_susp_peer = device->state.peer_isp;
  2137. info->peer_resync_susp_dependency = device->state.aftr_isp;
  2138. }
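/* Connect: reject local/peer addresses already in use by any other
 * connection, allocate net_conf and crypto transforms, publish them
 * under conf_update, emit NOTIFY_CREATE events for the connection and
 * its peer devices, and finally request C_UNCONNECTED, from where
 * connection establishment proceeds. */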
  2139. int drbd_adm_connect(struct sk_buff *skb, struct genl_info *info)
  2140. {
  2141. struct connection_info connection_info;
  2142. enum drbd_notification_type flags;
  2143. unsigned int peer_devices = 0;
  2144. struct drbd_config_context adm_ctx;
  2145. struct drbd_peer_device *peer_device;
  2146. struct net_conf *old_net_conf, *new_net_conf = NULL;
  2147. struct crypto crypto = { };
  2148. struct drbd_resource *resource;
  2149. struct drbd_connection *connection;
  2150. enum drbd_ret_code retcode;
  2151. enum drbd_state_rv rv;
  2152. int i;
  2153. int err;
  2154. retcode = drbd_adm_prepare(&adm_ctx, skb, info, DRBD_ADM_NEED_RESOURCE);
  2155. if (!adm_ctx.reply_skb)
  2156. return retcode;
  2157. if (retcode != NO_ERROR)
  2158. goto out;
  2159. if (!(adm_ctx.my_addr && adm_ctx.peer_addr)) {
  2160. drbd_msg_put_info(adm_ctx.reply_skb, "connection endpoint(s) missing");
  2161. retcode = ERR_INVALID_REQUEST;
  2162. goto out;
  2163. }
  2164. /* No need for _rcu here. All reconfiguration is
  2165. * strictly serialized on genl_lock(). We are protected against
  2166. * concurrent reconfiguration/addition/deletion */
  2167. for_each_resource(resource, &drbd_resources) {
  2168. for_each_connection(connection, resource) {
  2169. if (nla_len(adm_ctx.my_addr) == connection->my_addr_len &&
  2170. !memcmp(nla_data(adm_ctx.my_addr), &connection->my_addr,
  2171. connection->my_addr_len)) {
  2172. retcode = ERR_LOCAL_ADDR;
  2173. goto out;
  2174. }
  2175. if (nla_len(adm_ctx.peer_addr) == connection->peer_addr_len &&
  2176. !memcmp(nla_data(adm_ctx.peer_addr), &connection->peer_addr,
  2177. connection->peer_addr_len)) {
  2178. retcode = ERR_PEER_ADDR;
  2179. goto out;
  2180. }
  2181. }
  2182. }
  2183. mutex_lock(&adm_ctx.resource->adm_mutex);
  2184. connection = first_connection(adm_ctx.resource);
  2185. conn_reconfig_start(connection);
  2186. if (connection->cstate > C_STANDALONE) {
  2187. retcode = ERR_NET_CONFIGURED;
  2188. goto fail;
  2189. }
  2190. /* allocation not in the IO path, drbdsetup / netlink process context */
  2191. new_net_conf = kzalloc(sizeof(*new_net_conf), GFP_KERNEL);
  2192. if (!new_net_conf) {
  2193. retcode = ERR_NOMEM;
  2194. goto fail;
  2195. }
  2196. set_net_conf_defaults(new_net_conf);
  2197. err = net_conf_from_attrs(new_net_conf, info);
  2198. if (err && err != -ENOMSG) {
  2199. retcode = ERR_MANDATORY_TAG;
  2200. drbd_msg_put_info(adm_ctx.reply_skb, from_attrs_err_to_txt(err));
  2201. goto fail;
  2202. }
  2203. retcode = check_net_options(connection, new_net_conf);
  2204. if (retcode != NO_ERROR)
  2205. goto fail;
  2206. retcode = alloc_crypto(&crypto, new_net_conf);
  2207. if (retcode != NO_ERROR)
  2208. goto fail;
  2209. ((char *)new_net_conf->shared_secret)[SHARED_SECRET_MAX-1] = 0;
  2210. drbd_flush_workqueue(&connection->sender_work);
  2211. mutex_lock(&adm_ctx.resource->conf_update);
  2212. old_net_conf = connection->net_conf;
  2213. if (old_net_conf) {
  2214. retcode = ERR_NET_CONFIGURED;
  2215. mutex_unlock(&adm_ctx.resource->conf_update);
  2216. goto fail;
  2217. }
  2218. rcu_assign_pointer(connection->net_conf, new_net_conf);
  2219. conn_free_crypto(connection);
  2220. connection->cram_hmac_tfm = crypto.cram_hmac_tfm;
  2221. connection->integrity_tfm = crypto.integrity_tfm;
  2222. connection->csums_tfm = crypto.csums_tfm;
  2223. connection->verify_tfm = crypto.verify_tfm;
  2224. connection->my_addr_len = nla_len(adm_ctx.my_addr);
  2225. memcpy(&connection->my_addr, nla_data(adm_ctx.my_addr), connection->my_addr_len);
  2226. connection->peer_addr_len = nla_len(adm_ctx.peer_addr);
  2227. memcpy(&connection->peer_addr, nla_data(adm_ctx.peer_addr), connection->peer_addr_len);
  2228. idr_for_each_entry(&connection->peer_devices, peer_device, i) {
  2229. peer_devices++;
  2230. }
  2231. connection_to_info(&connection_info, connection);
  2232. flags = (peer_devices--) ? NOTIFY_CONTINUES : 0;
  2233. mutex_lock(&notification_mutex);
  2234. notify_connection_state(NULL, 0, connection, &connection_info, NOTIFY_CREATE | flags);
  2235. idr_for_each_entry(&connection->peer_devices, peer_device, i) {
  2236. struct peer_device_info peer_device_info;
  2237. peer_device_to_info(&peer_device_info, peer_device);
  2238. flags = (peer_devices--) ? NOTIFY_CONTINUES : 0;
  2239. notify_peer_device_state(NULL, 0, peer_device, &peer_device_info, NOTIFY_CREATE | flags);
  2240. }
  2241. mutex_unlock(&notification_mutex);
  2242. mutex_unlock(&adm_ctx.resource->conf_update);
  2243. rcu_read_lock();
  2244. idr_for_each_entry(&connection->peer_devices, peer_device, i) {
  2245. struct drbd_device *device = peer_device->device;
  2246. device->send_cnt = 0;
  2247. device->recv_cnt = 0;
  2248. }
  2249. rcu_read_unlock();
  2250. rv = conn_request_state(connection, NS(conn, C_UNCONNECTED), CS_VERBOSE);
  2251. conn_reconfig_done(connection);
  2252. mutex_unlock(&adm_ctx.resource->adm_mutex);
  2253. drbd_adm_finish(&adm_ctx, info, rv);
  2254. return 0;
  2255. fail:
  2256. free_crypto(&crypto);
  2257. kfree(new_net_conf);
  2258. conn_reconfig_done(connection);
  2259. mutex_unlock(&adm_ctx.resource->adm_mutex);
  2260. out:
  2261. drbd_adm_finish(&adm_ctx, info, retcode);
  2262. return 0;
  2263. }
  2264. static enum drbd_state_rv conn_try_disconnect(struct drbd_connection *connection, bool force)
  2265. {
  2266. enum drbd_conns cstate;
  2267. enum drbd_state_rv rv;
  2268. repeat:
  2269. rv = conn_request_state(connection, NS(conn, C_DISCONNECTING),
  2270. force ? CS_HARD : 0);
  2271. switch (rv) {
  2272. case SS_NOTHING_TO_DO:
  2273. break;
  2274. case SS_ALREADY_STANDALONE:
  2275. return SS_SUCCESS;
  2276. case SS_PRIMARY_NOP:
  2277. /* Our state checking code wants to see the peer outdated. */
  2278. rv = conn_request_state(connection, NS2(conn, C_DISCONNECTING, pdsk, D_OUTDATED), 0);
  2279. if (rv == SS_OUTDATE_WO_CONN) /* lost connection before graceful disconnect succeeded */
  2280. rv = conn_request_state(connection, NS(conn, C_DISCONNECTING), CS_VERBOSE);
  2281. break;
  2282. case SS_CW_FAILED_BY_PEER:
  2283. spin_lock_irq(&connection->resource->req_lock);
  2284. cstate = connection->cstate;
  2285. spin_unlock_irq(&connection->resource->req_lock);
  2286. if (cstate <= C_WF_CONNECTION)
  2287. goto repeat;
  2288. /* The peer probably wants to see us outdated. */
  2289. rv = conn_request_state(connection, NS2(conn, C_DISCONNECTING,
  2290. disk, D_OUTDATED), 0);
  2291. if (rv == SS_IS_DISKLESS || rv == SS_LOWER_THAN_OUTDATED) {
  2292. rv = conn_request_state(connection, NS(conn, C_DISCONNECTING),
  2293. CS_HARD);
  2294. }
  2295. break;
  2296. default:;
  2297. /* no special handling necessary */
  2298. }
  2299. if (rv >= SS_SUCCESS) {
  2300. enum drbd_state_rv rv2;
  2301. /* No one else can reconfigure the network while I am here.
  2302. * The state handling only uses drbd_thread_stop_nowait(),
  2303. * we want to really wait here until the receiver is no more.
  2304. */
  2305. drbd_thread_stop(&connection->receiver);
  2306. /* Race breaker. This additional state change request may be
  2307. * necessary, if this was a forced disconnect during a receiver
  2308. * restart. We may have "killed" the receiver thread just
  2309. * after drbd_receiver() returned. Typically, we should be
  2310. * C_STANDALONE already, now, and this becomes a no-op.
  2311. */
  2312. rv2 = conn_request_state(connection, NS(conn, C_STANDALONE),
  2313. CS_VERBOSE | CS_HARD);
  2314. if (rv2 < SS_SUCCESS)
  2315. drbd_err(connection,
  2316. "unexpected rv2=%d in conn_try_disconnect()\n",
  2317. rv2);
  2318. /* Unlike in DRBD 9, the state engine has generated
  2319. * NOTIFY_DESTROY events before clearing connection->net_conf. */
  2320. }
  2321. return rv;
  2322. }
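/* Netlink handler for "disconnect": parse the optional disconnect_parms and
 * tear the connection down via conn_try_disconnect() under the adm_mutex. */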
  2323. int drbd_adm_disconnect(struct sk_buff *skb, struct genl_info *info)
  2324. {
  2325. struct drbd_config_context adm_ctx;
  2326. struct disconnect_parms parms;
  2327. struct drbd_connection *connection;
  2328. enum drbd_state_rv rv;
  2329. enum drbd_ret_code retcode;
  2330. int err;
  2331. retcode = drbd_adm_prepare(&adm_ctx, skb, info, DRBD_ADM_NEED_CONNECTION);
  2332. if (!adm_ctx.reply_skb)
  2333. return retcode;
  2334. if (retcode != NO_ERROR)
  2335. goto fail;
  2336. connection = adm_ctx.connection;
  2337. memset(&parms, 0, sizeof(parms));
  2338. if (info->attrs[DRBD_NLA_DISCONNECT_PARMS]) {
  2339. err = disconnect_parms_from_attrs(&parms, info);
  2340. if (err) {
  2341. retcode = ERR_MANDATORY_TAG;
  2342. drbd_msg_put_info(adm_ctx.reply_skb, from_attrs_err_to_txt(err));
  2343. goto fail;
  2344. }
  2345. }
  2346. mutex_lock(&adm_ctx.resource->adm_mutex);
  2347. rv = conn_try_disconnect(connection, parms.force_disconnect);
  2348. mutex_unlock(&adm_ctx.resource->adm_mutex);
  2349. if (rv < SS_SUCCESS) {
  2350. drbd_adm_finish(&adm_ctx, info, rv);
  2351. return 0;
  2352. }
  2353. retcode = NO_ERROR;
  2354. fail:
  2355. drbd_adm_finish(&adm_ctx, info, retcode);
  2356. return 0;
  2357. }
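/* After an online grow, decide which node resyncs the new storage area:
 * the Primary if the roles differ, otherwise the side holding the
 * RESOLVE_CONFLICTS flag becomes the sync source. */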
  2358. void resync_after_online_grow(struct drbd_device *device)
  2359. {
  2360. int iass; /* I am sync source */
  2361. drbd_info(device, "Resync of new storage after online grow\n");
  2362. if (device->state.role != device->state.peer)
  2363. iass = (device->state.role == R_PRIMARY);
  2364. else
  2365. iass = test_bit(RESOLVE_CONFLICTS, &first_peer_device(device)->connection->flags);
  2366. if (iass)
  2367. drbd_start_resync(device, C_SYNC_SOURCE);
  2368. else
  2369. _drbd_request_state(device, NS(conn, C_WF_SYNC_UUID), CS_VERBOSE + CS_SERIALIZE);
  2370. }
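/* Netlink handler for "resize": validate the requested size and activity log
 * layout, update disk_conf->disk_size under conf_update, re-determine the
 * device size, and send new UUIDs/sizes to the peer while connected. */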
  2371. int drbd_adm_resize(struct sk_buff *skb, struct genl_info *info)
  2372. {
  2373. struct drbd_config_context adm_ctx;
  2374. struct disk_conf *old_disk_conf, *new_disk_conf = NULL;
  2375. struct resize_parms rs;
  2376. struct drbd_device *device;
  2377. enum drbd_ret_code retcode;
  2378. enum determine_dev_size dd;
  2379. bool change_al_layout = false;
  2380. enum dds_flags ddsf;
  2381. sector_t u_size;
  2382. int err;
  2383. retcode = drbd_adm_prepare(&adm_ctx, skb, info, DRBD_ADM_NEED_MINOR);
  2384. if (!adm_ctx.reply_skb)
  2385. return retcode;
  2386. if (retcode != NO_ERROR)
  2387. goto finish;
  2388. mutex_lock(&adm_ctx.resource->adm_mutex);
  2389. device = adm_ctx.device;
  2390. if (!get_ldev(device)) {
  2391. retcode = ERR_NO_DISK;
  2392. goto fail;
  2393. }
  2394. memset(&rs, 0, sizeof(struct resize_parms));
  2395. rs.al_stripes = device->ldev->md.al_stripes;
  2396. rs.al_stripe_size = device->ldev->md.al_stripe_size_4k * 4;
  2397. if (info->attrs[DRBD_NLA_RESIZE_PARMS]) {
  2398. err = resize_parms_from_attrs(&rs, info);
  2399. if (err) {
  2400. retcode = ERR_MANDATORY_TAG;
  2401. drbd_msg_put_info(adm_ctx.reply_skb, from_attrs_err_to_txt(err));
  2402. goto fail_ldev;
  2403. }
  2404. }
  2405. if (device->state.conn > C_CONNECTED) {
  2406. retcode = ERR_RESIZE_RESYNC;
  2407. goto fail_ldev;
  2408. }
  2409. if (device->state.role == R_SECONDARY &&
  2410. device->state.peer == R_SECONDARY) {
  2411. retcode = ERR_NO_PRIMARY;
  2412. goto fail_ldev;
  2413. }
  2414. if (rs.no_resync && first_peer_device(device)->connection->agreed_pro_version < 93) {
  2415. retcode = ERR_NEED_APV_93;
  2416. goto fail_ldev;
  2417. }
  2418. rcu_read_lock();
  2419. u_size = rcu_dereference(device->ldev->disk_conf)->disk_size;
  2420. rcu_read_unlock();
  2421. if (u_size != (sector_t)rs.resize_size) {
  2422. new_disk_conf = kmalloc(sizeof(struct disk_conf), GFP_KERNEL);
  2423. if (!new_disk_conf) {
  2424. retcode = ERR_NOMEM;
  2425. goto fail_ldev;
  2426. }
  2427. }
  2428. if (device->ldev->md.al_stripes != rs.al_stripes ||
  2429. device->ldev->md.al_stripe_size_4k != rs.al_stripe_size / 4) {
  2430. u32 al_size_k = rs.al_stripes * rs.al_stripe_size;
  2431. if (al_size_k > (16 * 1024 * 1024)) {
  2432. retcode = ERR_MD_LAYOUT_TOO_BIG;
  2433. goto fail_ldev;
  2434. }
  2435. if (al_size_k < MD_32kB_SECT/2) {
  2436. retcode = ERR_MD_LAYOUT_TOO_SMALL;
  2437. goto fail_ldev;
  2438. }
  2439. if (device->state.conn != C_CONNECTED && !rs.resize_force) {
  2440. retcode = ERR_MD_LAYOUT_CONNECTED;
  2441. goto fail_ldev;
  2442. }
  2443. change_al_layout = true;
  2444. }
  2445. if (device->ldev->known_size != drbd_get_capacity(device->ldev->backing_bdev))
  2446. device->ldev->known_size = drbd_get_capacity(device->ldev->backing_bdev);
  2447. if (new_disk_conf) {
  2448. mutex_lock(&device->resource->conf_update);
  2449. old_disk_conf = device->ldev->disk_conf;
  2450. *new_disk_conf = *old_disk_conf;
  2451. new_disk_conf->disk_size = (sector_t)rs.resize_size;
  2452. rcu_assign_pointer(device->ldev->disk_conf, new_disk_conf);
  2453. mutex_unlock(&device->resource->conf_update);
  2454. kvfree_rcu_mightsleep(old_disk_conf);
  2455. new_disk_conf = NULL;
  2456. }
  2457. ddsf = (rs.resize_force ? DDSF_FORCED : 0) | (rs.no_resync ? DDSF_NO_RESYNC : 0);
  2458. dd = drbd_determine_dev_size(device, ddsf, change_al_layout ? &rs : NULL);
  2459. drbd_md_sync(device);
  2460. put_ldev(device);
  2461. if (dd == DS_ERROR) {
  2462. retcode = ERR_NOMEM_BITMAP;
  2463. goto fail;
  2464. } else if (dd == DS_ERROR_SPACE_MD) {
  2465. retcode = ERR_MD_LAYOUT_NO_FIT;
  2466. goto fail;
  2467. } else if (dd == DS_ERROR_SHRINK) {
  2468. retcode = ERR_IMPLICIT_SHRINK;
  2469. goto fail;
  2470. }
  2471. if (device->state.conn == C_CONNECTED) {
  2472. if (dd == DS_GREW)
  2473. set_bit(RESIZE_PENDING, &device->flags);
  2474. drbd_send_uuids(first_peer_device(device));
  2475. drbd_send_sizes(first_peer_device(device), 1, ddsf);
  2476. }
  2477. fail:
  2478. mutex_unlock(&adm_ctx.resource->adm_mutex);
  2479. finish:
  2480. drbd_adm_finish(&adm_ctx, info, retcode);
  2481. return 0;
  2482. fail_ldev:
  2483. put_ldev(device);
  2484. kfree(new_disk_conf);
  2485. goto fail;
  2486. }
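/* Netlink handler for "resource-options": apply updated res_opts to the
 * resource, optionally starting from the defaults. */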
  2487. int drbd_adm_resource_opts(struct sk_buff *skb, struct genl_info *info)
  2488. {
  2489. struct drbd_config_context adm_ctx;
  2490. enum drbd_ret_code retcode;
  2491. struct res_opts res_opts;
  2492. int err;
  2493. retcode = drbd_adm_prepare(&adm_ctx, skb, info, DRBD_ADM_NEED_RESOURCE);
  2494. if (!adm_ctx.reply_skb)
  2495. return retcode;
  2496. if (retcode != NO_ERROR)
  2497. goto fail;
  2498. res_opts = adm_ctx.resource->res_opts;
  2499. if (should_set_defaults(info))
  2500. set_res_opts_defaults(&res_opts);
  2501. err = res_opts_from_attrs(&res_opts, info);
  2502. if (err && err != -ENOMSG) {
  2503. retcode = ERR_MANDATORY_TAG;
  2504. drbd_msg_put_info(adm_ctx.reply_skb, from_attrs_err_to_txt(err));
  2505. goto fail;
  2506. }
  2507. mutex_lock(&adm_ctx.resource->adm_mutex);
  2508. err = set_resource_options(adm_ctx.resource, &res_opts);
  2509. if (err) {
  2510. retcode = ERR_INVALID_REQUEST;
  2511. if (err == -ENOMEM)
  2512. retcode = ERR_NOMEM;
  2513. }
  2514. mutex_unlock(&adm_ctx.resource->adm_mutex);
  2515. fail:
  2516. drbd_adm_finish(&adm_ctx, info, retcode);
  2517. return 0;
  2518. }
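/* Netlink handler for "invalidate": make this node sync target for a full
 * resync; when standalone and Secondary, just mark the disk inconsistent
 * and set all bits in the bitmap. */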
  2519. int drbd_adm_invalidate(struct sk_buff *skb, struct genl_info *info)
  2520. {
  2521. struct drbd_config_context adm_ctx;
  2522. struct drbd_device *device;
  2523. int retcode; /* enum drbd_ret_code rsp. enum drbd_state_rv */
  2524. retcode = drbd_adm_prepare(&adm_ctx, skb, info, DRBD_ADM_NEED_MINOR);
  2525. if (!adm_ctx.reply_skb)
  2526. return retcode;
  2527. if (retcode != NO_ERROR)
  2528. goto out;
  2529. device = adm_ctx.device;
  2530. if (!get_ldev(device)) {
  2531. retcode = ERR_NO_DISK;
  2532. goto out;
  2533. }
  2534. mutex_lock(&adm_ctx.resource->adm_mutex);
2535. /* If there is still bitmap IO pending, probably because a previous
2536. * resync just finished, wait for it before requesting a new resync.
2537. * Also wait for its after_state_ch(). */
  2538. drbd_suspend_io(device);
  2539. wait_event(device->misc_wait, !test_bit(BITMAP_IO, &device->flags));
  2540. drbd_flush_workqueue(&first_peer_device(device)->connection->sender_work);
  2541. /* If we happen to be C_STANDALONE R_SECONDARY, just change to
  2542. * D_INCONSISTENT, and set all bits in the bitmap. Otherwise,
  2543. * try to start a resync handshake as sync target for full sync.
  2544. */
  2545. if (device->state.conn == C_STANDALONE && device->state.role == R_SECONDARY) {
  2546. retcode = drbd_request_state(device, NS(disk, D_INCONSISTENT));
  2547. if (retcode >= SS_SUCCESS) {
  2548. if (drbd_bitmap_io(device, &drbd_bmio_set_n_write,
  2549. "set_n_write from invalidate", BM_LOCKED_MASK, NULL))
  2550. retcode = ERR_IO_MD_DISK;
  2551. }
  2552. } else
  2553. retcode = drbd_request_state(device, NS(conn, C_STARTING_SYNC_T));
  2554. drbd_resume_io(device);
  2555. mutex_unlock(&adm_ctx.resource->adm_mutex);
  2556. put_ldev(device);
  2557. out:
  2558. drbd_adm_finish(&adm_ctx, info, retcode);
  2559. return 0;
  2560. }
  2561. static int drbd_adm_simple_request_state(struct sk_buff *skb, struct genl_info *info,
  2562. union drbd_state mask, union drbd_state val)
  2563. {
  2564. struct drbd_config_context adm_ctx;
  2565. enum drbd_ret_code retcode;
  2566. retcode = drbd_adm_prepare(&adm_ctx, skb, info, DRBD_ADM_NEED_MINOR);
  2567. if (!adm_ctx.reply_skb)
  2568. return retcode;
  2569. if (retcode != NO_ERROR)
  2570. goto out;
  2571. mutex_lock(&adm_ctx.resource->adm_mutex);
  2572. retcode = drbd_request_state(adm_ctx.device, mask, val);
  2573. mutex_unlock(&adm_ctx.resource->adm_mutex);
  2574. out:
  2575. drbd_adm_finish(&adm_ctx, info, retcode);
  2576. return 0;
  2577. }
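/* Bitmap IO worker: set all bits and write the bitmap out (full sync towards
 * the peer), then suspend updates to the activity log. */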
  2578. static int drbd_bmio_set_susp_al(struct drbd_device *device,
  2579. struct drbd_peer_device *peer_device) __must_hold(local)
  2580. {
  2581. int rv;
  2582. rv = drbd_bmio_set_n_write(device, peer_device);
  2583. drbd_suspend_al(device);
  2584. return rv;
  2585. }
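/* Netlink handler for "invalidate-remote": make this node sync source for a
 * full resync; when standalone and Primary, mark the peer disk inconsistent
 * and pre-set the bitmap via drbd_bmio_set_susp_al(). */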
  2586. int drbd_adm_invalidate_peer(struct sk_buff *skb, struct genl_info *info)
  2587. {
  2588. struct drbd_config_context adm_ctx;
  2589. int retcode; /* drbd_ret_code, drbd_state_rv */
  2590. struct drbd_device *device;
  2591. retcode = drbd_adm_prepare(&adm_ctx, skb, info, DRBD_ADM_NEED_MINOR);
  2592. if (!adm_ctx.reply_skb)
  2593. return retcode;
  2594. if (retcode != NO_ERROR)
  2595. goto out;
  2596. device = adm_ctx.device;
  2597. if (!get_ldev(device)) {
  2598. retcode = ERR_NO_DISK;
  2599. goto out;
  2600. }
  2601. mutex_lock(&adm_ctx.resource->adm_mutex);
2602. /* If there is still bitmap IO pending, probably because a previous
2603. * resync just finished, wait for it before requesting a new resync.
2604. * Also wait for its after_state_ch(). */
  2605. drbd_suspend_io(device);
  2606. wait_event(device->misc_wait, !test_bit(BITMAP_IO, &device->flags));
  2607. drbd_flush_workqueue(&first_peer_device(device)->connection->sender_work);
  2608. /* If we happen to be C_STANDALONE R_PRIMARY, just set all bits
  2609. * in the bitmap. Otherwise, try to start a resync handshake
  2610. * as sync source for full sync.
  2611. */
  2612. if (device->state.conn == C_STANDALONE && device->state.role == R_PRIMARY) {
2613. /* The peer will get a resync upon connect anyway. Just make that
2614. into a full resync. */
  2615. retcode = drbd_request_state(device, NS(pdsk, D_INCONSISTENT));
  2616. if (retcode >= SS_SUCCESS) {
  2617. if (drbd_bitmap_io(device, &drbd_bmio_set_susp_al,
  2618. "set_n_write from invalidate_peer",
  2619. BM_LOCKED_SET_ALLOWED, NULL))
  2620. retcode = ERR_IO_MD_DISK;
  2621. }
  2622. } else
  2623. retcode = drbd_request_state(device, NS(conn, C_STARTING_SYNC_S));
  2624. drbd_resume_io(device);
  2625. mutex_unlock(&adm_ctx.resource->adm_mutex);
  2626. put_ldev(device);
  2627. out:
  2628. drbd_adm_finish(&adm_ctx, info, retcode);
  2629. return 0;
  2630. }
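/* Netlink handlers for "pause-sync" and "resume-sync": toggle the
 * user-requested sync pause bit (user_isp) and report whether it was
 * already set or cleared. */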
  2631. int drbd_adm_pause_sync(struct sk_buff *skb, struct genl_info *info)
  2632. {
  2633. struct drbd_config_context adm_ctx;
  2634. enum drbd_ret_code retcode;
  2635. retcode = drbd_adm_prepare(&adm_ctx, skb, info, DRBD_ADM_NEED_MINOR);
  2636. if (!adm_ctx.reply_skb)
  2637. return retcode;
  2638. if (retcode != NO_ERROR)
  2639. goto out;
  2640. mutex_lock(&adm_ctx.resource->adm_mutex);
  2641. if (drbd_request_state(adm_ctx.device, NS(user_isp, 1)) == SS_NOTHING_TO_DO)
  2642. retcode = ERR_PAUSE_IS_SET;
  2643. mutex_unlock(&adm_ctx.resource->adm_mutex);
  2644. out:
  2645. drbd_adm_finish(&adm_ctx, info, retcode);
  2646. return 0;
  2647. }
  2648. int drbd_adm_resume_sync(struct sk_buff *skb, struct genl_info *info)
  2649. {
  2650. struct drbd_config_context adm_ctx;
  2651. union drbd_dev_state s;
  2652. enum drbd_ret_code retcode;
  2653. retcode = drbd_adm_prepare(&adm_ctx, skb, info, DRBD_ADM_NEED_MINOR);
  2654. if (!adm_ctx.reply_skb)
  2655. return retcode;
  2656. if (retcode != NO_ERROR)
  2657. goto out;
  2658. mutex_lock(&adm_ctx.resource->adm_mutex);
  2659. if (drbd_request_state(adm_ctx.device, NS(user_isp, 0)) == SS_NOTHING_TO_DO) {
  2660. s = adm_ctx.device->state;
  2661. if (s.conn == C_PAUSED_SYNC_S || s.conn == C_PAUSED_SYNC_T) {
  2662. retcode = s.aftr_isp ? ERR_PIC_AFTER_DEP :
  2663. s.peer_isp ? ERR_PIC_PEER_DEP : ERR_PAUSE_IS_CLEAR;
  2664. } else {
  2665. retcode = ERR_PAUSE_IS_CLEAR;
  2666. }
  2667. }
  2668. mutex_unlock(&adm_ctx.resource->adm_mutex);
  2669. out:
  2670. drbd_adm_finish(&adm_ctx, info, retcode);
  2671. return 0;
  2672. }
  2673. int drbd_adm_suspend_io(struct sk_buff *skb, struct genl_info *info)
  2674. {
  2675. return drbd_adm_simple_request_state(skb, info, NS(susp, 1));
  2676. }
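/* Netlink handler for "resume-io": if the device sits in a "forced down"
 * freeze (NEW_CUR_UUID set), bump the current or exposed data UUID first;
 * then clear all suspend reasons and fail or restart the frozen requests. */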
  2677. int drbd_adm_resume_io(struct sk_buff *skb, struct genl_info *info)
  2678. {
  2679. struct drbd_config_context adm_ctx;
  2680. struct drbd_device *device;
  2681. int retcode; /* enum drbd_ret_code rsp. enum drbd_state_rv */
  2682. retcode = drbd_adm_prepare(&adm_ctx, skb, info, DRBD_ADM_NEED_MINOR);
  2683. if (!adm_ctx.reply_skb)
  2684. return retcode;
  2685. if (retcode != NO_ERROR)
  2686. goto out;
  2687. mutex_lock(&adm_ctx.resource->adm_mutex);
  2688. device = adm_ctx.device;
  2689. if (test_bit(NEW_CUR_UUID, &device->flags)) {
  2690. if (get_ldev_if_state(device, D_ATTACHING)) {
  2691. drbd_uuid_new_current(device);
  2692. put_ldev(device);
  2693. } else {
  2694. /* This is effectively a multi-stage "forced down".
2695. * The NEW_CUR_UUID bit is supposedly only set if we
  2696. * lost the replication connection, and are configured
  2697. * to freeze IO and wait for some fence-peer handler.
  2698. * So we still don't have a replication connection.
  2699. * And now we don't have a local disk either. After
  2700. * resume, we will fail all pending and new IO, because
  2701. * we don't have any data anymore. Which means we will
  2702. * eventually be able to terminate all users of this
  2703. * device, and then take it down. By bumping the
  2704. * "effective" data uuid, we make sure that you really
2705. * need to tear down before you reconfigure: we will
2706. * then refuse to re-connect or re-attach (because no
  2707. * matching real data uuid exists).
  2708. */
  2709. u64 val;
  2710. get_random_bytes(&val, sizeof(u64));
  2711. drbd_set_ed_uuid(device, val);
  2712. drbd_warn(device, "Resumed without access to data; please tear down before attempting to re-configure.\n");
  2713. }
  2714. clear_bit(NEW_CUR_UUID, &device->flags);
  2715. }
  2716. drbd_suspend_io(device);
  2717. retcode = drbd_request_state(device, NS3(susp, 0, susp_nod, 0, susp_fen, 0));
  2718. if (retcode == SS_SUCCESS) {
  2719. if (device->state.conn < C_CONNECTED)
  2720. tl_clear(first_peer_device(device)->connection);
  2721. if (device->state.disk == D_DISKLESS || device->state.disk == D_FAILED)
  2722. tl_restart(first_peer_device(device)->connection, FAIL_FROZEN_DISK_IO);
  2723. }
  2724. drbd_resume_io(device);
  2725. mutex_unlock(&adm_ctx.resource->adm_mutex);
  2726. out:
  2727. drbd_adm_finish(&adm_ctx, info, retcode);
  2728. return 0;
  2729. }
  2730. int drbd_adm_outdate(struct sk_buff *skb, struct genl_info *info)
  2731. {
  2732. return drbd_adm_simple_request_state(skb, info, NS(disk, D_OUTDATED));
  2733. }
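/* Emit a DRBD_NLA_CFG_CONTEXT nest (resource name, optional volume number,
 * and the connection's local/peer addresses) into the netlink message. */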
  2734. static int nla_put_drbd_cfg_context(struct sk_buff *skb,
  2735. struct drbd_resource *resource,
  2736. struct drbd_connection *connection,
  2737. struct drbd_device *device)
  2738. {
  2739. struct nlattr *nla;
  2740. nla = nla_nest_start_noflag(skb, DRBD_NLA_CFG_CONTEXT);
  2741. if (!nla)
  2742. goto nla_put_failure;
  2743. if (device &&
  2744. nla_put_u32(skb, T_ctx_volume, device->vnr))
  2745. goto nla_put_failure;
  2746. if (nla_put_string(skb, T_ctx_resource_name, resource->name))
  2747. goto nla_put_failure;
  2748. if (connection) {
  2749. if (connection->my_addr_len &&
  2750. nla_put(skb, T_ctx_my_addr, connection->my_addr_len, &connection->my_addr))
  2751. goto nla_put_failure;
  2752. if (connection->peer_addr_len &&
  2753. nla_put(skb, T_ctx_peer_addr, connection->peer_addr_len, &connection->peer_addr))
  2754. goto nla_put_failure;
  2755. }
  2756. nla_nest_end(skb, nla);
  2757. return 0;
  2758. nla_put_failure:
  2759. if (nla)
  2760. nla_nest_cancel(skb, nla);
  2761. return -EMSGSIZE;
  2762. }
  2763. /*
  2764. * The generic netlink dump callbacks are called outside the genl_lock(), so
  2765. * they cannot use the simple attribute parsing code which uses global
  2766. * attribute tables.
  2767. */
  2768. static struct nlattr *find_cfg_context_attr(const struct nlmsghdr *nlh, int attr)
  2769. {
  2770. const unsigned hdrlen = GENL_HDRLEN + GENL_MAGIC_FAMILY_HDRSZ;
  2771. const int maxtype = ARRAY_SIZE(drbd_cfg_context_nl_policy) - 1;
  2772. struct nlattr *nla;
  2773. nla = nla_find(nlmsg_attrdata(nlh, hdrlen), nlmsg_attrlen(nlh, hdrlen),
  2774. DRBD_NLA_CFG_CONTEXT);
  2775. if (!nla)
  2776. return NULL;
  2777. return drbd_nla_find_nested(maxtype, nla, __nla_type(attr));
  2778. }
  2779. static void resource_to_info(struct resource_info *, struct drbd_resource *);
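/* Netlink dump callback: emit one resource per invocation, remembering the
 * last resource emitted in cb->args[0] so the dump can be resumed. */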
  2780. int drbd_adm_dump_resources(struct sk_buff *skb, struct netlink_callback *cb)
  2781. {
  2782. struct drbd_genlmsghdr *dh;
  2783. struct drbd_resource *resource;
  2784. struct resource_info resource_info;
  2785. struct resource_statistics resource_statistics;
  2786. int err;
  2787. rcu_read_lock();
  2788. if (cb->args[0]) {
  2789. for_each_resource_rcu(resource, &drbd_resources)
  2790. if (resource == (struct drbd_resource *)cb->args[0])
  2791. goto found_resource;
  2792. err = 0; /* resource was probably deleted */
  2793. goto out;
  2794. }
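/* Make resource point to the list head (not an actual entry), so the
 * continue-iteration below starts at the first resource. */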
  2795. resource = list_entry(&drbd_resources,
  2796. struct drbd_resource, resources);
  2797. found_resource:
  2798. list_for_each_entry_continue_rcu(resource, &drbd_resources, resources) {
  2799. goto put_result;
  2800. }
  2801. err = 0;
  2802. goto out;
  2803. put_result:
  2804. dh = genlmsg_put(skb, NETLINK_CB(cb->skb).portid,
  2805. cb->nlh->nlmsg_seq, &drbd_genl_family,
  2806. NLM_F_MULTI, DRBD_ADM_GET_RESOURCES);
  2807. err = -ENOMEM;
  2808. if (!dh)
  2809. goto out;
  2810. dh->minor = -1U;
  2811. dh->ret_code = NO_ERROR;
  2812. err = nla_put_drbd_cfg_context(skb, resource, NULL, NULL);
  2813. if (err)
  2814. goto out;
  2815. err = res_opts_to_skb(skb, &resource->res_opts, !capable(CAP_SYS_ADMIN));
  2816. if (err)
  2817. goto out;
  2818. resource_to_info(&resource_info, resource);
  2819. err = resource_info_to_skb(skb, &resource_info, !capable(CAP_SYS_ADMIN));
  2820. if (err)
  2821. goto out;
  2822. resource_statistics.res_stat_write_ordering = resource->write_ordering;
  2823. err = resource_statistics_to_skb(skb, &resource_statistics, !capable(CAP_SYS_ADMIN));
  2824. if (err)
  2825. goto out;
  2826. cb->args[0] = (long)resource;
  2827. genlmsg_end(skb, dh);
  2828. err = 0;
  2829. out:
  2830. rcu_read_unlock();
  2831. if (err)
  2832. return err;
  2833. return skb->len;
  2834. }
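/* Fill a device_statistics struct from the live counters of @device and,
 * if the local disk is attached, from the on-disk UUID history and flags. */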
  2835. static void device_to_statistics(struct device_statistics *s,
  2836. struct drbd_device *device)
  2837. {
  2838. memset(s, 0, sizeof(*s));
  2839. s->dev_upper_blocked = !may_inc_ap_bio(device);
  2840. if (get_ldev(device)) {
  2841. struct drbd_md *md = &device->ldev->md;
  2842. u64 *history_uuids = (u64 *)s->history_uuids;
  2843. int n;
  2844. spin_lock_irq(&md->uuid_lock);
  2845. s->dev_current_uuid = md->uuid[UI_CURRENT];
  2846. BUILD_BUG_ON(sizeof(s->history_uuids) < UI_HISTORY_END - UI_HISTORY_START + 1);
  2847. for (n = 0; n < UI_HISTORY_END - UI_HISTORY_START + 1; n++)
  2848. history_uuids[n] = md->uuid[UI_HISTORY_START + n];
  2849. for (; n < HISTORY_UUIDS; n++)
  2850. history_uuids[n] = 0;
  2851. s->history_uuids_len = HISTORY_UUIDS;
  2852. spin_unlock_irq(&md->uuid_lock);
  2853. s->dev_disk_flags = md->flags;
  2854. put_ldev(device);
  2855. }
  2856. s->dev_size = get_capacity(device->vdisk);
  2857. s->dev_read = device->read_cnt;
  2858. s->dev_write = device->writ_cnt;
  2859. s->dev_al_writes = device->al_writ_cnt;
  2860. s->dev_bm_writes = device->bm_writ_cnt;
  2861. s->dev_upper_pending = atomic_read(&device->ap_bio_cnt);
  2862. s->dev_lower_pending = atomic_read(&device->local_cnt);
  2863. s->dev_al_suspended = test_bit(AL_SUSPENDED, &device->flags);
  2864. s->dev_exposed_data_uuid = device->ed_uuid;
  2865. }
  2866. static int put_resource_in_arg0(struct netlink_callback *cb, int holder_nr)
  2867. {
  2868. if (cb->args[0]) {
  2869. struct drbd_resource *resource =
  2870. (struct drbd_resource *)cb->args[0];
  2871. kref_put(&resource->kref, drbd_destroy_resource);
  2872. }
  2873. return 0;
  2874. }
2875. int drbd_adm_dump_devices_done(struct netlink_callback *cb)
{
  2876. return put_resource_in_arg0(cb, 7);
  2877. }
  2878. static void device_to_info(struct device_info *, struct drbd_device *);
  2879. int drbd_adm_dump_devices(struct sk_buff *skb, struct netlink_callback *cb)
  2880. {
  2881. struct nlattr *resource_filter;
  2882. struct drbd_resource *resource;
  2883. struct drbd_device *device;
  2884. int minor, err, retcode;
  2885. struct drbd_genlmsghdr *dh;
  2886. struct device_info device_info;
  2887. struct device_statistics device_statistics;
  2888. struct idr *idr_to_search;
  2889. resource = (struct drbd_resource *)cb->args[0];
  2890. if (!cb->args[0] && !cb->args[1]) {
  2891. resource_filter = find_cfg_context_attr(cb->nlh, T_ctx_resource_name);
  2892. if (resource_filter) {
  2893. retcode = ERR_RES_NOT_KNOWN;
  2894. resource = drbd_find_resource(nla_data(resource_filter));
  2895. if (!resource)
  2896. goto put_result;
  2897. cb->args[0] = (long)resource;
  2898. }
  2899. }
  2900. rcu_read_lock();
  2901. minor = cb->args[1];
  2902. idr_to_search = resource ? &resource->devices : &drbd_devices;
  2903. device = idr_get_next(idr_to_search, &minor);
  2904. if (!device) {
  2905. err = 0;
  2906. goto out;
  2907. }
  2908. idr_for_each_entry_continue(idr_to_search, device, minor) {
  2909. retcode = NO_ERROR;
  2910. goto put_result; /* only one iteration */
  2911. }
  2912. err = 0;
  2913. goto out; /* no more devices */
  2914. put_result:
  2915. dh = genlmsg_put(skb, NETLINK_CB(cb->skb).portid,
  2916. cb->nlh->nlmsg_seq, &drbd_genl_family,
  2917. NLM_F_MULTI, DRBD_ADM_GET_DEVICES);
  2918. err = -ENOMEM;
  2919. if (!dh)
  2920. goto out;
  2921. dh->ret_code = retcode;
  2922. dh->minor = -1U;
  2923. if (retcode == NO_ERROR) {
  2924. dh->minor = device->minor;
  2925. err = nla_put_drbd_cfg_context(skb, device->resource, NULL, device);
  2926. if (err)
  2927. goto out;
  2928. if (get_ldev(device)) {
  2929. struct disk_conf *disk_conf =
  2930. rcu_dereference(device->ldev->disk_conf);
  2931. err = disk_conf_to_skb(skb, disk_conf, !capable(CAP_SYS_ADMIN));
  2932. put_ldev(device);
  2933. if (err)
  2934. goto out;
  2935. }
  2936. device_to_info(&device_info, device);
  2937. err = device_info_to_skb(skb, &device_info, !capable(CAP_SYS_ADMIN));
  2938. if (err)
  2939. goto out;
  2940. device_to_statistics(&device_statistics, device);
  2941. err = device_statistics_to_skb(skb, &device_statistics, !capable(CAP_SYS_ADMIN));
  2942. if (err)
  2943. goto out;
  2944. cb->args[1] = minor + 1;
  2945. }
  2946. genlmsg_end(skb, dh);
  2947. err = 0;
  2948. out:
  2949. rcu_read_unlock();
  2950. if (err)
  2951. return err;
  2952. return skb->len;
  2953. }
  2954. int drbd_adm_dump_connections_done(struct netlink_callback *cb)
  2955. {
  2956. return put_resource_in_arg0(cb, 6);
  2957. }
  2958. enum { SINGLE_RESOURCE, ITERATE_RESOURCES };
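/* Netlink dump callback: emit one connection per invocation, iterating over
 * a single resource or over all resources; cb->args[] holds the current
 * resource, the iteration mode and the last connection emitted. */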
  2959. int drbd_adm_dump_connections(struct sk_buff *skb, struct netlink_callback *cb)
  2960. {
  2961. struct nlattr *resource_filter;
  2962. struct drbd_resource *resource = NULL, *next_resource;
  2963. struct drbd_connection *connection;
  2964. int err = 0, retcode;
  2965. struct drbd_genlmsghdr *dh;
  2966. struct connection_info connection_info;
  2967. struct connection_statistics connection_statistics;
  2968. rcu_read_lock();
  2969. resource = (struct drbd_resource *)cb->args[0];
  2970. if (!cb->args[0]) {
  2971. resource_filter = find_cfg_context_attr(cb->nlh, T_ctx_resource_name);
  2972. if (resource_filter) {
  2973. retcode = ERR_RES_NOT_KNOWN;
  2974. resource = drbd_find_resource(nla_data(resource_filter));
  2975. if (!resource)
  2976. goto put_result;
  2977. cb->args[0] = (long)resource;
  2978. cb->args[1] = SINGLE_RESOURCE;
  2979. }
  2980. }
  2981. if (!resource) {
  2982. if (list_empty(&drbd_resources))
  2983. goto out;
  2984. resource = list_first_entry(&drbd_resources, struct drbd_resource, resources);
  2985. kref_get(&resource->kref);
  2986. cb->args[0] = (long)resource;
  2987. cb->args[1] = ITERATE_RESOURCES;
  2988. }
  2989. next_resource:
  2990. rcu_read_unlock();
  2991. mutex_lock(&resource->conf_update);
  2992. rcu_read_lock();
  2993. if (cb->args[2]) {
  2994. for_each_connection_rcu(connection, resource)
  2995. if (connection == (struct drbd_connection *)cb->args[2])
  2996. goto found_connection;
  2997. /* connection was probably deleted */
  2998. goto no_more_connections;
  2999. }
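/* Make connection point to the list head (not an actual entry). */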
  3000. connection = list_entry(&resource->connections, struct drbd_connection, connections);
  3001. found_connection:
  3002. list_for_each_entry_continue_rcu(connection, &resource->connections, connections) {
  3003. if (!has_net_conf(connection))
  3004. continue;
  3005. retcode = NO_ERROR;
  3006. goto put_result; /* only one iteration */
  3007. }
  3008. no_more_connections:
  3009. if (cb->args[1] == ITERATE_RESOURCES) {
  3010. for_each_resource_rcu(next_resource, &drbd_resources) {
  3011. if (next_resource == resource)
  3012. goto found_resource;
  3013. }
  3014. /* resource was probably deleted */
  3015. }
  3016. goto out;
  3017. found_resource:
  3018. list_for_each_entry_continue_rcu(next_resource, &drbd_resources, resources) {
  3019. mutex_unlock(&resource->conf_update);
  3020. kref_put(&resource->kref, drbd_destroy_resource);
  3021. resource = next_resource;
  3022. kref_get(&resource->kref);
  3023. cb->args[0] = (long)resource;
  3024. cb->args[2] = 0;
  3025. goto next_resource;
  3026. }
  3027. goto out; /* no more resources */
  3028. put_result:
  3029. dh = genlmsg_put(skb, NETLINK_CB(cb->skb).portid,
  3030. cb->nlh->nlmsg_seq, &drbd_genl_family,
  3031. NLM_F_MULTI, DRBD_ADM_GET_CONNECTIONS);
  3032. err = -ENOMEM;
  3033. if (!dh)
  3034. goto out;
  3035. dh->ret_code = retcode;
  3036. dh->minor = -1U;
  3037. if (retcode == NO_ERROR) {
  3038. struct net_conf *net_conf;
  3039. err = nla_put_drbd_cfg_context(skb, resource, connection, NULL);
  3040. if (err)
  3041. goto out;
  3042. net_conf = rcu_dereference(connection->net_conf);
  3043. if (net_conf) {
  3044. err = net_conf_to_skb(skb, net_conf, !capable(CAP_SYS_ADMIN));
  3045. if (err)
  3046. goto out;
  3047. }
  3048. connection_to_info(&connection_info, connection);
  3049. err = connection_info_to_skb(skb, &connection_info, !capable(CAP_SYS_ADMIN));
  3050. if (err)
  3051. goto out;
  3052. connection_statistics.conn_congested = test_bit(NET_CONGESTED, &connection->flags);
  3053. err = connection_statistics_to_skb(skb, &connection_statistics, !capable(CAP_SYS_ADMIN));
  3054. if (err)
  3055. goto out;
  3056. cb->args[2] = (long)connection;
  3057. }
  3058. genlmsg_end(skb, dh);
  3059. err = 0;
  3060. out:
  3061. rcu_read_unlock();
  3062. if (resource)
  3063. mutex_unlock(&resource->conf_update);
  3064. if (err)
  3065. return err;
  3066. return skb->len;
  3067. }
  3068. enum mdf_peer_flag {
  3069. MDF_PEER_CONNECTED = 1 << 0,
  3070. MDF_PEER_OUTDATED = 1 << 1,
  3071. MDF_PEER_FENCING = 1 << 2,
  3072. MDF_PEER_FULL_SYNC = 1 << 3,
  3073. };
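/* Fill a peer_device_statistics struct from the transfer and resync counters
 * and, if the local disk is attached, from the peer-related metadata flags. */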
  3074. static void peer_device_to_statistics(struct peer_device_statistics *s,
  3075. struct drbd_peer_device *peer_device)
  3076. {
  3077. struct drbd_device *device = peer_device->device;
  3078. memset(s, 0, sizeof(*s));
  3079. s->peer_dev_received = device->recv_cnt;
  3080. s->peer_dev_sent = device->send_cnt;
  3081. s->peer_dev_pending = atomic_read(&device->ap_pending_cnt) +
  3082. atomic_read(&device->rs_pending_cnt);
  3083. s->peer_dev_unacked = atomic_read(&device->unacked_cnt);
  3084. s->peer_dev_out_of_sync = drbd_bm_total_weight(device) << (BM_BLOCK_SHIFT - 9);
  3085. s->peer_dev_resync_failed = device->rs_failed << (BM_BLOCK_SHIFT - 9);
  3086. if (get_ldev(device)) {
  3087. struct drbd_md *md = &device->ldev->md;
  3088. spin_lock_irq(&md->uuid_lock);
  3089. s->peer_dev_bitmap_uuid = md->uuid[UI_BITMAP];
  3090. spin_unlock_irq(&md->uuid_lock);
  3091. s->peer_dev_flags =
  3092. (drbd_md_test_flag(device->ldev, MDF_CONNECTED_IND) ?
  3093. MDF_PEER_CONNECTED : 0) +
  3094. (drbd_md_test_flag(device->ldev, MDF_CONSISTENT) &&
  3095. !drbd_md_test_flag(device->ldev, MDF_WAS_UP_TO_DATE) ?
  3096. MDF_PEER_OUTDATED : 0) +
  3097. /* FIXME: MDF_PEER_FENCING? */
  3098. (drbd_md_test_flag(device->ldev, MDF_FULL_SYNC) ?
  3099. MDF_PEER_FULL_SYNC : 0);
  3100. put_ldev(device);
  3101. }
  3102. }
  3103. int drbd_adm_dump_peer_devices_done(struct netlink_callback *cb)
  3104. {
  3105. return put_resource_in_arg0(cb, 9);
  3106. }
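/* Netlink dump callback: walk all devices (optionally filtered by resource)
 * and their peer devices, emitting one peer device per invocation. */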
  3107. int drbd_adm_dump_peer_devices(struct sk_buff *skb, struct netlink_callback *cb)
  3108. {
  3109. struct nlattr *resource_filter;
  3110. struct drbd_resource *resource;
  3111. struct drbd_device *device;
  3112. struct drbd_peer_device *peer_device = NULL;
  3113. int minor, err, retcode;
  3114. struct drbd_genlmsghdr *dh;
  3115. struct idr *idr_to_search;
  3116. resource = (struct drbd_resource *)cb->args[0];
  3117. if (!cb->args[0] && !cb->args[1]) {
  3118. resource_filter = find_cfg_context_attr(cb->nlh, T_ctx_resource_name);
  3119. if (resource_filter) {
  3120. retcode = ERR_RES_NOT_KNOWN;
  3121. resource = drbd_find_resource(nla_data(resource_filter));
  3122. if (!resource)
  3123. goto put_result;
  3124. }
  3125. cb->args[0] = (long)resource;
  3126. }
  3127. rcu_read_lock();
  3128. minor = cb->args[1];
  3129. idr_to_search = resource ? &resource->devices : &drbd_devices;
  3130. device = idr_find(idr_to_search, minor);
  3131. if (!device) {
  3132. next_device:
  3133. minor++;
  3134. cb->args[2] = 0;
  3135. device = idr_get_next(idr_to_search, &minor);
  3136. if (!device) {
  3137. err = 0;
  3138. goto out;
  3139. }
  3140. }
  3141. if (cb->args[2]) {
  3142. for_each_peer_device(peer_device, device)
  3143. if (peer_device == (struct drbd_peer_device *)cb->args[2])
  3144. goto found_peer_device;
  3145. /* peer device was probably deleted */
  3146. goto next_device;
  3147. }
  3148. /* Make peer_device point to the list head (not the first entry). */
  3149. peer_device = list_entry(&device->peer_devices, struct drbd_peer_device, peer_devices);
  3150. found_peer_device:
  3151. list_for_each_entry_continue_rcu(peer_device, &device->peer_devices, peer_devices) {
  3152. if (!has_net_conf(peer_device->connection))
  3153. continue;
  3154. retcode = NO_ERROR;
  3155. goto put_result; /* only one iteration */
  3156. }
  3157. goto next_device;
  3158. put_result:
  3159. dh = genlmsg_put(skb, NETLINK_CB(cb->skb).portid,
  3160. cb->nlh->nlmsg_seq, &drbd_genl_family,
  3161. NLM_F_MULTI, DRBD_ADM_GET_PEER_DEVICES);
  3162. err = -ENOMEM;
  3163. if (!dh)
  3164. goto out;
  3165. dh->ret_code = retcode;
  3166. dh->minor = -1U;
  3167. if (retcode == NO_ERROR) {
  3168. struct peer_device_info peer_device_info;
  3169. struct peer_device_statistics peer_device_statistics;
  3170. dh->minor = minor;
  3171. err = nla_put_drbd_cfg_context(skb, device->resource, peer_device->connection, device);
  3172. if (err)
  3173. goto out;
  3174. peer_device_to_info(&peer_device_info, peer_device);
  3175. err = peer_device_info_to_skb(skb, &peer_device_info, !capable(CAP_SYS_ADMIN));
  3176. if (err)
  3177. goto out;
  3178. peer_device_to_statistics(&peer_device_statistics, peer_device);
  3179. err = peer_device_statistics_to_skb(skb, &peer_device_statistics, !capable(CAP_SYS_ADMIN));
  3180. if (err)
  3181. goto out;
  3182. cb->args[1] = minor;
  3183. cb->args[2] = (long)peer_device;
  3184. }
  3185. genlmsg_end(skb, dh);
  3186. err = 0;
  3187. out:
  3188. rcu_read_unlock();
  3189. if (err)
  3190. return err;
  3191. return skb->len;
  3192. }
  3193. /*
  3194. * Return the connection of @resource if @resource has exactly one connection.
  3195. */
  3196. static struct drbd_connection *the_only_connection(struct drbd_resource *resource)
  3197. {
  3198. struct list_head *connections = &resource->connections;
  3199. if (list_empty(connections) || connections->next->next != connections)
  3200. return NULL;
  3201. return list_first_entry(&resource->connections, struct drbd_connection, connections);
  3202. }
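/* Build the status reply for one device: configuration context, resource,
 * disk and net options, plus a DRBD_NLA_STATE_INFO nest with state, counters,
 * UUIDs and, for broadcast events, the event-specific fields. */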
  3203. static int nla_put_status_info(struct sk_buff *skb, struct drbd_device *device,
  3204. const struct sib_info *sib)
  3205. {
  3206. struct drbd_resource *resource = device->resource;
  3207. struct state_info *si = NULL; /* for sizeof(si->member); */
  3208. struct nlattr *nla;
  3209. int got_ldev;
  3210. int err = 0;
  3211. int exclude_sensitive;
  3212. /* If sib != NULL, this is drbd_bcast_event, which anyone can listen
  3213. * to. So we better exclude_sensitive information.
  3214. *
  3215. * If sib == NULL, this is drbd_adm_get_status, executed synchronously
  3216. * in the context of the requesting user process. Exclude sensitive
  3217. * information, unless current has superuser.
  3218. *
  3219. * NOTE: for drbd_adm_get_status_all(), this is a netlink dump, and
  3220. * relies on the current implementation of netlink_dump(), which
  3221. * executes the dump callback successively from netlink_recvmsg(),
  3222. * always in the context of the receiving process */
  3223. exclude_sensitive = sib || !capable(CAP_SYS_ADMIN);
  3224. got_ldev = get_ldev(device);
3225. /* We still need to add connection name and volume number information.
  3226. * Minor number is in drbd_genlmsghdr. */
  3227. if (nla_put_drbd_cfg_context(skb, resource, the_only_connection(resource), device))
  3228. goto nla_put_failure;
  3229. if (res_opts_to_skb(skb, &device->resource->res_opts, exclude_sensitive))
  3230. goto nla_put_failure;
  3231. rcu_read_lock();
  3232. if (got_ldev) {
  3233. struct disk_conf *disk_conf;
  3234. disk_conf = rcu_dereference(device->ldev->disk_conf);
  3235. err = disk_conf_to_skb(skb, disk_conf, exclude_sensitive);
  3236. }
  3237. if (!err) {
  3238. struct net_conf *nc;
  3239. nc = rcu_dereference(first_peer_device(device)->connection->net_conf);
  3240. if (nc)
  3241. err = net_conf_to_skb(skb, nc, exclude_sensitive);
  3242. }
  3243. rcu_read_unlock();
  3244. if (err)
  3245. goto nla_put_failure;
  3246. nla = nla_nest_start_noflag(skb, DRBD_NLA_STATE_INFO);
  3247. if (!nla)
  3248. goto nla_put_failure;
  3249. if (nla_put_u32(skb, T_sib_reason, sib ? sib->sib_reason : SIB_GET_STATUS_REPLY) ||
  3250. nla_put_u32(skb, T_current_state, device->state.i) ||
  3251. nla_put_u64_0pad(skb, T_ed_uuid, device->ed_uuid) ||
  3252. nla_put_u64_0pad(skb, T_capacity, get_capacity(device->vdisk)) ||
  3253. nla_put_u64_0pad(skb, T_send_cnt, device->send_cnt) ||
  3254. nla_put_u64_0pad(skb, T_recv_cnt, device->recv_cnt) ||
  3255. nla_put_u64_0pad(skb, T_read_cnt, device->read_cnt) ||
  3256. nla_put_u64_0pad(skb, T_writ_cnt, device->writ_cnt) ||
  3257. nla_put_u64_0pad(skb, T_al_writ_cnt, device->al_writ_cnt) ||
  3258. nla_put_u64_0pad(skb, T_bm_writ_cnt, device->bm_writ_cnt) ||
  3259. nla_put_u32(skb, T_ap_bio_cnt, atomic_read(&device->ap_bio_cnt)) ||
  3260. nla_put_u32(skb, T_ap_pending_cnt, atomic_read(&device->ap_pending_cnt)) ||
  3261. nla_put_u32(skb, T_rs_pending_cnt, atomic_read(&device->rs_pending_cnt)))
  3262. goto nla_put_failure;
  3263. if (got_ldev) {
  3264. int err;
  3265. spin_lock_irq(&device->ldev->md.uuid_lock);
  3266. err = nla_put(skb, T_uuids, sizeof(si->uuids), device->ldev->md.uuid);
  3267. spin_unlock_irq(&device->ldev->md.uuid_lock);
  3268. if (err)
  3269. goto nla_put_failure;
  3270. if (nla_put_u32(skb, T_disk_flags, device->ldev->md.flags) ||
  3271. nla_put_u64_0pad(skb, T_bits_total, drbd_bm_bits(device)) ||
  3272. nla_put_u64_0pad(skb, T_bits_oos,
  3273. drbd_bm_total_weight(device)))
  3274. goto nla_put_failure;
  3275. if (C_SYNC_SOURCE <= device->state.conn &&
  3276. C_PAUSED_SYNC_T >= device->state.conn) {
  3277. if (nla_put_u64_0pad(skb, T_bits_rs_total,
  3278. device->rs_total) ||
  3279. nla_put_u64_0pad(skb, T_bits_rs_failed,
  3280. device->rs_failed))
  3281. goto nla_put_failure;
  3282. }
  3283. }
  3284. if (sib) {
  3285. switch(sib->sib_reason) {
  3286. case SIB_SYNC_PROGRESS:
  3287. case SIB_GET_STATUS_REPLY:
  3288. break;
  3289. case SIB_STATE_CHANGE:
  3290. if (nla_put_u32(skb, T_prev_state, sib->os.i) ||
  3291. nla_put_u32(skb, T_new_state, sib->ns.i))
  3292. goto nla_put_failure;
  3293. break;
  3294. case SIB_HELPER_POST:
  3295. if (nla_put_u32(skb, T_helper_exit_code,
  3296. sib->helper_exit_code))
  3297. goto nla_put_failure;
  3298. fallthrough;
  3299. case SIB_HELPER_PRE:
  3300. if (nla_put_string(skb, T_helper, sib->helper_name))
  3301. goto nla_put_failure;
  3302. break;
  3303. }
  3304. }
  3305. nla_nest_end(skb, nla);
  3306. if (0)
  3307. nla_put_failure:
  3308. err = -EMSGSIZE;
  3309. if (got_ldev)
  3310. put_ldev(device);
  3311. return err;
  3312. }
  3313. int drbd_adm_get_status(struct sk_buff *skb, struct genl_info *info)
  3314. {
  3315. struct drbd_config_context adm_ctx;
  3316. enum drbd_ret_code retcode;
  3317. int err;
  3318. retcode = drbd_adm_prepare(&adm_ctx, skb, info, DRBD_ADM_NEED_MINOR);
  3319. if (!adm_ctx.reply_skb)
  3320. return retcode;
  3321. if (retcode != NO_ERROR)
  3322. goto out;
  3323. err = nla_put_status_info(adm_ctx.reply_skb, adm_ctx.device, NULL);
  3324. if (err) {
  3325. nlmsg_free(adm_ctx.reply_skb);
  3326. return err;
  3327. }
  3328. out:
  3329. drbd_adm_finish(&adm_ctx, info, retcode);
  3330. return 0;
  3331. }
  3332. static int get_one_status(struct sk_buff *skb, struct netlink_callback *cb)
  3333. {
  3334. struct drbd_device *device;
  3335. struct drbd_genlmsghdr *dh;
  3336. struct drbd_resource *pos = (struct drbd_resource *)cb->args[0];
  3337. struct drbd_resource *resource = NULL;
  3338. struct drbd_resource *tmp;
  3339. unsigned volume = cb->args[1];
  3340. /* Open coded, deferred, iteration:
  3341. * for_each_resource_safe(resource, tmp, &drbd_resources) {
  3342. * connection = "first connection of resource or undefined";
  3343. * idr_for_each_entry(&resource->devices, device, i) {
  3344. * ...
  3345. * }
  3346. * }
  3347. * where resource is cb->args[0];
  3348. * and i is cb->args[1];
  3349. *
  3350. * cb->args[2] indicates if we shall loop over all resources,
  3351. * or just dump all volumes of a single resource.
  3352. *
  3353. * This may miss entries inserted after this dump started,
  3354. * or entries deleted before they are reached.
  3355. *
  3356. * We need to make sure the device won't disappear while
  3357. * we are looking at it, and revalidate our iterators
  3358. * on each iteration.
  3359. */
  3360. /* synchronize with conn_create()/drbd_destroy_connection() */
  3361. rcu_read_lock();
  3362. /* revalidate iterator position */
  3363. for_each_resource_rcu(tmp, &drbd_resources) {
  3364. if (pos == NULL) {
  3365. /* first iteration */
  3366. pos = tmp;
  3367. resource = pos;
  3368. break;
  3369. }
  3370. if (tmp == pos) {
  3371. resource = pos;
  3372. break;
  3373. }
  3374. }
  3375. if (resource) {
  3376. next_resource:
  3377. device = idr_get_next(&resource->devices, &volume);
  3378. if (!device) {
  3379. /* No more volumes to dump on this resource.
  3380. * Advance resource iterator. */
  3381. pos = list_entry_rcu(resource->resources.next,
  3382. struct drbd_resource, resources);
  3383. /* Did we dump any volume of this resource yet? */
  3384. if (volume != 0) {
  3385. /* If we reached the end of the list,
  3386. * or only a single resource dump was requested,
  3387. * we are done. */
  3388. if (&pos->resources == &drbd_resources || cb->args[2])
  3389. goto out;
  3390. volume = 0;
  3391. resource = pos;
  3392. goto next_resource;
  3393. }
  3394. }
  3395. dh = genlmsg_put(skb, NETLINK_CB(cb->skb).portid,
  3396. cb->nlh->nlmsg_seq, &drbd_genl_family,
  3397. NLM_F_MULTI, DRBD_ADM_GET_STATUS);
  3398. if (!dh)
  3399. goto out;
  3400. if (!device) {
  3401. /* This is a connection without a single volume.
3402. * Surprisingly enough, it may have a network
  3403. * configuration. */
  3404. struct drbd_connection *connection;
  3405. dh->minor = -1U;
  3406. dh->ret_code = NO_ERROR;
  3407. connection = the_only_connection(resource);
  3408. if (nla_put_drbd_cfg_context(skb, resource, connection, NULL))
  3409. goto cancel;
  3410. if (connection) {
  3411. struct net_conf *nc;
  3412. nc = rcu_dereference(connection->net_conf);
  3413. if (nc && net_conf_to_skb(skb, nc, 1) != 0)
  3414. goto cancel;
  3415. }
  3416. goto done;
  3417. }
  3418. D_ASSERT(device, device->vnr == volume);
  3419. D_ASSERT(device, device->resource == resource);
  3420. dh->minor = device_to_minor(device);
  3421. dh->ret_code = NO_ERROR;
  3422. if (nla_put_status_info(skb, device, NULL)) {
  3423. cancel:
  3424. genlmsg_cancel(skb, dh);
  3425. goto out;
  3426. }
  3427. done:
  3428. genlmsg_end(skb, dh);
  3429. }
  3430. out:
  3431. rcu_read_unlock();
  3432. /* where to start the next iteration */
  3433. cb->args[0] = (long)pos;
  3434. cb->args[1] = (pos == resource) ? volume + 1 : 0;
  3435. /* No more resources/volumes/minors found results in an empty skb.
  3436. * Which will terminate the dump. */
  3437. return skb->len;
  3438. }
  3439. /*
  3440. * Request status of all resources, or of all volumes within a single resource.
  3441. *
  3442. * This is a dump, as the answer may not fit in a single reply skb otherwise.
  3443. * Which means we cannot use the family->attrbuf or other such members, because
  3444. * dump is NOT protected by the genl_lock(). During dump, we only have access
  3445. * to the incoming skb, and need to opencode "parsing" of the nlattr payload.
  3446. *
  3447. * Once things are setup properly, we call into get_one_status().
  3448. */
  3449. int drbd_adm_get_status_all(struct sk_buff *skb, struct netlink_callback *cb)
  3450. {
  3451. const unsigned hdrlen = GENL_HDRLEN + GENL_MAGIC_FAMILY_HDRSZ;
  3452. struct nlattr *nla;
  3453. const char *resource_name;
  3454. struct drbd_resource *resource;
  3455. int maxtype;
  3456. /* Is this a followup call? */
  3457. if (cb->args[0]) {
  3458. /* ... of a single resource dump,
  3459. * and the resource iterator has been advanced already? */
  3460. if (cb->args[2] && cb->args[2] != cb->args[0])
  3461. return 0; /* DONE. */
  3462. goto dump;
  3463. }
  3464. /* First call (from netlink_dump_start). We need to figure out
  3465. * which resource(s) the user wants us to dump. */
  3466. nla = nla_find(nlmsg_attrdata(cb->nlh, hdrlen),
  3467. nlmsg_attrlen(cb->nlh, hdrlen),
  3468. DRBD_NLA_CFG_CONTEXT);
  3469. /* No explicit context given. Dump all. */
  3470. if (!nla)
  3471. goto dump;
  3472. maxtype = ARRAY_SIZE(drbd_cfg_context_nl_policy) - 1;
  3473. nla = drbd_nla_find_nested(maxtype, nla, __nla_type(T_ctx_resource_name));
  3474. if (IS_ERR(nla))
  3475. return PTR_ERR(nla);
  3476. /* context given, but no name present? */
  3477. if (!nla)
  3478. return -EINVAL;
  3479. resource_name = nla_data(nla);
  3480. if (!*resource_name)
  3481. return -ENODEV;
  3482. resource = drbd_find_resource(resource_name);
  3483. if (!resource)
  3484. return -ENODEV;
  3485. kref_put(&resource->kref, drbd_destroy_resource); /* get_one_status() revalidates the resource */
  3486. /* prime iterators, and set "filter" mode mark:
  3487. * only dump this connection. */
  3488. cb->args[0] = (long)resource;
  3489. /* cb->args[1] = 0; passed in this way. */
  3490. cb->args[2] = (long)resource;
  3491. dump:
  3492. return get_one_status(skb, cb);
  3493. }
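/* Netlink handler for "get-timeout-type": report whether the peer-outdated,
 * degraded, or default wait-for-connection timeout applies to this device. */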
  3494. int drbd_adm_get_timeout_type(struct sk_buff *skb, struct genl_info *info)
  3495. {
  3496. struct drbd_config_context adm_ctx;
  3497. enum drbd_ret_code retcode;
  3498. struct timeout_parms tp;
  3499. int err;
  3500. retcode = drbd_adm_prepare(&adm_ctx, skb, info, DRBD_ADM_NEED_MINOR);
  3501. if (!adm_ctx.reply_skb)
  3502. return retcode;
  3503. if (retcode != NO_ERROR)
  3504. goto out;
  3505. tp.timeout_type =
  3506. adm_ctx.device->state.pdsk == D_OUTDATED ? UT_PEER_OUTDATED :
  3507. test_bit(USE_DEGR_WFC_T, &adm_ctx.device->flags) ? UT_DEGRADED :
  3508. UT_DEFAULT;
  3509. err = timeout_parms_to_priv_skb(adm_ctx.reply_skb, &tp);
  3510. if (err) {
  3511. nlmsg_free(adm_ctx.reply_skb);
  3512. return err;
  3513. }
  3514. out:
  3515. drbd_adm_finish(&adm_ctx, info, retcode);
  3516. return 0;
  3517. }
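/* Netlink handler for "verify": set the online verify start/stop sectors
 * (resuming from the last position by default) and request C_VERIFY_S. */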
  3518. int drbd_adm_start_ov(struct sk_buff *skb, struct genl_info *info)
  3519. {
  3520. struct drbd_config_context adm_ctx;
  3521. struct drbd_device *device;
  3522. enum drbd_ret_code retcode;
  3523. struct start_ov_parms parms;
  3524. retcode = drbd_adm_prepare(&adm_ctx, skb, info, DRBD_ADM_NEED_MINOR);
  3525. if (!adm_ctx.reply_skb)
  3526. return retcode;
  3527. if (retcode != NO_ERROR)
  3528. goto out;
  3529. device = adm_ctx.device;
  3530. /* resume from last known position, if possible */
  3531. parms.ov_start_sector = device->ov_start_sector;
  3532. parms.ov_stop_sector = ULLONG_MAX;
  3533. if (info->attrs[DRBD_NLA_START_OV_PARMS]) {
  3534. int err = start_ov_parms_from_attrs(&parms, info);
  3535. if (err) {
  3536. retcode = ERR_MANDATORY_TAG;
  3537. drbd_msg_put_info(adm_ctx.reply_skb, from_attrs_err_to_txt(err));
  3538. goto out;
  3539. }
  3540. }
  3541. mutex_lock(&adm_ctx.resource->adm_mutex);
  3542. /* w_make_ov_request expects position to be aligned */
  3543. device->ov_start_sector = parms.ov_start_sector & ~(BM_SECT_PER_BIT-1);
  3544. device->ov_stop_sector = parms.ov_stop_sector;
3545. /* If there is still bitmap IO pending, e.g. a previous resync or verify
3546. * that just finished, wait for it before requesting a new resync. */
  3547. drbd_suspend_io(device);
  3548. wait_event(device->misc_wait, !test_bit(BITMAP_IO, &device->flags));
  3549. retcode = drbd_request_state(device, NS(conn, C_VERIFY_S));
  3550. drbd_resume_io(device);
  3551. mutex_unlock(&adm_ctx.resource->adm_mutex);
  3552. out:
  3553. drbd_adm_finish(&adm_ctx, info, retcode);
  3554. return 0;
  3555. }
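/* Netlink handler for "new-current-uuid": rotate in a fresh current UUID;
 * with clear_bm set, also clear the bitmap, which on a just-created,
 * connected device is used to skip the initial full sync. */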
  3556. int drbd_adm_new_c_uuid(struct sk_buff *skb, struct genl_info *info)
  3557. {
  3558. struct drbd_config_context adm_ctx;
  3559. struct drbd_device *device;
  3560. enum drbd_ret_code retcode;
  3561. int skip_initial_sync = 0;
  3562. int err;
  3563. struct new_c_uuid_parms args;
  3564. retcode = drbd_adm_prepare(&adm_ctx, skb, info, DRBD_ADM_NEED_MINOR);
  3565. if (!adm_ctx.reply_skb)
  3566. return retcode;
  3567. if (retcode != NO_ERROR)
  3568. goto out_nolock;
  3569. device = adm_ctx.device;
  3570. memset(&args, 0, sizeof(args));
  3571. if (info->attrs[DRBD_NLA_NEW_C_UUID_PARMS]) {
  3572. err = new_c_uuid_parms_from_attrs(&args, info);
  3573. if (err) {
  3574. retcode = ERR_MANDATORY_TAG;
  3575. drbd_msg_put_info(adm_ctx.reply_skb, from_attrs_err_to_txt(err));
  3576. goto out_nolock;
  3577. }
  3578. }
  3579. mutex_lock(&adm_ctx.resource->adm_mutex);
  3580. mutex_lock(device->state_mutex); /* Protects us against serialized state changes. */
  3581. if (!get_ldev(device)) {
  3582. retcode = ERR_NO_DISK;
  3583. goto out;
  3584. }
3585. /* this is "skip initial sync", assumed to be clean */
  3586. if (device->state.conn == C_CONNECTED &&
  3587. first_peer_device(device)->connection->agreed_pro_version >= 90 &&
  3588. device->ldev->md.uuid[UI_CURRENT] == UUID_JUST_CREATED && args.clear_bm) {
  3589. drbd_info(device, "Preparing to skip initial sync\n");
  3590. skip_initial_sync = 1;
  3591. } else if (device->state.conn != C_STANDALONE) {
  3592. retcode = ERR_CONNECTED;
  3593. goto out_dec;
  3594. }
  3595. drbd_uuid_set(device, UI_BITMAP, 0); /* Rotate UI_BITMAP to History 1, etc... */
  3596. drbd_uuid_new_current(device); /* New current, previous to UI_BITMAP */
  3597. if (args.clear_bm) {
  3598. err = drbd_bitmap_io(device, &drbd_bmio_clear_n_write,
  3599. "clear_n_write from new_c_uuid", BM_LOCKED_MASK, NULL);
  3600. if (err) {
  3601. drbd_err(device, "Writing bitmap failed with %d\n", err);
  3602. retcode = ERR_IO_MD_DISK;
  3603. }
  3604. if (skip_initial_sync) {
  3605. drbd_send_uuids_skip_initial_sync(first_peer_device(device));
  3606. _drbd_uuid_set(device, UI_BITMAP, 0);
  3607. drbd_print_uuids(device, "cleared bitmap UUID");
  3608. spin_lock_irq(&device->resource->req_lock);
  3609. _drbd_set_state(_NS2(device, disk, D_UP_TO_DATE, pdsk, D_UP_TO_DATE),
  3610. CS_VERBOSE, NULL);
  3611. spin_unlock_irq(&device->resource->req_lock);
  3612. }
  3613. }
  3614. drbd_md_sync(device);
  3615. out_dec:
  3616. put_ldev(device);
  3617. out:
  3618. mutex_unlock(device->state_mutex);
  3619. mutex_unlock(&adm_ctx.resource->adm_mutex);
  3620. out_nolock:
  3621. drbd_adm_finish(&adm_ctx, info, retcode);
  3622. return 0;
  3623. }
  3624. static enum drbd_ret_code
  3625. drbd_check_resource_name(struct drbd_config_context *adm_ctx)
  3626. {
  3627. const char *name = adm_ctx->resource_name;
  3628. if (!name || !name[0]) {
  3629. drbd_msg_put_info(adm_ctx->reply_skb, "resource name missing");
  3630. return ERR_MANDATORY_TAG;
  3631. }
  3632. /* if we want to use these in sysfs/configfs/debugfs some day,
  3633. * we must not allow slashes */
  3634. if (strchr(name, '/')) {
  3635. drbd_msg_put_info(adm_ctx->reply_skb, "invalid resource name");
  3636. return ERR_INVALID_REQUEST;
  3637. }
  3638. return NO_ERROR;
  3639. }
  3640. static void resource_to_info(struct resource_info *info,
  3641. struct drbd_resource *resource)
  3642. {
  3643. info->res_role = conn_highest_role(first_connection(resource));
  3644. info->res_susp = resource->susp;
  3645. info->res_susp_nod = resource->susp_nod;
  3646. info->res_susp_fen = resource->susp_fen;
  3647. }
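/* Netlink handler for "new-resource": create the resource together with its
 * initial connection object and broadcast a NOTIFY_CREATE resource event. */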
  3648. int drbd_adm_new_resource(struct sk_buff *skb, struct genl_info *info)
  3649. {
  3650. struct drbd_connection *connection;
  3651. struct drbd_config_context adm_ctx;
  3652. enum drbd_ret_code retcode;
  3653. struct res_opts res_opts;
  3654. int err;
  3655. retcode = drbd_adm_prepare(&adm_ctx, skb, info, 0);
  3656. if (!adm_ctx.reply_skb)
  3657. return retcode;
  3658. if (retcode != NO_ERROR)
  3659. goto out;
  3660. set_res_opts_defaults(&res_opts);
  3661. err = res_opts_from_attrs(&res_opts, info);
  3662. if (err && err != -ENOMSG) {
  3663. retcode = ERR_MANDATORY_TAG;
  3664. drbd_msg_put_info(adm_ctx.reply_skb, from_attrs_err_to_txt(err));
  3665. goto out;
  3666. }
  3667. retcode = drbd_check_resource_name(&adm_ctx);
  3668. if (retcode != NO_ERROR)
  3669. goto out;
  3670. if (adm_ctx.resource) {
  3671. if (info->nlhdr->nlmsg_flags & NLM_F_EXCL) {
  3672. retcode = ERR_INVALID_REQUEST;
  3673. drbd_msg_put_info(adm_ctx.reply_skb, "resource exists");
  3674. }
  3675. /* else: still NO_ERROR */
  3676. goto out;
  3677. }
  3678. /* not yet safe for genl_family.parallel_ops */
  3679. mutex_lock(&resources_mutex);
  3680. connection = conn_create(adm_ctx.resource_name, &res_opts);
  3681. mutex_unlock(&resources_mutex);
  3682. if (connection) {
  3683. struct resource_info resource_info;
  3684. mutex_lock(&notification_mutex);
  3685. resource_to_info(&resource_info, connection->resource);
  3686. notify_resource_state(NULL, 0, connection->resource,
  3687. &resource_info, NOTIFY_CREATE);
  3688. mutex_unlock(&notification_mutex);
  3689. } else
  3690. retcode = ERR_NOMEM;
  3691. out:
  3692. drbd_adm_finish(&adm_ctx, info, retcode);
  3693. return 0;
  3694. }
  3695. static void device_to_info(struct device_info *info,
  3696. struct drbd_device *device)
  3697. {
  3698. info->dev_disk_state = device->state.disk;
  3699. }
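/* Netlink handler for "new-minor": create the block device for the requested
 * minor/volume and broadcast NOTIFY_CREATE events for the device and its
 * peer devices. */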
  3700. int drbd_adm_new_minor(struct sk_buff *skb, struct genl_info *info)
  3701. {
  3702. struct drbd_config_context adm_ctx;
  3703. struct drbd_genlmsghdr *dh = genl_info_userhdr(info);
  3704. enum drbd_ret_code retcode;
  3705. retcode = drbd_adm_prepare(&adm_ctx, skb, info, DRBD_ADM_NEED_RESOURCE);
  3706. if (!adm_ctx.reply_skb)
  3707. return retcode;
  3708. if (retcode != NO_ERROR)
  3709. goto out;
  3710. if (dh->minor > MINORMASK) {
  3711. drbd_msg_put_info(adm_ctx.reply_skb, "requested minor out of range");
  3712. retcode = ERR_INVALID_REQUEST;
  3713. goto out;
  3714. }
  3715. if (adm_ctx.volume > DRBD_VOLUME_MAX) {
  3716. drbd_msg_put_info(adm_ctx.reply_skb, "requested volume id out of range");
  3717. retcode = ERR_INVALID_REQUEST;
  3718. goto out;
  3719. }
  3720. /* drbd_adm_prepare made sure already
  3721. * that first_peer_device(device)->connection and device->vnr match the request. */
  3722. if (adm_ctx.device) {
  3723. if (info->nlhdr->nlmsg_flags & NLM_F_EXCL)
  3724. retcode = ERR_MINOR_OR_VOLUME_EXISTS;
  3725. /* else: still NO_ERROR */
  3726. goto out;
  3727. }
  3728. mutex_lock(&adm_ctx.resource->adm_mutex);
  3729. retcode = drbd_create_device(&adm_ctx, dh->minor);
  3730. if (retcode == NO_ERROR) {
  3731. struct drbd_device *device;
  3732. struct drbd_peer_device *peer_device;
  3733. struct device_info info;
  3734. unsigned int peer_devices = 0;
  3735. enum drbd_notification_type flags;
  3736. device = minor_to_device(dh->minor);
  3737. for_each_peer_device(peer_device, device) {
  3738. if (!has_net_conf(peer_device->connection))
  3739. continue;
  3740. peer_devices++;
  3741. }
  3742. device_to_info(&info, device);
  3743. mutex_lock(&notification_mutex);
  3744. flags = (peer_devices--) ? NOTIFY_CONTINUES : 0;
  3745. notify_device_state(NULL, 0, device, &info, NOTIFY_CREATE | flags);
  3746. for_each_peer_device(peer_device, device) {
  3747. struct peer_device_info peer_device_info;
  3748. if (!has_net_conf(peer_device->connection))
  3749. continue;
  3750. peer_device_to_info(&peer_device_info, peer_device);
  3751. flags = (peer_devices--) ? NOTIFY_CONTINUES : 0;
  3752. notify_peer_device_state(NULL, 0, peer_device, &peer_device_info,
  3753. NOTIFY_CREATE | flags);
  3754. }
  3755. mutex_unlock(&notification_mutex);
  3756. }
  3757. mutex_unlock(&adm_ctx.resource->adm_mutex);
  3758. out:
  3759. drbd_adm_finish(&adm_ctx, info, retcode);
  3760. return 0;
  3761. }
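/* Delete a single minor: only allowed while diskless and Secondary; emits
 * the NOTIFY_DESTROY events for the device and its peer devices before
 * actually deleting it. */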
static enum drbd_ret_code adm_del_minor(struct drbd_device *device)
{
	struct drbd_peer_device *peer_device;

	if (device->state.disk == D_DISKLESS &&
	    /* no need to be device->state.conn == C_STANDALONE &&
	     * we may want to delete a minor from a live replication group.
	     */
	    device->state.role == R_SECONDARY) {
		struct drbd_connection *connection =
			first_connection(device->resource);

		_drbd_request_state(device, NS(conn, C_WF_REPORT_PARAMS),
				    CS_VERBOSE + CS_WAIT_COMPLETE);

		/* If the state engine hasn't stopped the sender thread yet, we
		 * need to flush the sender work queue before generating the
		 * DESTROY events here. */
		if (get_t_state(&connection->worker) == RUNNING)
			drbd_flush_workqueue(&connection->sender_work);

		mutex_lock(&notification_mutex);
		for_each_peer_device(peer_device, device) {
			if (!has_net_conf(peer_device->connection))
				continue;
			notify_peer_device_state(NULL, 0, peer_device, NULL,
						 NOTIFY_DESTROY | NOTIFY_CONTINUES);
		}
		notify_device_state(NULL, 0, device, NULL, NOTIFY_DESTROY);
		mutex_unlock(&notification_mutex);

		drbd_delete_device(device);
		return NO_ERROR;
	} else
		return ERR_MINOR_CONFIGURED;
}
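
/* Netlink handler for deleting a minor: run adm_del_minor() under the
 * resource's adm_mutex. */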
int drbd_adm_del_minor(struct sk_buff *skb, struct genl_info *info)
{
	struct drbd_config_context adm_ctx;
	enum drbd_ret_code retcode;

	retcode = drbd_adm_prepare(&adm_ctx, skb, info, DRBD_ADM_NEED_MINOR);
	if (!adm_ctx.reply_skb)
		return retcode;
	if (retcode != NO_ERROR)
		goto out;

	mutex_lock(&adm_ctx.resource->adm_mutex);
	retcode = adm_del_minor(adm_ctx.device);
	mutex_unlock(&adm_ctx.resource->adm_mutex);
out:
	drbd_adm_finish(&adm_ctx, info, retcode);
	return 0;
}
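
/* Delete a resource.  Refused while any connection is above C_STANDALONE or
 * while devices still exist.  Otherwise send NOTIFY_DESTROY, unlink the
 * resource from the global list, stop the per-connection worker threads and
 * free the resource after an RCU grace period. */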
static int adm_del_resource(struct drbd_resource *resource)
{
	struct drbd_connection *connection;

	for_each_connection(connection, resource) {
		if (connection->cstate > C_STANDALONE)
			return ERR_NET_CONFIGURED;
	}
	if (!idr_is_empty(&resource->devices))
		return ERR_RES_IN_USE;

	/* The state engine has stopped the sender thread, so we don't
	 * need to flush the sender work queue before generating the
	 * DESTROY event here. */
	mutex_lock(&notification_mutex);
	notify_resource_state(NULL, 0, resource, NULL, NOTIFY_DESTROY);
	mutex_unlock(&notification_mutex);

	mutex_lock(&resources_mutex);
	list_del_rcu(&resource->resources);
	mutex_unlock(&resources_mutex);
	/* Make sure all threads have actually stopped: state handling only
	 * does drbd_thread_stop_nowait(). */
	list_for_each_entry(connection, &resource->connections, connections)
		drbd_thread_stop(&connection->worker);
	synchronize_rcu();
	drbd_free_resource(resource);
	return NO_ERROR;
}
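
/* Take a whole resource down in one request: demote all peer devices to
 * Secondary, disconnect all connections, detach all devices, delete all
 * volumes, and finally delete the resource itself. */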
int drbd_adm_down(struct sk_buff *skb, struct genl_info *info)
{
	struct drbd_config_context adm_ctx;
	struct drbd_resource *resource;
	struct drbd_connection *connection;
	struct drbd_device *device;
	int retcode; /* enum drbd_ret_code rsp. enum drbd_state_rv */
	unsigned i;

	retcode = drbd_adm_prepare(&adm_ctx, skb, info, DRBD_ADM_NEED_RESOURCE);
	if (!adm_ctx.reply_skb)
		return retcode;
	if (retcode != NO_ERROR)
		goto finish;

	resource = adm_ctx.resource;
	mutex_lock(&resource->adm_mutex);
	/* demote */
	for_each_connection(connection, resource) {
		struct drbd_peer_device *peer_device;

		idr_for_each_entry(&connection->peer_devices, peer_device, i) {
			retcode = drbd_set_role(peer_device->device, R_SECONDARY, 0);
			if (retcode < SS_SUCCESS) {
				drbd_msg_put_info(adm_ctx.reply_skb, "failed to demote");
				goto out;
			}
		}

		retcode = conn_try_disconnect(connection, 0);
		if (retcode < SS_SUCCESS) {
			drbd_msg_put_info(adm_ctx.reply_skb, "failed to disconnect");
			goto out;
		}
	}

	/* detach */
	idr_for_each_entry(&resource->devices, device, i) {
		retcode = adm_detach(device, 0);
		if (retcode < SS_SUCCESS || retcode > NO_ERROR) {
			drbd_msg_put_info(adm_ctx.reply_skb, "failed to detach");
			goto out;
		}
	}

	/* delete volumes */
	idr_for_each_entry(&resource->devices, device, i) {
		retcode = adm_del_minor(device);
		if (retcode != NO_ERROR) {
			/* "can not happen" */
			drbd_msg_put_info(adm_ctx.reply_skb, "failed to delete volume");
			goto out;
		}
	}

	retcode = adm_del_resource(resource);
out:
	mutex_unlock(&resource->adm_mutex);
finish:
	drbd_adm_finish(&adm_ctx, info, retcode);
	return 0;
}
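
/* Netlink handler for deleting a resource: run adm_del_resource() under the
 * resource's adm_mutex. */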
int drbd_adm_del_resource(struct sk_buff *skb, struct genl_info *info)
{
	struct drbd_config_context adm_ctx;
	struct drbd_resource *resource;
	enum drbd_ret_code retcode;

	retcode = drbd_adm_prepare(&adm_ctx, skb, info, DRBD_ADM_NEED_RESOURCE);
	if (!adm_ctx.reply_skb)
		return retcode;
	if (retcode != NO_ERROR)
		goto finish;

	resource = adm_ctx.resource;
	mutex_lock(&resource->adm_mutex);
	retcode = adm_del_resource(resource);
	mutex_unlock(&resource->adm_mutex);
finish:
	drbd_adm_finish(&adm_ctx, info, retcode);
	return 0;
}
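
/* Broadcast a state info ("sib") event for @device on the drbd events
 * multicast group. */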
void drbd_bcast_event(struct drbd_device *device, const struct sib_info *sib)
{
	struct sk_buff *msg;
	struct drbd_genlmsghdr *d_out;
	unsigned seq;
	int err = -ENOMEM;

	seq = atomic_inc_return(&drbd_genl_seq);
	msg = genlmsg_new(NLMSG_GOODSIZE, GFP_NOIO);
	if (!msg)
		goto failed;

	err = -EMSGSIZE;
	d_out = genlmsg_put(msg, 0, seq, &drbd_genl_family, 0, DRBD_EVENT);
	if (!d_out) /* cannot happen, but anyways. */
		goto nla_put_failure;
	d_out->minor = device_to_minor(device);
	d_out->ret_code = NO_ERROR;

	if (nla_put_status_info(msg, device, sib))
		goto nla_put_failure;
	genlmsg_end(msg, d_out);
	err = drbd_genl_multicast_events(msg, GFP_NOWAIT);
	/* msg has been consumed or freed in netlink_broadcast() */
	if (err && err != -ESRCH)
		goto failed;

	return;

nla_put_failure:
	nlmsg_free(msg);
failed:
	drbd_err(device, "Error %d while broadcasting event. "
			 "Event seq:%u sib_reason:%u\n",
			 err, seq, sib->sib_reason);
}

static int nla_put_notification_header(struct sk_buff *msg,
				       enum drbd_notification_type type)
{
	struct drbd_notification_header nh = {
		.nh_type = type,
	};

	return drbd_notification_header_to_skb(msg, &nh, true);
}
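
/* notify_resource_state(), notify_device_state(), notify_connection_state()
 * and notify_peer_device_state() all follow the same pattern: when called
 * with an skb (from a dump callback), append the notification to it; when
 * called with skb == NULL, allocate a new message and multicast it on the
 * events group.  For NOTIFY_DESTROY only the config context and the
 * notification header are included; the info payload is skipped. */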
int notify_resource_state(struct sk_buff *skb,
			  unsigned int seq,
			  struct drbd_resource *resource,
			  struct resource_info *resource_info,
			  enum drbd_notification_type type)
{
	struct resource_statistics resource_statistics;
	struct drbd_genlmsghdr *dh;
	bool multicast = false;
	int err;

	if (!skb) {
		seq = atomic_inc_return(&notify_genl_seq);
		skb = genlmsg_new(NLMSG_GOODSIZE, GFP_NOIO);
		err = -ENOMEM;
		if (!skb)
			goto failed;
		multicast = true;
	}

	err = -EMSGSIZE;
	dh = genlmsg_put(skb, 0, seq, &drbd_genl_family, 0, DRBD_RESOURCE_STATE);
	if (!dh)
		goto nla_put_failure;
	dh->minor = -1U;
	dh->ret_code = NO_ERROR;
	if (nla_put_drbd_cfg_context(skb, resource, NULL, NULL) ||
	    nla_put_notification_header(skb, type) ||
	    ((type & ~NOTIFY_FLAGS) != NOTIFY_DESTROY &&
	     resource_info_to_skb(skb, resource_info, true)))
		goto nla_put_failure;
	resource_statistics.res_stat_write_ordering = resource->write_ordering;
	err = resource_statistics_to_skb(skb, &resource_statistics, !capable(CAP_SYS_ADMIN));
	if (err)
		goto nla_put_failure;
	genlmsg_end(skb, dh);
	if (multicast) {
		err = drbd_genl_multicast_events(skb, GFP_NOWAIT);
		/* skb has been consumed or freed in netlink_broadcast() */
		if (err && err != -ESRCH)
			goto failed;
	}
	return 0;

nla_put_failure:
	nlmsg_free(skb);
failed:
	drbd_err(resource, "Error %d while broadcasting event. Event seq:%u\n",
			err, seq);
	return err;
}

int notify_device_state(struct sk_buff *skb,
			unsigned int seq,
			struct drbd_device *device,
			struct device_info *device_info,
			enum drbd_notification_type type)
{
	struct device_statistics device_statistics;
	struct drbd_genlmsghdr *dh;
	bool multicast = false;
	int err;

	if (!skb) {
		seq = atomic_inc_return(&notify_genl_seq);
		skb = genlmsg_new(NLMSG_GOODSIZE, GFP_NOIO);
		err = -ENOMEM;
		if (!skb)
			goto failed;
		multicast = true;
	}

	err = -EMSGSIZE;
	dh = genlmsg_put(skb, 0, seq, &drbd_genl_family, 0, DRBD_DEVICE_STATE);
	if (!dh)
		goto nla_put_failure;
	dh->minor = device->minor;
	dh->ret_code = NO_ERROR;
	if (nla_put_drbd_cfg_context(skb, device->resource, NULL, device) ||
	    nla_put_notification_header(skb, type) ||
	    ((type & ~NOTIFY_FLAGS) != NOTIFY_DESTROY &&
	     device_info_to_skb(skb, device_info, true)))
		goto nla_put_failure;
	device_to_statistics(&device_statistics, device);
	device_statistics_to_skb(skb, &device_statistics, !capable(CAP_SYS_ADMIN));
	genlmsg_end(skb, dh);
	if (multicast) {
		err = drbd_genl_multicast_events(skb, GFP_NOWAIT);
		/* skb has been consumed or freed in netlink_broadcast() */
		if (err && err != -ESRCH)
			goto failed;
	}
	return 0;

nla_put_failure:
	nlmsg_free(skb);
failed:
	drbd_err(device, "Error %d while broadcasting event. Event seq:%u\n",
		 err, seq);
	return err;
}

int notify_connection_state(struct sk_buff *skb,
			    unsigned int seq,
			    struct drbd_connection *connection,
			    struct connection_info *connection_info,
			    enum drbd_notification_type type)
{
	struct connection_statistics connection_statistics;
	struct drbd_genlmsghdr *dh;
	bool multicast = false;
	int err;

	if (!skb) {
		seq = atomic_inc_return(&notify_genl_seq);
		skb = genlmsg_new(NLMSG_GOODSIZE, GFP_NOIO);
		err = -ENOMEM;
		if (!skb)
			goto failed;
		multicast = true;
	}

	err = -EMSGSIZE;
	dh = genlmsg_put(skb, 0, seq, &drbd_genl_family, 0, DRBD_CONNECTION_STATE);
	if (!dh)
		goto nla_put_failure;
	dh->minor = -1U;
	dh->ret_code = NO_ERROR;
	if (nla_put_drbd_cfg_context(skb, connection->resource, connection, NULL) ||
	    nla_put_notification_header(skb, type) ||
	    ((type & ~NOTIFY_FLAGS) != NOTIFY_DESTROY &&
	     connection_info_to_skb(skb, connection_info, true)))
		goto nla_put_failure;
	connection_statistics.conn_congested = test_bit(NET_CONGESTED, &connection->flags);
	connection_statistics_to_skb(skb, &connection_statistics, !capable(CAP_SYS_ADMIN));
	genlmsg_end(skb, dh);
	if (multicast) {
		err = drbd_genl_multicast_events(skb, GFP_NOWAIT);
		/* skb has been consumed or freed in netlink_broadcast() */
		if (err && err != -ESRCH)
			goto failed;
	}
	return 0;

nla_put_failure:
	nlmsg_free(skb);
failed:
	drbd_err(connection, "Error %d while broadcasting event. Event seq:%u\n",
		 err, seq);
	return err;
}

int notify_peer_device_state(struct sk_buff *skb,
			     unsigned int seq,
			     struct drbd_peer_device *peer_device,
			     struct peer_device_info *peer_device_info,
			     enum drbd_notification_type type)
{
	struct peer_device_statistics peer_device_statistics;
	struct drbd_resource *resource = peer_device->device->resource;
	struct drbd_genlmsghdr *dh;
	bool multicast = false;
	int err;

	if (!skb) {
		seq = atomic_inc_return(&notify_genl_seq);
		skb = genlmsg_new(NLMSG_GOODSIZE, GFP_NOIO);
		err = -ENOMEM;
		if (!skb)
			goto failed;
		multicast = true;
	}

	err = -EMSGSIZE;
	dh = genlmsg_put(skb, 0, seq, &drbd_genl_family, 0, DRBD_PEER_DEVICE_STATE);
	if (!dh)
		goto nla_put_failure;
	dh->minor = -1U;
	dh->ret_code = NO_ERROR;
	if (nla_put_drbd_cfg_context(skb, resource, peer_device->connection, peer_device->device) ||
	    nla_put_notification_header(skb, type) ||
	    ((type & ~NOTIFY_FLAGS) != NOTIFY_DESTROY &&
	     peer_device_info_to_skb(skb, peer_device_info, true)))
		goto nla_put_failure;
	peer_device_to_statistics(&peer_device_statistics, peer_device);
	peer_device_statistics_to_skb(skb, &peer_device_statistics, !capable(CAP_SYS_ADMIN));
	genlmsg_end(skb, dh);
	if (multicast) {
		err = drbd_genl_multicast_events(skb, GFP_NOWAIT);
		/* skb has been consumed or freed in netlink_broadcast() */
		if (err && err != -ESRCH)
			goto failed;
	}
	return 0;

nla_put_failure:
	nlmsg_free(skb);
failed:
	drbd_err(peer_device, "Error %d while broadcasting event. Event seq:%u\n",
		 err, seq);
	return err;
}
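
/* Multicast a DRBD_HELPER event carrying the name and status of a user space
 * helper for the given device or connection. */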
void notify_helper(enum drbd_notification_type type,
		   struct drbd_device *device, struct drbd_connection *connection,
		   const char *name, int status)
{
	struct drbd_resource *resource = device ? device->resource : connection->resource;
	struct drbd_helper_info helper_info;
	unsigned int seq = atomic_inc_return(&notify_genl_seq);
	struct sk_buff *skb = NULL;
	struct drbd_genlmsghdr *dh;
	int err;

	strscpy(helper_info.helper_name, name, sizeof(helper_info.helper_name));
	helper_info.helper_name_len = min(strlen(name), sizeof(helper_info.helper_name));
	helper_info.helper_status = status;

	skb = genlmsg_new(NLMSG_GOODSIZE, GFP_NOIO);
	err = -ENOMEM;
	if (!skb)
		goto fail;

	err = -EMSGSIZE;
	dh = genlmsg_put(skb, 0, seq, &drbd_genl_family, 0, DRBD_HELPER);
	if (!dh)
		goto fail;
	dh->minor = device ? device->minor : -1;
	dh->ret_code = NO_ERROR;
	mutex_lock(&notification_mutex);
	if (nla_put_drbd_cfg_context(skb, resource, connection, device) ||
	    nla_put_notification_header(skb, type) ||
	    drbd_helper_info_to_skb(skb, &helper_info, true))
		goto unlock_fail;
	genlmsg_end(skb, dh);
	err = drbd_genl_multicast_events(skb, GFP_NOWAIT);
	skb = NULL;
	/* skb has been consumed or freed in netlink_broadcast() */
	if (err && err != -ESRCH)
		goto unlock_fail;
	mutex_unlock(&notification_mutex);
	return;

unlock_fail:
	mutex_unlock(&notification_mutex);
fail:
	nlmsg_free(skb);
	drbd_err(resource, "Error %d while broadcasting event. Event seq:%u\n",
		 err, seq);
}
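
/* Append the DRBD_INITIAL_STATE_DONE message that terminates the stream of
 * NOTIFY_EXISTS events generated for an initial state dump. */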
static int notify_initial_state_done(struct sk_buff *skb, unsigned int seq)
{
	struct drbd_genlmsghdr *dh;
	int err;

	err = -EMSGSIZE;
	dh = genlmsg_put(skb, 0, seq, &drbd_genl_family, 0, DRBD_INITIAL_STATE_DONE);
	if (!dh)
		goto nla_put_failure;
	dh->minor = -1U;
	dh->ret_code = NO_ERROR;
	if (nla_put_notification_header(skb, NOTIFY_EXISTS))
		goto nla_put_failure;
	genlmsg_end(skb, dh);
	return 0;

nla_put_failure:
	nlmsg_free(skb);
	pr_err("Error %d sending event. Event seq:%u\n", err, seq);
	return err;
}

static void free_state_changes(struct list_head *list)
{
	while (!list_empty(list)) {
		struct drbd_state_change *state_change =
			list_first_entry(list, struct drbd_state_change, list);
		list_del(&state_change->list);
		forget_state_change(state_change);
	}
}

static unsigned int notifications_for_state_change(struct drbd_state_change *state_change)
{
	return 1 +
	       state_change->n_connections +
	       state_change->n_devices +
	       state_change->n_devices * state_change->n_connections;
}
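
/* Emit one notification per dump call.  The netlink_callback args are used as
 * follows: args[0] points to the current drbd_state_change snapshot, args[2]
 * holds the sequence number, args[3] the number of notifications for the
 * current snapshot, args[4] the index of the next notification within it, and
 * args[5] the number of dump steps still to go (all remaining notifications,
 * plus the DRBD_INITIAL_STATE_DONE message, plus the final cleanup call). */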
static int get_initial_state(struct sk_buff *skb, struct netlink_callback *cb)
{
	struct drbd_state_change *state_change = (struct drbd_state_change *)cb->args[0];
	unsigned int seq = cb->args[2];
	unsigned int n;
	enum drbd_notification_type flags = 0;
	int err = 0;

	/* There is no need for taking notification_mutex here: it doesn't
	   matter if the initial state events mix with later state change
	   events; we can always tell the events apart by the NOTIFY_EXISTS
	   flag. */

	cb->args[5]--;
	if (cb->args[5] == 1) {
		err = notify_initial_state_done(skb, seq);
		goto out;
	}
	n = cb->args[4]++;
	if (cb->args[4] < cb->args[3])
		flags |= NOTIFY_CONTINUES;
	if (n < 1) {
		err = notify_resource_state_change(skb, seq, state_change->resource,
						   NOTIFY_EXISTS | flags);
		goto next;
	}
	n--;
	if (n < state_change->n_connections) {
		err = notify_connection_state_change(skb, seq, &state_change->connections[n],
						     NOTIFY_EXISTS | flags);
		goto next;
	}
	n -= state_change->n_connections;
	if (n < state_change->n_devices) {
		err = notify_device_state_change(skb, seq, &state_change->devices[n],
						 NOTIFY_EXISTS | flags);
		goto next;
	}
	n -= state_change->n_devices;
	if (n < state_change->n_devices * state_change->n_connections) {
		err = notify_peer_device_state_change(skb, seq, &state_change->peer_devices[n],
						      NOTIFY_EXISTS | flags);
		goto next;
	}

next:
	if (cb->args[4] == cb->args[3]) {
		struct drbd_state_change *next_state_change =
			list_entry(state_change->list.next,
				   struct drbd_state_change, list);
		cb->args[0] = (long)next_state_change;
		cb->args[3] = notifications_for_state_change(next_state_change);
		cb->args[4] = 0;
	}
out:
	if (err)
		return err;
	else
		return skb->len;
}
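
/* Dump handler for the initial state: on the first call, snapshot the state
 * of all resources under resources_mutex and queue the snapshots; subsequent
 * calls replay them one notification at a time via get_initial_state(); the
 * final call frees the snapshot list. */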
int drbd_adm_get_initial_state(struct sk_buff *skb, struct netlink_callback *cb)
{
	struct drbd_resource *resource;
	LIST_HEAD(head);

	if (cb->args[5] >= 1) {
		if (cb->args[5] > 1)
			return get_initial_state(skb, cb);
		if (cb->args[0]) {
			struct drbd_state_change *state_change =
				(struct drbd_state_change *)cb->args[0];

			/* connect list to head */
			list_add(&head, &state_change->list);
			free_state_changes(&head);
		}
		return 0;
	}

	cb->args[5] = 2;  /* number of iterations */
	mutex_lock(&resources_mutex);
	for_each_resource(resource, &drbd_resources) {
		struct drbd_state_change *state_change;

		state_change = remember_old_state(resource, GFP_KERNEL);
		if (!state_change) {
			if (!list_empty(&head))
				free_state_changes(&head);
			mutex_unlock(&resources_mutex);
			return -ENOMEM;
		}
		copy_old_to_new_state_change(state_change);
		list_add_tail(&state_change->list, &head);
		cb->args[5] += notifications_for_state_change(state_change);
	}
	mutex_unlock(&resources_mutex);

	if (!list_empty(&head)) {
		struct drbd_state_change *state_change =
			list_entry(head.next, struct drbd_state_change, list);
		cb->args[0] = (long)state_change;
		cb->args[3] = notifications_for_state_change(state_change);
		list_del(&head);	/* detach list from head */
	}

	cb->args[2] = cb->nlh->nlmsg_seq;
	return get_initial_state(skb, cb);
}