pugixml.cpp 330 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
71778177917801781178217831784178517861787178817891790179117921793179417951796179717981799180018011802180318041805180618071808180918101811181218131814181518161817181818191820182118221823182418251826182718281829183018311832183318341835183618371838183918401841184218431844184518461847184818491850185118521853185418551856185718581859186018611862186318641865186618671868186918701871187218731874187518761877187818791880188118821883188418851886188718881889189018911892189318941895189618971898189919001901190219031904190519061907190819091910191119121913191419151916191719181919192019211922192319241925192619271928192919301931193219331934193519361937193819391940194119421943194419451946194719481949195019511952195319541955195619571958195919601961196219631964196519661967196819691970197119721973197419751976197719781979198019811982198319841985198619871988198919901991199219931994199519961997199819992000200120022003200420052006200720082009201020112012201320142015201620172018201920202021202220232024202520262027202820292030203120322033203420352036203720382039204020412042204320442045204620472048204920502051205220532054205520562057205820592060206120622063206420652066206720682069207020712072207320742075207620772078207920802081208220832084208520862087208820892090209120922093209420952096209720982099210021012102210321042105210621072108210921102111211221132114211521162117211821192120212121222123212421252126212721282129213021312132213321342135213621372138213921402141214221432144214521462147214821492150215121522153215421552156215721582159216021612162216321642165216621672168216921702171217221732174217521762177217821792180218121822183218421852186218721882189219021912192219321942195219621972198219922002201220222032204220522062207220822092210221122122213221422152216221722182219222022212222222322242225222622272228222922302231223222332234223522362237223822392240224122422243224422452246224722482249225022512252225322542255225622572258225922602261226222632264226522662267226822692270227122722273227422752276227
72278227922802281228222832284228522862287228822892290229122922293229422952296229722982299230023012302230323042305230623072308230923102311231223132314231523162317231823192320232123222323232423252326232723282329233023312332233323342335233623372338233923402341234223432344234523462347234823492350235123522353235423552356235723582359236023612362236323642365236623672368236923702371237223732374237523762377237823792380238123822383238423852386238723882389239023912392239323942395239623972398239924002401240224032404240524062407240824092410241124122413241424152416241724182419242024212422242324242425242624272428242924302431243224332434243524362437243824392440244124422443244424452446244724482449245024512452245324542455245624572458245924602461246224632464246524662467246824692470247124722473247424752476247724782479248024812482248324842485248624872488248924902491249224932494249524962497249824992500250125022503250425052506250725082509251025112512251325142515251625172518251925202521252225232524252525262527252825292530253125322533253425352536253725382539254025412542254325442545254625472548254925502551255225532554255525562557255825592560256125622563256425652566256725682569257025712572257325742575257625772578257925802581258225832584258525862587258825892590259125922593259425952596259725982599260026012602260326042605260626072608260926102611261226132614261526162617261826192620262126222623262426252626262726282629263026312632263326342635263626372638263926402641264226432644264526462647264826492650265126522653265426552656265726582659266026612662266326642665266626672668266926702671267226732674267526762677267826792680268126822683268426852686268726882689269026912692269326942695269626972698269927002701270227032704270527062707270827092710271127122713271427152716271727182719272027212722272327242725272627272728272927302731273227332734273527362737273827392740274127422743274427452746274727482749275027512752275327542755275627572758275927602761276227632764276527662767276827692770277127722773277427752776277
72778277927802781278227832784278527862787278827892790279127922793279427952796279727982799280028012802280328042805280628072808280928102811281228132814281528162817281828192820282128222823282428252826282728282829283028312832283328342835283628372838283928402841284228432844284528462847284828492850285128522853285428552856285728582859286028612862286328642865286628672868286928702871287228732874287528762877287828792880288128822883288428852886288728882889289028912892289328942895289628972898289929002901290229032904290529062907290829092910291129122913291429152916291729182919292029212922292329242925292629272928292929302931293229332934293529362937293829392940294129422943294429452946294729482949295029512952295329542955295629572958295929602961296229632964296529662967296829692970297129722973297429752976297729782979298029812982298329842985298629872988298929902991299229932994299529962997299829993000300130023003300430053006300730083009301030113012301330143015301630173018301930203021302230233024302530263027302830293030303130323033303430353036303730383039304030413042304330443045304630473048304930503051305230533054305530563057305830593060306130623063306430653066306730683069307030713072307330743075307630773078307930803081308230833084308530863087308830893090309130923093309430953096309730983099310031013102310331043105310631073108310931103111311231133114311531163117311831193120312131223123312431253126312731283129313031313132313331343135313631373138313931403141314231433144314531463147314831493150315131523153315431553156315731583159316031613162316331643165316631673168316931703171317231733174317531763177317831793180318131823183318431853186318731883189319031913192319331943195319631973198319932003201320232033204320532063207320832093210321132123213321432153216321732183219322032213222322332243225322632273228322932303231323232333234323532363237323832393240324132423243324432453246324732483249325032513252325332543255325632573258325932603261326232633264326532663267326832693270327132723273327432753276327
73278327932803281328232833284328532863287328832893290329132923293329432953296329732983299330033013302330333043305330633073308330933103311331233133314331533163317331833193320332133223323332433253326332733283329333033313332333333343335333633373338333933403341334233433344334533463347334833493350335133523353335433553356335733583359336033613362336333643365336633673368336933703371337233733374337533763377337833793380338133823383338433853386338733883389339033913392339333943395339633973398339934003401340234033404340534063407340834093410341134123413341434153416341734183419342034213422342334243425342634273428342934303431343234333434343534363437343834393440344134423443344434453446344734483449345034513452345334543455345634573458345934603461346234633464346534663467346834693470347134723473347434753476347734783479348034813482348334843485348634873488348934903491349234933494349534963497349834993500350135023503350435053506350735083509351035113512351335143515351635173518351935203521352235233524352535263527352835293530353135323533353435353536353735383539354035413542354335443545354635473548354935503551355235533554355535563557355835593560356135623563356435653566356735683569357035713572357335743575357635773578357935803581358235833584358535863587358835893590359135923593359435953596359735983599360036013602360336043605360636073608360936103611361236133614361536163617361836193620362136223623362436253626362736283629363036313632363336343635363636373638363936403641364236433644364536463647364836493650365136523653365436553656365736583659366036613662366336643665366636673668366936703671367236733674367536763677367836793680368136823683368436853686368736883689369036913692369336943695369636973698369937003701370237033704370537063707370837093710371137123713371437153716371737183719372037213722372337243725372637273728372937303731373237333734373537363737373837393740374137423743374437453746374737483749375037513752375337543755375637573758375937603761376237633764376537663767376837693770377137723773377437753776377
73778377937803781378237833784378537863787378837893790379137923793379437953796379737983799380038013802380338043805380638073808380938103811381238133814381538163817381838193820382138223823382438253826382738283829383038313832383338343835383638373838383938403841384238433844384538463847384838493850385138523853385438553856385738583859386038613862386338643865386638673868386938703871387238733874387538763877387838793880388138823883388438853886388738883889389038913892389338943895389638973898389939003901390239033904390539063907390839093910391139123913391439153916391739183919392039213922392339243925392639273928392939303931393239333934393539363937393839393940394139423943394439453946394739483949395039513952395339543955395639573958395939603961396239633964396539663967396839693970397139723973397439753976397739783979398039813982398339843985398639873988398939903991399239933994399539963997399839994000400140024003400440054006400740084009401040114012401340144015401640174018401940204021402240234024402540264027402840294030403140324033403440354036403740384039404040414042404340444045404640474048404940504051405240534054405540564057405840594060406140624063406440654066406740684069407040714072407340744075407640774078407940804081408240834084408540864087408840894090409140924093409440954096409740984099410041014102410341044105410641074108410941104111411241134114411541164117411841194120412141224123412441254126412741284129413041314132413341344135413641374138413941404141414241434144414541464147414841494150415141524153415441554156415741584159416041614162416341644165416641674168416941704171417241734174417541764177417841794180418141824183418441854186418741884189419041914192419341944195419641974198419942004201420242034204420542064207420842094210421142124213421442154216421742184219422042214222422342244225422642274228422942304231423242334234423542364237423842394240424142424243424442454246424742484249425042514252425342544255425642574258425942604261426242634264426542664267426842694270427142724273427442754276427
74278427942804281428242834284428542864287428842894290429142924293429442954296429742984299430043014302430343044305430643074308430943104311431243134314431543164317431843194320432143224323432443254326432743284329433043314332433343344335433643374338433943404341434243434344434543464347434843494350435143524353435443554356435743584359436043614362436343644365436643674368436943704371437243734374437543764377437843794380438143824383438443854386438743884389439043914392439343944395439643974398439944004401440244034404440544064407440844094410441144124413441444154416441744184419442044214422442344244425442644274428442944304431443244334434443544364437443844394440444144424443444444454446444744484449445044514452445344544455445644574458445944604461446244634464446544664467446844694470447144724473447444754476447744784479448044814482448344844485448644874488448944904491449244934494449544964497449844994500450145024503450445054506450745084509451045114512451345144515451645174518451945204521452245234524452545264527452845294530453145324533453445354536453745384539454045414542454345444545454645474548454945504551455245534554455545564557455845594560456145624563456445654566456745684569457045714572457345744575457645774578457945804581458245834584458545864587458845894590459145924593459445954596459745984599460046014602460346044605460646074608460946104611461246134614461546164617461846194620462146224623462446254626462746284629463046314632463346344635463646374638463946404641464246434644464546464647464846494650465146524653465446554656465746584659466046614662466346644665466646674668466946704671467246734674467546764677467846794680468146824683468446854686468746884689469046914692469346944695469646974698469947004701470247034704470547064707470847094710471147124713471447154716471747184719472047214722472347244725472647274728472947304731473247334734473547364737473847394740474147424743474447454746474747484749475047514752475347544755475647574758475947604761476247634764476547664767476847694770477147724773477447754776477
74778477947804781478247834784478547864787478847894790479147924793479447954796479747984799480048014802480348044805480648074808480948104811481248134814481548164817481848194820482148224823482448254826482748284829483048314832483348344835483648374838483948404841484248434844484548464847484848494850485148524853485448554856485748584859486048614862486348644865486648674868486948704871487248734874487548764877487848794880488148824883488448854886488748884889489048914892489348944895489648974898489949004901490249034904490549064907490849094910491149124913491449154916491749184919492049214922492349244925492649274928492949304931493249334934493549364937493849394940494149424943494449454946494749484949495049514952495349544955495649574958495949604961496249634964496549664967496849694970497149724973497449754976497749784979498049814982498349844985498649874988498949904991499249934994499549964997499849995000500150025003500450055006500750085009501050115012501350145015501650175018501950205021502250235024502550265027502850295030503150325033503450355036503750385039504050415042504350445045504650475048504950505051505250535054505550565057505850595060506150625063506450655066506750685069507050715072507350745075507650775078507950805081508250835084508550865087508850895090509150925093509450955096509750985099510051015102510351045105510651075108510951105111511251135114511551165117511851195120512151225123512451255126512751285129513051315132513351345135513651375138513951405141514251435144514551465147514851495150515151525153515451555156515751585159516051615162516351645165516651675168516951705171517251735174517551765177517851795180518151825183518451855186518751885189519051915192519351945195519651975198519952005201520252035204520552065207520852095210521152125213521452155216521752185219522052215222522352245225522652275228522952305231523252335234523552365237523852395240524152425243524452455246524752485249525052515252525352545255525652575258525952605261526252635264526552665267526852695270527152725273527452755276527
75278527952805281528252835284528552865287528852895290529152925293529452955296529752985299530053015302530353045305530653075308530953105311531253135314531553165317531853195320532153225323532453255326532753285329533053315332533353345335533653375338533953405341534253435344534553465347534853495350535153525353535453555356535753585359536053615362536353645365536653675368536953705371537253735374537553765377537853795380538153825383538453855386538753885389539053915392539353945395539653975398539954005401540254035404540554065407540854095410541154125413541454155416541754185419542054215422542354245425542654275428542954305431543254335434543554365437543854395440544154425443544454455446544754485449545054515452545354545455545654575458545954605461546254635464546554665467546854695470547154725473547454755476547754785479548054815482548354845485548654875488548954905491549254935494549554965497549854995500550155025503550455055506550755085509551055115512551355145515551655175518551955205521552255235524552555265527552855295530553155325533553455355536553755385539554055415542554355445545554655475548554955505551555255535554555555565557555855595560556155625563556455655566556755685569557055715572557355745575557655775578557955805581558255835584558555865587558855895590559155925593559455955596559755985599560056015602560356045605560656075608560956105611561256135614561556165617561856195620562156225623562456255626562756285629563056315632563356345635563656375638563956405641564256435644564556465647564856495650565156525653565456555656565756585659566056615662566356645665566656675668566956705671567256735674567556765677567856795680568156825683568456855686568756885689569056915692569356945695569656975698569957005701570257035704570557065707570857095710571157125713571457155716571757185719572057215722572357245725572657275728572957305731573257335734573557365737573857395740574157425743574457455746574757485749575057515752575357545755575657575758575957605761576257635764576557665767576857695770577157725773577457755776577
75778577957805781578257835784578557865787578857895790579157925793579457955796579757985799580058015802580358045805580658075808580958105811581258135814581558165817581858195820582158225823582458255826582758285829583058315832583358345835583658375838583958405841584258435844584558465847584858495850585158525853585458555856585758585859586058615862586358645865586658675868586958705871587258735874587558765877587858795880588158825883588458855886588758885889589058915892589358945895589658975898589959005901590259035904590559065907590859095910591159125913591459155916591759185919592059215922592359245925592659275928592959305931593259335934593559365937593859395940594159425943594459455946594759485949595059515952595359545955595659575958595959605961596259635964596559665967596859695970597159725973597459755976597759785979598059815982598359845985598659875988598959905991599259935994599559965997599859996000600160026003600460056006600760086009601060116012601360146015601660176018601960206021602260236024602560266027602860296030603160326033603460356036603760386039604060416042604360446045604660476048604960506051605260536054605560566057605860596060606160626063606460656066606760686069607060716072607360746075607660776078607960806081608260836084608560866087608860896090609160926093609460956096609760986099610061016102610361046105610661076108610961106111611261136114611561166117611861196120612161226123612461256126612761286129613061316132613361346135613661376138613961406141614261436144614561466147614861496150615161526153615461556156615761586159616061616162616361646165616661676168616961706171617261736174617561766177617861796180618161826183618461856186618761886189619061916192619361946195619661976198619962006201620262036204620562066207620862096210621162126213621462156216621762186219622062216222622362246225622662276228622962306231623262336234623562366237623862396240624162426243624462456246624762486249625062516252625362546255625662576258625962606261626262636264626562666267626862696270627162726273627462756276627
76278627962806281628262836284628562866287628862896290629162926293629462956296629762986299630063016302630363046305630663076308630963106311631263136314631563166317631863196320632163226323632463256326632763286329633063316332633363346335633663376338633963406341634263436344634563466347634863496350635163526353635463556356635763586359636063616362636363646365636663676368636963706371637263736374637563766377637863796380638163826383638463856386638763886389639063916392639363946395639663976398639964006401640264036404640564066407640864096410641164126413641464156416641764186419642064216422642364246425642664276428642964306431643264336434643564366437643864396440644164426443644464456446644764486449645064516452645364546455645664576458645964606461646264636464646564666467646864696470647164726473647464756476647764786479648064816482648364846485648664876488648964906491649264936494649564966497649864996500650165026503650465056506650765086509651065116512651365146515651665176518651965206521652265236524652565266527652865296530653165326533653465356536653765386539654065416542654365446545654665476548654965506551655265536554655565566557655865596560656165626563656465656566656765686569657065716572657365746575657665776578657965806581658265836584658565866587658865896590659165926593659465956596659765986599660066016602660366046605660666076608660966106611661266136614661566166617661866196620662166226623662466256626662766286629663066316632663366346635663666376638663966406641664266436644664566466647664866496650665166526653665466556656665766586659666066616662666366646665666666676668666966706671667266736674667566766677667866796680668166826683668466856686668766886689669066916692669366946695669666976698669967006701670267036704670567066707670867096710671167126713671467156716671767186719672067216722672367246725672667276728672967306731673267336734673567366737673867396740674167426743674467456746674767486749675067516752675367546755675667576758675967606761676267636764676567666767676867696770677167726773677467756776677
76778677967806781678267836784678567866787678867896790679167926793679467956796679767986799680068016802680368046805680668076808680968106811681268136814681568166817681868196820682168226823682468256826682768286829683068316832683368346835683668376838683968406841684268436844684568466847684868496850685168526853685468556856685768586859686068616862686368646865686668676868686968706871687268736874687568766877687868796880688168826883688468856886688768886889689068916892689368946895689668976898689969006901690269036904690569066907690869096910691169126913691469156916691769186919692069216922692369246925692669276928692969306931693269336934693569366937693869396940694169426943694469456946694769486949695069516952695369546955695669576958695969606961696269636964696569666967696869696970697169726973697469756976697769786979698069816982698369846985698669876988698969906991699269936994699569966997699869997000700170027003700470057006700770087009701070117012701370147015701670177018701970207021702270237024702570267027702870297030703170327033703470357036703770387039704070417042704370447045704670477048704970507051705270537054705570567057705870597060706170627063706470657066706770687069707070717072707370747075707670777078707970807081708270837084708570867087708870897090709170927093709470957096709770987099710071017102710371047105710671077108710971107111711271137114711571167117711871197120712171227123712471257126712771287129713071317132713371347135713671377138713971407141714271437144714571467147714871497150715171527153715471557156715771587159716071617162716371647165716671677168716971707171717271737174717571767177717871797180718171827183718471857186718771887189719071917192719371947195719671977198719972007201720272037204720572067207720872097210721172127213721472157216721772187219722072217222722372247225722672277228722972307231723272337234723572367237723872397240724172427243724472457246724772487249725072517252725372547255725672577258725972607261726272637264726572667267726872697270727172727273727472757276727
77278727972807281728272837284728572867287728872897290729172927293729472957296729772987299730073017302730373047305730673077308730973107311731273137314731573167317731873197320732173227323732473257326732773287329733073317332733373347335733673377338733973407341734273437344734573467347734873497350735173527353735473557356735773587359736073617362736373647365736673677368736973707371737273737374737573767377737873797380738173827383738473857386738773887389739073917392739373947395739673977398739974007401740274037404740574067407740874097410741174127413741474157416741774187419742074217422742374247425742674277428742974307431743274337434743574367437743874397440744174427443744474457446744774487449745074517452745374547455745674577458745974607461746274637464746574667467746874697470747174727473747474757476747774787479748074817482748374847485748674877488748974907491749274937494749574967497749874997500750175027503750475057506750775087509751075117512751375147515751675177518751975207521752275237524752575267527752875297530753175327533753475357536753775387539754075417542754375447545754675477548754975507551755275537554755575567557755875597560756175627563756475657566756775687569757075717572757375747575757675777578757975807581758275837584758575867587758875897590759175927593759475957596759775987599760076017602760376047605760676077608760976107611761276137614761576167617761876197620762176227623762476257626762776287629763076317632763376347635763676377638763976407641764276437644764576467647764876497650765176527653765476557656765776587659766076617662766376647665766676677668766976707671767276737674767576767677767876797680768176827683768476857686768776887689769076917692769376947695769676977698769977007701770277037704770577067707770877097710771177127713771477157716771777187719772077217722772377247725772677277728772977307731773277337734773577367737773877397740774177427743774477457746774777487749775077517752775377547755775677577758775977607761776277637764776577667767776877697770777177727773777477757776777
77778777977807781778277837784778577867787778877897790779177927793779477957796779777987799780078017802780378047805780678077808780978107811781278137814781578167817781878197820782178227823782478257826782778287829783078317832783378347835783678377838783978407841784278437844784578467847784878497850785178527853785478557856785778587859786078617862786378647865786678677868786978707871787278737874787578767877787878797880788178827883788478857886788778887889789078917892789378947895789678977898789979007901790279037904790579067907790879097910791179127913791479157916791779187919792079217922792379247925792679277928792979307931793279337934793579367937793879397940794179427943794479457946794779487949795079517952795379547955795679577958795979607961796279637964796579667967796879697970797179727973797479757976797779787979798079817982798379847985798679877988798979907991799279937994799579967997799879998000800180028003800480058006800780088009801080118012801380148015801680178018801980208021802280238024802580268027802880298030803180328033803480358036803780388039804080418042804380448045804680478048804980508051805280538054805580568057805880598060806180628063806480658066806780688069807080718072807380748075807680778078807980808081808280838084808580868087808880898090809180928093809480958096809780988099810081018102810381048105810681078108810981108111811281138114811581168117811881198120812181228123812481258126812781288129813081318132813381348135813681378138813981408141814281438144814581468147814881498150815181528153815481558156815781588159816081618162816381648165816681678168816981708171817281738174817581768177817881798180818181828183818481858186818781888189819081918192819381948195819681978198819982008201820282038204820582068207820882098210821182128213821482158216821782188219822082218222822382248225822682278228822982308231823282338234823582368237823882398240824182428243824482458246824782488249825082518252825382548255825682578258825982608261826282638264826582668267826882698270827182728273827482758276827
78278827982808281828282838284828582868287828882898290829182928293829482958296829782988299830083018302830383048305830683078308830983108311831283138314831583168317831883198320832183228323832483258326832783288329833083318332833383348335833683378338833983408341834283438344834583468347834883498350835183528353835483558356835783588359836083618362836383648365836683678368836983708371837283738374837583768377837883798380838183828383838483858386838783888389839083918392839383948395839683978398839984008401840284038404840584068407840884098410841184128413841484158416841784188419842084218422842384248425842684278428842984308431843284338434843584368437843884398440844184428443844484458446844784488449845084518452845384548455845684578458845984608461846284638464846584668467846884698470847184728473847484758476847784788479848084818482848384848485848684878488848984908491849284938494849584968497849884998500850185028503850485058506850785088509851085118512851385148515851685178518851985208521852285238524852585268527852885298530853185328533853485358536853785388539854085418542854385448545854685478548854985508551855285538554855585568557855885598560856185628563856485658566856785688569857085718572857385748575857685778578857985808581858285838584858585868587858885898590859185928593859485958596859785988599860086018602860386048605860686078608860986108611861286138614861586168617861886198620862186228623862486258626862786288629863086318632863386348635863686378638863986408641864286438644864586468647864886498650865186528653865486558656865786588659866086618662866386648665866686678668866986708671867286738674867586768677867886798680868186828683868486858686868786888689869086918692869386948695869686978698869987008701870287038704870587068707870887098710871187128713871487158716871787188719872087218722872387248725872687278728872987308731873287338734873587368737873887398740874187428743874487458746874787488749875087518752875387548755875687578758875987608761876287638764876587668767876887698770877187728773877487758776877
78778877987808781878287838784878587868787878887898790879187928793879487958796879787988799880088018802880388048805880688078808880988108811881288138814881588168817881888198820882188228823882488258826882788288829883088318832883388348835883688378838883988408841884288438844884588468847884888498850885188528853885488558856885788588859886088618862886388648865886688678868886988708871887288738874887588768877887888798880888188828883888488858886888788888889889088918892889388948895889688978898889989008901890289038904890589068907890889098910891189128913891489158916891789188919892089218922892389248925892689278928892989308931893289338934893589368937893889398940894189428943894489458946894789488949895089518952895389548955895689578958895989608961896289638964896589668967896889698970897189728973897489758976897789788979898089818982898389848985898689878988898989908991899289938994899589968997899889999000900190029003900490059006900790089009901090119012901390149015901690179018901990209021902290239024902590269027902890299030903190329033903490359036903790389039904090419042904390449045904690479048904990509051905290539054905590569057905890599060906190629063906490659066906790689069907090719072907390749075907690779078907990809081908290839084908590869087908890899090909190929093909490959096909790989099910091019102910391049105910691079108910991109111911291139114911591169117911891199120912191229123912491259126912791289129913091319132913391349135913691379138913991409141914291439144914591469147914891499150915191529153915491559156915791589159916091619162916391649165916691679168916991709171917291739174917591769177917891799180918191829183918491859186918791889189919091919192919391949195919691979198919992009201920292039204920592069207920892099210921192129213921492159216921792189219922092219222922392249225922692279228922992309231923292339234923592369237923892399240924192429243924492459246924792489249925092519252925392549255925692579258925992609261926292639264926592669267926892699270927192729273927492759276927
79278927992809281928292839284928592869287928892899290929192929293929492959296929792989299930093019302930393049305930693079308930993109311931293139314931593169317931893199320932193229323932493259326932793289329933093319332933393349335933693379338933993409341934293439344934593469347934893499350935193529353935493559356935793589359936093619362936393649365936693679368936993709371937293739374937593769377937893799380938193829383938493859386938793889389939093919392939393949395939693979398939994009401940294039404940594069407940894099410941194129413941494159416941794189419942094219422942394249425942694279428942994309431943294339434943594369437943894399440944194429443944494459446944794489449945094519452945394549455945694579458945994609461946294639464946594669467946894699470947194729473947494759476947794789479948094819482948394849485948694879488948994909491949294939494949594969497949894999500950195029503950495059506950795089509951095119512951395149515951695179518951995209521952295239524952595269527952895299530953195329533953495359536953795389539954095419542954395449545954695479548954995509551955295539554955595569557955895599560956195629563956495659566956795689569957095719572957395749575957695779578957995809581958295839584958595869587958895899590959195929593959495959596959795989599960096019602960396049605960696079608960996109611961296139614961596169617961896199620962196229623962496259626962796289629963096319632963396349635963696379638963996409641964296439644964596469647964896499650965196529653965496559656965796589659966096619662966396649665966696679668966996709671967296739674967596769677967896799680968196829683968496859686968796889689969096919692969396949695969696979698969997009701970297039704970597069707970897099710971197129713971497159716971797189719972097219722972397249725972697279728972997309731973297339734973597369737973897399740974197429743974497459746974797489749975097519752975397549755975697579758975997609761976297639764976597669767976897699770977197729773977497759776977
79778977997809781978297839784978597869787978897899790979197929793979497959796979797989799980098019802980398049805980698079808980998109811981298139814981598169817981898199820982198229823982498259826982798289829983098319832983398349835983698379838983998409841984298439844984598469847984898499850985198529853985498559856985798589859986098619862986398649865986698679868986998709871987298739874987598769877987898799880988198829883988498859886988798889889989098919892989398949895989698979898989999009901990299039904990599069907990899099910991199129913991499159916991799189919992099219922992399249925992699279928992999309931993299339934993599369937993899399940994199429943994499459946994799489949995099519952995399549955995699579958995999609961996299639964996599669967996899699970997199729973997499759976997799789979998099819982998399849985998699879988998999909991999299939994999599969997999899991000010001100021000310004100051000610007100081000910010100111001210013100141001510016100171001810019100201002110022100231002410025100261002710028100291003010031100321003310034100351003610037100381003910040100411004210043100441004510046100471004810049100501005110052100531005410055100561005710058100591006010061100621006310064100651006610067100681006910070100711007210073100741007510076100771007810079100801008110082100831008410085100861008710088100891009010091100921009310094100951009610097100981009910100101011010210103101041010510106101071010810109101101011110112101131011410115101161011710118101191012010121101221012310124101251012610127101281012910130101311013210133101341013510136101371013810139101401014110142101431014410145101461014710148101491015010151101521015310154101551015610157101581015910160101611016210163101641016510166101671016810169101701017110172101731017410175101761017710178101791018010181101821018310184101851018610187101881018910190101911019210193101941019510196101971019810199102001020110202102031020410205102061020710208102091021010211102121021310214102151021610217102181021910220102211
02221022310224102251022610227102281022910230102311023210233102341023510236102371023810239102401024110242102431024410245102461024710248102491025010251102521025310254102551025610257102581025910260102611026210263102641026510266102671026810269102701027110272102731027410275102761027710278102791028010281102821028310284102851028610287102881028910290102911029210293102941029510296102971029810299103001030110302103031030410305103061030710308103091031010311103121031310314103151031610317103181031910320103211032210323103241032510326103271032810329103301033110332103331033410335103361033710338103391034010341103421034310344103451034610347103481034910350103511035210353103541035510356103571035810359103601036110362103631036410365103661036710368103691037010371103721037310374103751037610377103781037910380103811038210383103841038510386103871038810389103901039110392103931039410395103961039710398103991040010401104021040310404104051040610407104081040910410104111041210413104141041510416104171041810419104201042110422104231042410425104261042710428104291043010431104321043310434104351043610437104381043910440104411044210443104441044510446104471044810449104501045110452104531045410455104561045710458104591046010461104621046310464104651046610467104681046910470104711047210473104741047510476104771047810479104801048110482104831048410485104861048710488104891049010491104921049310494104951049610497104981049910500105011050210503105041050510506105071050810509105101051110512105131051410515105161051710518105191052010521105221052310524105251052610527105281052910530105311053210533105341053510536105371053810539105401054110542105431054410545105461054710548105491055010551105521055310554105551055610557105581055910560105611056210563105641056510566105671056810569105701057110572105731057410575105761057710578105791058010581105821058310584105851058610587105881058910590105911059210593105941059510596105971059810599106001060110602106031060410605106061060710608106091061010611106121061310614106151061610617106181061910620106211
06221062310624106251062610627106281062910630106311063210633106341063510636106371063810639106401064110642106431064410645106461064710648106491065010651106521065310654106551065610657106581065910660106611066210663106641066510666106671066810669106701067110672106731067410675106761067710678106791068010681106821068310684106851068610687106881068910690106911069210693106941069510696106971069810699107001070110702107031070410705107061070710708107091071010711107121071310714107151071610717107181071910720107211072210723107241072510726107271072810729107301073110732107331073410735107361073710738107391074010741107421074310744107451074610747107481074910750107511075210753107541075510756107571075810759107601076110762107631076410765107661076710768107691077010771107721077310774107751077610777107781077910780107811078210783107841078510786107871078810789107901079110792107931079410795107961079710798107991080010801108021080310804108051080610807108081080910810108111081210813108141081510816108171081810819108201082110822108231082410825108261082710828108291083010831108321083310834108351083610837108381083910840108411084210843108441084510846108471084810849108501085110852108531085410855108561085710858108591086010861108621086310864108651086610867108681086910870108711087210873108741087510876108771087810879108801088110882108831088410885108861088710888108891089010891108921089310894108951089610897108981089910900109011090210903109041090510906109071090810909109101091110912109131091410915109161091710918109191092010921109221092310924109251092610927109281092910930109311093210933109341093510936109371093810939109401094110942109431094410945109461094710948109491095010951109521095310954109551095610957109581095910960109611096210963109641096510966109671096810969109701097110972109731097410975109761097710978109791098010981109821098310984109851098610987109881098910990109911099210993109941099510996109971099810999110001100111002110031100411005110061100711008110091101011011110121101311014110151101611017110181101911020110211
10221102311024110251102611027110281102911030110311103211033110341103511036110371103811039110401104111042110431104411045110461104711048110491105011051110521105311054110551105611057110581105911060110611106211063110641106511066110671106811069110701107111072110731107411075110761107711078110791108011081110821108311084110851108611087110881108911090110911109211093110941109511096110971109811099111001110111102111031110411105111061110711108111091111011111111121111311114111151111611117111181111911120111211112211123111241112511126111271112811129111301113111132111331113411135111361113711138111391114011141111421114311144111451114611147111481114911150111511115211153111541115511156111571115811159111601116111162111631116411165111661116711168111691117011171111721117311174111751117611177111781117911180111811118211183111841118511186111871118811189111901119111192111931119411195111961119711198111991120011201112021120311204112051120611207112081120911210112111121211213112141121511216112171121811219112201122111222112231122411225112261122711228112291123011231112321123311234112351123611237112381123911240112411124211243112441124511246112471124811249112501125111252112531125411255112561125711258112591126011261112621126311264112651126611267112681126911270112711127211273112741127511276112771127811279112801128111282112831128411285112861128711288112891129011291112921129311294112951129611297112981129911300113011130211303113041130511306113071130811309113101131111312113131131411315113161131711318113191132011321113221132311324113251132611327113281132911330113311133211333113341133511336113371133811339113401134111342113431134411345113461134711348113491135011351113521135311354113551135611357113581135911360113611136211363113641136511366113671136811369113701137111372113731137411375113761137711378113791138011381113821138311384113851138611387113881138911390113911139211393113941139511396113971139811399114001140111402114031140411405114061140711408114091141011411114121141311414114151141611417114181141911420114211
14221142311424114251142611427114281142911430114311143211433114341143511436114371143811439114401144111442114431144411445114461144711448114491145011451114521145311454114551145611457114581145911460114611146211463114641146511466114671146811469114701147111472114731147411475114761147711478114791148011481114821148311484114851148611487114881148911490114911149211493114941149511496114971149811499115001150111502115031150411505115061150711508115091151011511115121151311514115151151611517115181151911520115211152211523115241152511526115271152811529115301153111532115331153411535115361153711538115391154011541115421154311544115451154611547115481154911550115511155211553115541155511556115571155811559115601156111562115631156411565115661156711568115691157011571115721157311574115751157611577115781157911580115811158211583115841158511586115871158811589115901159111592115931159411595115961159711598115991160011601116021160311604116051160611607116081160911610116111161211613116141161511616116171161811619116201162111622116231162411625116261162711628116291163011631116321163311634116351163611637116381163911640116411164211643116441164511646116471164811649116501165111652116531165411655116561165711658116591166011661116621166311664116651166611667116681166911670116711167211673116741167511676116771167811679116801168111682116831168411685116861168711688116891169011691116921169311694116951169611697116981169911700117011170211703117041170511706117071170811709117101171111712117131171411715117161171711718117191172011721117221172311724117251172611727117281172911730117311173211733117341173511736117371173811739117401174111742117431174411745117461174711748117491175011751117521175311754117551175611757117581175911760117611176211763117641176511766117671176811769117701177111772117731177411775117761177711778117791178011781117821178311784117851178611787117881178911790117911179211793117941179511796117971179811799118001180111802118031180411805118061180711808118091181011811118121181311814118151181611817118181181911820118211
18221182311824118251182611827118281182911830118311183211833118341183511836118371183811839118401184111842118431184411845118461184711848118491185011851118521185311854118551185611857118581185911860118611186211863118641186511866118671186811869118701187111872118731187411875118761187711878118791188011881118821188311884118851188611887118881188911890118911189211893118941189511896118971189811899119001190111902119031190411905119061190711908119091191011911119121191311914119151191611917119181191911920119211192211923119241192511926119271192811929119301193111932119331193411935119361193711938119391194011941119421194311944119451194611947119481194911950119511195211953119541195511956119571195811959119601196111962119631196411965119661196711968119691197011971119721197311974119751197611977119781197911980119811198211983119841198511986119871198811989119901199111992119931199411995119961199711998119991200012001120021200312004120051200612007120081200912010120111201212013120141201512016120171201812019120201202112022120231202412025120261202712028120291203012031120321203312034120351203612037120381203912040120411204212043120441204512046120471204812049120501205112052120531205412055120561205712058120591206012061120621206312064120651206612067120681206912070120711207212073120741207512076120771207812079120801208112082120831208412085120861208712088120891209012091120921209312094120951209612097120981209912100121011210212103121041210512106121071210812109121101211112112121131211412115121161211712118121191212012121121221212312124121251212612127121281212912130121311213212133121341213512136121371213812139121401214112142121431214412145121461214712148121491215012151121521215312154121551215612157121581215912160121611216212163121641216512166121671216812169121701217112172121731217412175121761217712178121791218012181121821218312184121851218612187121881218912190121911219212193121941219512196121971219812199122001220112202122031220412205122061220712208122091221012211122121221312214122151221612217122181221912220122211
22221222312224122251222612227122281222912230122311223212233122341223512236122371223812239122401224112242122431224412245122461224712248122491225012251122521225312254122551225612257122581225912260122611226212263122641226512266122671226812269122701227112272122731227412275122761227712278122791228012281122821228312284122851228612287122881228912290122911229212293122941229512296122971229812299123001230112302123031230412305123061230712308123091231012311123121231312314123151231612317123181231912320123211232212323123241232512326123271232812329123301233112332123331233412335123361233712338123391234012341123421234312344123451234612347123481234912350123511235212353123541235512356123571235812359123601236112362123631236412365123661236712368123691237012371123721237312374123751237612377123781237912380123811238212383123841238512386123871238812389123901239112392123931239412395123961239712398123991240012401124021240312404124051240612407124081240912410124111241212413124141241512416124171241812419124201242112422124231242412425124261242712428124291243012431124321243312434124351243612437124381243912440124411244212443124441244512446124471244812449124501245112452124531245412455124561245712458124591246012461124621246312464124651246612467124681246912470124711247212473124741247512476124771247812479124801248112482124831248412485124861248712488124891249012491124921249312494124951249612497124981249912500125011250212503125041250512506125071250812509125101251112512125131251412515125161251712518125191252012521125221252312524125251252612527125281252912530125311253212533125341253512536125371253812539125401254112542125431254412545125461254712548125491255012551125521255312554125551255612557125581255912560125611256212563125641256512566125671256812569125701257112572125731257412575125761257712578125791258012581125821258312584125851258612587125881258912590125911259212593125941259512596125971259812599126001260112602126031260412605126061260712608126091261012611126121261312614126151261612617126181261912620126211
2622126231262412625126261262712628126291263012631126321263312634126351263612637126381263912640126411264212643126441264512646126471264812649126501265112652126531265412655126561265712658126591266012661126621266312664126651266612667126681266912670126711267212673126741267512676126771267812679126801268112682126831268412685126861268712688126891269012691126921269312694126951269612697126981269912700127011270212703127041270512706127071270812709127101271112712127131271412715127161271712718127191272012721127221272312724127251272612727127281272912730127311273212733127341273512736127371273812739127401274112742127431274412745127461274712748127491275012751127521275312754127551275612757127581275912760127611276212763127641276512766127671276812769127701277112772127731277412775127761277712778127791278012781127821278312784127851278612787127881278912790127911279212793127941279512796127971279812799128001280112802128031280412805128061280712808128091281012811128121281312814128151281612817128181281912820128211282212823128241282512826128271282812829128301283112832128331283412835128361283712838128391284012841128421284312844128451284612847128481284912850128511285212853128541285512856128571285812859128601286112862128631286412865128661286712868128691287012871128721287312874128751287612877128781287912880128811288212883128841288512886128871288812889128901289112892128931289412895128961289712898128991290012901129021290312904129051290612907129081290912910129111291212913129141291512916129171291812919129201292112922129231292412925129261292712928129291293012931129321293312934129351293612937129381293912940129411294212943129441294512946129471294812949129501295112952129531295412955129561295712958129591296012961129621296312964129651296612967129681296912970129711297212973129741297512976129771297812979129801298112982129831298412985129861298712988129891299012991129921299312994129951299612997129981299913000130011300213003130041300513006130071300813009130101301113012130131301413015130161301713018130191302013021
  1. /**
  2. * pugixml parser - version 1.11
  3. * --------------------------------------------------------
  4. * Copyright (C) 2006-2020, by Arseny Kapoulkine (arseny.kapoulkine@gmail.com)
  5. * Report bugs and download new versions at https://pugixml.org/
  6. *
  7. * This library is distributed under the MIT License. See notice at the end
  8. * of this file.
  9. *
  10. * This work is based on the pugxml parser, which is:
  11. * Copyright (C) 2003, by Kristen Wegner (kristen@tima.net)
  12. */
  13. #ifndef SOURCE_PUGIXML_CPP
  14. #define SOURCE_PUGIXML_CPP
  15. #include "pugixml.hpp"
  16. #include <stdlib.h>
  17. #include <stdio.h>
  18. #include <string.h>
  19. #include <assert.h>
  20. #include <limits.h>
  21. #ifdef PUGIXML_WCHAR_MODE
  22. # include <wchar.h>
  23. #endif
  24. #ifndef PUGIXML_NO_XPATH
  25. # include <math.h>
  26. # include <float.h>
  27. #endif
  28. #ifndef PUGIXML_NO_STL
  29. # include <istream>
  30. # include <ostream>
  31. # include <string>
  32. #endif
  33. // For placement new
  34. #include <new>
  35. #ifdef _MSC_VER
  36. # pragma warning(push)
  37. # pragma warning(disable: 4127) // conditional expression is constant
  38. # pragma warning(disable: 4324) // structure was padded due to __declspec(align())
  39. # pragma warning(disable: 4702) // unreachable code
  40. # pragma warning(disable: 4996) // this function or variable may be unsafe
  41. #endif
  42. #if defined(_MSC_VER) && defined(__c2__)
  43. # pragma clang diagnostic push
  44. # pragma clang diagnostic ignored "-Wdeprecated" // this function or variable may be unsafe
  45. #endif
  46. #ifdef __INTEL_COMPILER
  47. # pragma warning(disable: 177) // function was declared but never referenced
  48. # pragma warning(disable: 279) // controlling expression is constant
  49. # pragma warning(disable: 1478 1786) // function was declared "deprecated"
  50. # pragma warning(disable: 1684) // conversion from pointer to same-sized integral type
  51. #endif
  52. #if defined(__BORLANDC__) && defined(PUGIXML_HEADER_ONLY)
  53. # pragma warn -8080 // symbol is declared but never used; disabling this inside push/pop bracket does not make the warning go away
  54. #endif
  55. #ifdef __BORLANDC__
  56. # pragma option push
  57. # pragma warn -8008 // condition is always false
  58. # pragma warn -8066 // unreachable code
  59. #endif
  60. #ifdef __SNC__
  61. // Using diag_push/diag_pop does not disable the warnings inside templates due to a compiler bug
  62. # pragma diag_suppress=178 // function was declared but never referenced
  63. # pragma diag_suppress=237 // controlling expression is constant
  64. #endif
  65. #ifdef __TI_COMPILER_VERSION__
  66. # pragma diag_suppress 179 // function was declared but never referenced
  67. #endif
  68. // Inlining controls
  69. #if defined(_MSC_VER) && _MSC_VER >= 1300
  70. # define PUGI__NO_INLINE __declspec(noinline)
  71. #elif defined(__GNUC__)
  72. # define PUGI__NO_INLINE __attribute__((noinline))
  73. #else
  74. # define PUGI__NO_INLINE
  75. #endif
  76. // Branch weight controls
  77. #if defined(__GNUC__) && !defined(__c2__)
  78. # define PUGI__UNLIKELY(cond) __builtin_expect(cond, 0)
  79. #else
  80. # define PUGI__UNLIKELY(cond) (cond)
  81. #endif
  82. // Simple static assertion
  83. #define PUGI__STATIC_ASSERT(cond) { static const char condition_failed[(cond) ? 1 : -1] = {0}; (void)condition_failed[0]; }
  84. // Digital Mars C++ bug workaround for passing char loaded from memory via stack
  85. #ifdef __DMC__
  86. # define PUGI__DMC_VOLATILE volatile
  87. #else
  88. # define PUGI__DMC_VOLATILE
  89. #endif
  90. // Integer sanitizer workaround; we only apply this for clang since gcc8 has no_sanitize but not unsigned-integer-overflow and produces "attribute directive ignored" warnings
  91. #if defined(__clang__) && defined(__has_attribute)
  92. # if __has_attribute(no_sanitize)
  93. # define PUGI__UNSIGNED_OVERFLOW __attribute__((no_sanitize("unsigned-integer-overflow")))
  94. # else
  95. # define PUGI__UNSIGNED_OVERFLOW
  96. # endif
  97. #else
  98. # define PUGI__UNSIGNED_OVERFLOW
  99. #endif
  100. // Borland C++ bug workaround for not defining ::memcpy depending on header include order (can't always use std::memcpy because some compilers don't have it at all)
  101. #if defined(__BORLANDC__) && !defined(__MEM_H_USING_LIST)
  102. using std::memcpy;
  103. using std::memmove;
  104. using std::memset;
  105. #endif
  106. // Some MinGW/GCC versions have headers that erroneously omit LLONG_MIN/LLONG_MAX/ULLONG_MAX definitions from limits.h in some configurations
  107. #if defined(PUGIXML_HAS_LONG_LONG) && defined(__GNUC__) && !defined(LLONG_MAX) && !defined(LLONG_MIN) && !defined(ULLONG_MAX)
  108. # define LLONG_MIN (-LLONG_MAX - 1LL)
  109. # define LLONG_MAX __LONG_LONG_MAX__
  110. # define ULLONG_MAX (LLONG_MAX * 2ULL + 1ULL)
  111. #endif
  112. // In some environments MSVC is a compiler but the CRT lacks certain MSVC-specific features
  113. #if defined(_MSC_VER) && !defined(__S3E__)
  114. # define PUGI__MSVC_CRT_VERSION _MSC_VER
  115. #endif
  116. // Not all platforms have snprintf; we define a wrapper that uses snprintf if possible. This only works with buffers with a known size.
  117. #if __cplusplus >= 201103
  118. # define PUGI__SNPRINTF(buf, ...) snprintf(buf, sizeof(buf), __VA_ARGS__)
  119. #elif defined(PUGI__MSVC_CRT_VERSION) && PUGI__MSVC_CRT_VERSION >= 1400
  120. # define PUGI__SNPRINTF(buf, ...) _snprintf_s(buf, _countof(buf), _TRUNCATE, __VA_ARGS__)
  121. #else
  122. # define PUGI__SNPRINTF sprintf
  123. #endif
  124. // We put implementation details into an anonymous namespace in source mode, but have to keep it in non-anonymous namespace in header-only mode to prevent binary bloat.
  125. #ifdef PUGIXML_HEADER_ONLY
  126. # define PUGI__NS_BEGIN namespace pugi { namespace impl {
  127. # define PUGI__NS_END } }
  128. # define PUGI__FN inline
  129. # define PUGI__FN_NO_INLINE inline
  130. #else
  131. # if defined(_MSC_VER) && _MSC_VER < 1300 // MSVC6 seems to have an amusing bug with anonymous namespaces inside namespaces
  132. # define PUGI__NS_BEGIN namespace pugi { namespace impl {
  133. # define PUGI__NS_END } }
  134. # else
  135. # define PUGI__NS_BEGIN namespace pugi { namespace impl { namespace {
  136. # define PUGI__NS_END } } }
  137. # endif
  138. # define PUGI__FN
  139. # define PUGI__FN_NO_INLINE PUGI__NO_INLINE
  140. #endif
  141. // uintptr_t
  142. #if (defined(_MSC_VER) && _MSC_VER < 1600) || (defined(__BORLANDC__) && __BORLANDC__ < 0x561)
  143. namespace pugi
  144. {
  145. # ifndef _UINTPTR_T_DEFINED
  146. typedef size_t uintptr_t;
  147. # endif
  148. typedef unsigned __int8 uint8_t;
  149. typedef unsigned __int16 uint16_t;
  150. typedef unsigned __int32 uint32_t;
  151. }
  152. #else
  153. # include <stdint.h>
  154. #endif
  155. // Memory allocation
  156. PUGI__NS_BEGIN
  157. PUGI__FN void* default_allocate(size_t size)
  158. {
  159. return malloc(size);
  160. }
  161. PUGI__FN void default_deallocate(void* ptr)
  162. {
  163. free(ptr);
  164. }
  165. template <typename T>
  166. struct xml_memory_management_function_storage
  167. {
  168. static allocation_function allocate;
  169. static deallocation_function deallocate;
  170. };
  171. // Global allocation functions are stored in class statics so that in header mode linker deduplicates them
  172. // Without a template<> we'll get multiple definitions of the same static
  173. template <typename T> allocation_function xml_memory_management_function_storage<T>::allocate = default_allocate;
  174. template <typename T> deallocation_function xml_memory_management_function_storage<T>::deallocate = default_deallocate;
  175. typedef xml_memory_management_function_storage<int> xml_memory;
  176. PUGI__NS_END
  177. // String utilities
  178. PUGI__NS_BEGIN
  179. // Get string length
  180. PUGI__FN size_t strlength(const char_t* s)
  181. {
  182. assert(s);
  183. #ifdef PUGIXML_WCHAR_MODE
  184. return wcslen(s);
  185. #else
  186. return strlen(s);
  187. #endif
  188. }
  189. // Compare two strings
  190. PUGI__FN bool strequal(const char_t* src, const char_t* dst)
  191. {
  192. assert(src && dst);
  193. #ifdef PUGIXML_WCHAR_MODE
  194. return wcscmp(src, dst) == 0;
  195. #else
  196. return strcmp(src, dst) == 0;
  197. #endif
  198. }
  199. // Compare lhs with [rhs_begin, rhs_end)
  200. PUGI__FN bool strequalrange(const char_t* lhs, const char_t* rhs, size_t count)
  201. {
  202. for (size_t i = 0; i < count; ++i)
  203. if (lhs[i] != rhs[i])
  204. return false;
  205. return lhs[count] == 0;
  206. }
  207. // Get length of wide string, even if CRT lacks wide character support
  208. PUGI__FN size_t strlength_wide(const wchar_t* s)
  209. {
  210. assert(s);
  211. #ifdef PUGIXML_WCHAR_MODE
  212. return wcslen(s);
  213. #else
  214. const wchar_t* end = s;
  215. while (*end) end++;
  216. return static_cast<size_t>(end - s);
  217. #endif
  218. }
  219. PUGI__NS_END
  220. // auto_ptr-like object for exception recovery
  221. PUGI__NS_BEGIN
  // Scope guard that holds a raw pointer and passes it to a caller-supplied cleanup
  // function on destruction, unless ownership was given up earlier through release()
  template <typename T> struct auto_deleter
  {
      typedef void (*D)(T*);

      T* data;    // held object; null when nothing is owned
      D deleter;  // cleanup function the destructor applies to data

      auto_deleter(T* object, D cleanup): data(object), deleter(cleanup)
      {
      }

      ~auto_deleter()
      {
          // nothing to clean up after release() or when constructed with a null pointer
          if (!data) return;

          deleter(data);
      }

      // Gives up ownership: returns the held pointer and leaves the guard empty,
      // so the destructor will not invoke the cleanup function
      T* release()
      {
          T* const detached = data;
          data = 0;

          return detached;
      }
  };
  241. PUGI__NS_END
  242. #ifdef PUGIXML_COMPACT
  243. PUGI__NS_BEGIN
  244. class compact_hash_table
  245. {
  246. public:
  247. compact_hash_table(): _items(0), _capacity(0), _count(0)
  248. {
  249. }
  250. void clear()
  251. {
  252. if (_items)
  253. {
  254. xml_memory::deallocate(_items);
  255. _items = 0;
  256. _capacity = 0;
  257. _count = 0;
  258. }
  259. }
  260. void* find(const void* key)
  261. {
  262. if (_capacity == 0) return 0;
  263. item_t* item = get_item(key);
  264. assert(item);
  265. assert(item->key == key || (item->key == 0 && item->value == 0));
  266. return item->value;
  267. }
  268. void insert(const void* key, void* value)
  269. {
  270. assert(_capacity != 0 && _count < _capacity - _capacity / 4);
  271. item_t* item = get_item(key);
  272. assert(item);
  273. if (item->key == 0)
  274. {
  275. _count++;
  276. item->key = key;
  277. }
  278. item->value = value;
  279. }
  280. bool reserve(size_t extra = 16)
  281. {
  282. if (_count + extra >= _capacity - _capacity / 4)
  283. return rehash(_count + extra);
  284. return true;
  285. }
  286. private:
  287. struct item_t
  288. {
  289. const void* key;
  290. void* value;
  291. };
  292. item_t* _items;
  293. size_t _capacity;
  294. size_t _count;
  295. bool rehash(size_t count);
  296. item_t* get_item(const void* key)
  297. {
  298. assert(key);
  299. assert(_capacity > 0);
  300. size_t hashmod = _capacity - 1;
  301. size_t bucket = hash(key) & hashmod;
  302. for (size_t probe = 0; probe <= hashmod; ++probe)
  303. {
  304. item_t& probe_item = _items[bucket];
  305. if (probe_item.key == key || probe_item.key == 0)
  306. return &probe_item;
  307. // hash collision, quadratic probing
  308. bucket = (bucket + probe + 1) & hashmod;
  309. }
  310. assert(false && "Hash table is full"); // unreachable
  311. return 0;
  312. }
  313. static PUGI__UNSIGNED_OVERFLOW unsigned int hash(const void* key)
  314. {
  315. unsigned int h = static_cast<unsigned int>(reinterpret_cast<uintptr_t>(key) & 0xffffffff);
  316. // MurmurHash3 32-bit finalizer
  317. h ^= h >> 16;
  318. h *= 0x85ebca6bu;
  319. h ^= h >> 13;
  320. h *= 0xc2b2ae35u;
  321. h ^= h >> 16;
  322. return h;
  323. }
  324. };
  325. PUGI__FN_NO_INLINE bool compact_hash_table::rehash(size_t count)
  326. {
  327. size_t capacity = 32;
  328. while (count >= capacity - capacity / 4)
  329. capacity *= 2;
  330. compact_hash_table rt;
  331. rt._capacity = capacity;
  332. rt._items = static_cast<item_t*>(xml_memory::allocate(sizeof(item_t) * capacity));
  333. if (!rt._items)
  334. return false;
  335. memset(rt._items, 0, sizeof(item_t) * capacity);
  336. for (size_t i = 0; i < _capacity; ++i)
  337. if (_items[i].key)
  338. rt.insert(_items[i].key, _items[i].value);
  339. if (_items)
  340. xml_memory::deallocate(_items);
  341. _capacity = capacity;
  342. _items = rt._items;
  343. assert(_count == rt._count);
  344. return true;
  345. }
  346. PUGI__NS_END
  347. #endif
  348. PUGI__NS_BEGIN
  // allocation granularity inside a memory page
  #ifdef PUGIXML_COMPACT
  static const uintptr_t xml_memory_block_alignment = 4;
  #else
  static const uintptr_t xml_memory_block_alignment = sizeof(void*);
  #endif

  // extra metadata bits kept in the low byte of a node/attribute header
  static const uintptr_t xml_memory_page_contents_shared_mask = 0x40;
  static const uintptr_t xml_memory_page_name_allocated_mask = 0x20;
  static const uintptr_t xml_memory_page_value_allocated_mask = 0x10;
  static const uintptr_t xml_memory_page_type_mask = 0x0F;

  // combined masks for string uniqueness
  static const uintptr_t xml_memory_page_name_allocated_or_shared_mask =
      xml_memory_page_name_allocated_mask | xml_memory_page_contents_shared_mask;
  static const uintptr_t xml_memory_page_value_allocated_or_shared_mask =
      xml_memory_page_value_allocated_mask | xml_memory_page_contents_shared_mask;
  // Header encoding helpers.
  // Non-compact mode: a header stores (byte offset of the object from its page) << 8
  // combined with the flag bits, so the page is recovered by subtracting
  // (header >> 8) from the address of the header field itself.
  // Compact mode: the header is an object that knows how to find its page.
  #ifdef PUGIXML_COMPACT
  #define PUGI__GETHEADER_IMPL(object, page, flags) // unused
  #define PUGI__GETPAGE_IMPL(header) (header).get_page()
  #else
  #define PUGI__GETHEADER_IMPL(object, page, flags) \
      (((reinterpret_cast<char*>(object) - reinterpret_cast<char*>(page)) << 8) | (flags))
  // this macro casts pointers through void* to avoid 'cast increases required alignment of target type' warnings
  #define PUGI__GETPAGE_IMPL(header) \
      static_cast<impl::xml_memory_page*>(const_cast<void*>(static_cast<const void*>(reinterpret_cast<const char*>(&header) - (header >> 8))))
  #endif

  // page that owns node/attribute 'n'
  #define PUGI__GETPAGE(n) PUGI__GETPAGE_IMPL((n)->header)
  // node type stored in the low bits of the header of 'n'
  #define PUGI__NODETYPE(n) static_cast<xml_node_type>((n)->header & impl::xml_memory_page_type_mask)
  372. struct xml_allocator;
  373. struct xml_memory_page
  374. {
  375. static xml_memory_page* construct(void* memory)
  376. {
  377. xml_memory_page* result = static_cast<xml_memory_page*>(memory);
  378. result->allocator = 0;
  379. result->prev = 0;
  380. result->next = 0;
  381. result->busy_size = 0;
  382. result->freed_size = 0;
  383. #ifdef PUGIXML_COMPACT
  384. result->compact_string_base = 0;
  385. result->compact_shared_parent = 0;
  386. result->compact_page_marker = 0;
  387. #endif
  388. return result;
  389. }
  390. xml_allocator* allocator;
  391. xml_memory_page* prev;
  392. xml_memory_page* next;
  393. size_t busy_size;
  394. size_t freed_size;
  395. #ifdef PUGIXML_COMPACT
  396. char_t* compact_string_base;
  397. void* compact_shared_parent;
  398. uint32_t* compact_page_marker;
  399. #endif
  400. };
  401. static const size_t xml_memory_page_size =
  402. #ifdef PUGIXML_MEMORY_PAGE_SIZE
  403. (PUGIXML_MEMORY_PAGE_SIZE)
  404. #else
  405. 32768
  406. #endif
  407. - sizeof(xml_memory_page);
  // Bookkeeping record stored immediately before every allocated string.
  // Both fields are expressed in units of xml_memory_block_alignment.
  struct xml_memory_string_header
  {
      // offset from page->data
      uint16_t page_offset;

      // 0 if string occupies whole page
      uint16_t full_size;
  };
  413. struct xml_allocator
  414. {
  415. xml_allocator(xml_memory_page* root): _root(root), _busy_size(root->busy_size)
  416. {
  417. #ifdef PUGIXML_COMPACT
  418. _hash = 0;
  419. #endif
  420. }
  421. xml_memory_page* allocate_page(size_t data_size)
  422. {
  423. size_t size = sizeof(xml_memory_page) + data_size;
  424. // allocate block with some alignment, leaving memory for worst-case padding
  425. void* memory = xml_memory::allocate(size);
  426. if (!memory) return 0;
  427. // prepare page structure
  428. xml_memory_page* page = xml_memory_page::construct(memory);
  429. assert(page);
  430. page->allocator = _root->allocator;
  431. return page;
  432. }
  433. static void deallocate_page(xml_memory_page* page)
  434. {
  435. xml_memory::deallocate(page);
  436. }
  437. void* allocate_memory_oob(size_t size, xml_memory_page*& out_page);
  438. void* allocate_memory(size_t size, xml_memory_page*& out_page)
  439. {
  440. if (PUGI__UNLIKELY(_busy_size + size > xml_memory_page_size))
  441. return allocate_memory_oob(size, out_page);
  442. void* buf = reinterpret_cast<char*>(_root) + sizeof(xml_memory_page) + _busy_size;
  443. _busy_size += size;
  444. out_page = _root;
  445. return buf;
  446. }
  447. #ifdef PUGIXML_COMPACT
  448. void* allocate_object(size_t size, xml_memory_page*& out_page)
  449. {
  450. void* result = allocate_memory(size + sizeof(uint32_t), out_page);
  451. if (!result) return 0;
  452. // adjust for marker
  453. ptrdiff_t offset = static_cast<char*>(result) - reinterpret_cast<char*>(out_page->compact_page_marker);
  454. if (PUGI__UNLIKELY(static_cast<uintptr_t>(offset) >= 256 * xml_memory_block_alignment))
  455. {
  456. // insert new marker
  457. uint32_t* marker = static_cast<uint32_t*>(result);
  458. *marker = static_cast<uint32_t>(reinterpret_cast<char*>(marker) - reinterpret_cast<char*>(out_page));
  459. out_page->compact_page_marker = marker;
  460. // since we don't reuse the page space until we reallocate it, we can just pretend that we freed the marker block
  461. // this will make sure deallocate_memory correctly tracks the size
  462. out_page->freed_size += sizeof(uint32_t);
  463. return marker + 1;
  464. }
  465. else
  466. {
  467. // roll back uint32_t part
  468. _busy_size -= sizeof(uint32_t);
  469. return result;
  470. }
  471. }
  472. #else
  473. void* allocate_object(size_t size, xml_memory_page*& out_page)
  474. {
  475. return allocate_memory(size, out_page);
  476. }
  477. #endif
  478. void deallocate_memory(void* ptr, size_t size, xml_memory_page* page)
  479. {
  480. if (page == _root) page->busy_size = _busy_size;
  481. assert(ptr >= reinterpret_cast<char*>(page) + sizeof(xml_memory_page) && ptr < reinterpret_cast<char*>(page) + sizeof(xml_memory_page) + page->busy_size);
  482. (void)!ptr;
  483. page->freed_size += size;
  484. assert(page->freed_size <= page->busy_size);
  485. if (page->freed_size == page->busy_size)
  486. {
  487. if (page->next == 0)
  488. {
  489. assert(_root == page);
  490. // top page freed, just reset sizes
  491. page->busy_size = 0;
  492. page->freed_size = 0;
  493. #ifdef PUGIXML_COMPACT
  494. // reset compact state to maximize efficiency
  495. page->compact_string_base = 0;
  496. page->compact_shared_parent = 0;
  497. page->compact_page_marker = 0;
  498. #endif
  499. _busy_size = 0;
  500. }
  501. else
  502. {
  503. assert(_root != page);
  504. assert(page->prev);
  505. // remove from the list
  506. page->prev->next = page->next;
  507. page->next->prev = page->prev;
  508. // deallocate
  509. deallocate_page(page);
  510. }
  511. }
  512. }
  513. char_t* allocate_string(size_t length)
  514. {
  515. static const size_t max_encoded_offset = (1 << 16) * xml_memory_block_alignment;
  516. PUGI__STATIC_ASSERT(xml_memory_page_size <= max_encoded_offset);
  517. // allocate memory for string and header block
  518. size_t size = sizeof(xml_memory_string_header) + length * sizeof(char_t);
  519. // round size up to block alignment boundary
  520. size_t full_size = (size + (xml_memory_block_alignment - 1)) & ~(xml_memory_block_alignment - 1);
  521. xml_memory_page* page;
  522. xml_memory_string_header* header = static_cast<xml_memory_string_header*>(allocate_memory(full_size, page));
  523. if (!header) return 0;
  524. // setup header
  525. ptrdiff_t page_offset = reinterpret_cast<char*>(header) - reinterpret_cast<char*>(page) - sizeof(xml_memory_page);
  526. assert(page_offset % xml_memory_block_alignment == 0);
  527. assert(page_offset >= 0 && static_cast<size_t>(page_offset) < max_encoded_offset);
  528. header->page_offset = static_cast<uint16_t>(static_cast<size_t>(page_offset) / xml_memory_block_alignment);
  529. // full_size == 0 for large strings that occupy the whole page
  530. assert(full_size % xml_memory_block_alignment == 0);
  531. assert(full_size < max_encoded_offset || (page->busy_size == full_size && page_offset == 0));
  532. header->full_size = static_cast<uint16_t>(full_size < max_encoded_offset ? full_size / xml_memory_block_alignment : 0);
  533. // round-trip through void* to avoid 'cast increases required alignment of target type' warning
  534. // header is guaranteed a pointer-sized alignment, which should be enough for char_t
  535. return static_cast<char_t*>(static_cast<void*>(header + 1));
  536. }
  537. void deallocate_string(char_t* string)
  538. {
  539. // this function casts pointers through void* to avoid 'cast increases required alignment of target type' warnings
  540. // we're guaranteed the proper (pointer-sized) alignment on the input string if it was allocated via allocate_string
  541. // get header
  542. xml_memory_string_header* header = static_cast<xml_memory_string_header*>(static_cast<void*>(string)) - 1;
  543. assert(header);
  544. // deallocate
  545. size_t page_offset = sizeof(xml_memory_page) + header->page_offset * xml_memory_block_alignment;
  546. xml_memory_page* page = reinterpret_cast<xml_memory_page*>(static_cast<void*>(reinterpret_cast<char*>(header) - page_offset));
  547. // if full_size == 0 then this string occupies the whole page
  548. size_t full_size = header->full_size == 0 ? page->busy_size : header->full_size * xml_memory_block_alignment;
  549. deallocate_memory(header, full_size, page);
  550. }
  551. bool reserve()
  552. {
  553. #ifdef PUGIXML_COMPACT
  554. return _hash->reserve();
  555. #else
  556. return true;
  557. #endif
  558. }
  559. xml_memory_page* _root;
  560. size_t _busy_size;
  561. #ifdef PUGIXML_COMPACT
  562. compact_hash_table* _hash;
  563. #endif
  564. };
  565. PUGI__FN_NO_INLINE void* xml_allocator::allocate_memory_oob(size_t size, xml_memory_page*& out_page)
  566. {
  567. const size_t large_allocation_threshold = xml_memory_page_size / 4;
  568. xml_memory_page* page = allocate_page(size <= large_allocation_threshold ? xml_memory_page_size : size);
  569. out_page = page;
  570. if (!page) return 0;
  571. if (size <= large_allocation_threshold)
  572. {
  573. _root->busy_size = _busy_size;
  574. // insert page at the end of linked list
  575. page->prev = _root;
  576. _root->next = page;
  577. _root = page;
  578. _busy_size = size;
  579. }
  580. else
  581. {
  582. // insert page before the end of linked list, so that it is deleted as soon as possible
  583. // the last page is not deleted even if it's empty (see deallocate_memory)
  584. assert(_root->prev);
  585. page->prev = _root->prev;
  586. page->next = _root;
  587. _root->prev->next = page;
  588. _root->prev = page;
  589. page->busy_size = size;
  590. }
  591. return reinterpret_cast<char*>(page) + sizeof(xml_memory_page);
  592. }
  593. PUGI__NS_END
  594. #ifdef PUGIXML_COMPACT
  595. PUGI__NS_BEGIN
  // alignment used by compact pointer/header encodings, and its base-2 logarithm
  static const uintptr_t compact_alignment_log2 = 2;
  static const uintptr_t compact_alignment = static_cast<uintptr_t>(1) << compact_alignment_log2;
  598. class compact_header
  599. {
  600. public:
  601. compact_header(xml_memory_page* page, unsigned int flags)
  602. {
  603. PUGI__STATIC_ASSERT(xml_memory_block_alignment == compact_alignment);
  604. ptrdiff_t offset = (reinterpret_cast<char*>(this) - reinterpret_cast<char*>(page->compact_page_marker));
  605. assert(offset % compact_alignment == 0 && static_cast<uintptr_t>(offset) < 256 * compact_alignment);
  606. _page = static_cast<unsigned char>(offset >> compact_alignment_log2);
  607. _flags = static_cast<unsigned char>(flags);
  608. }
  609. void operator&=(uintptr_t mod)
  610. {
  611. _flags &= static_cast<unsigned char>(mod);
  612. }
  613. void operator|=(uintptr_t mod)
  614. {
  615. _flags |= static_cast<unsigned char>(mod);
  616. }
  617. uintptr_t operator&(uintptr_t mod) const
  618. {
  619. return _flags & mod;
  620. }
  621. xml_memory_page* get_page() const
  622. {
  623. // round-trip through void* to silence 'cast increases required alignment of target type' warnings
  624. const char* page_marker = reinterpret_cast<const char*>(this) - (_page << compact_alignment_log2);
  625. const char* page = page_marker - *reinterpret_cast<const uint32_t*>(static_cast<const void*>(page_marker));
  626. return const_cast<xml_memory_page*>(reinterpret_cast<const xml_memory_page*>(static_cast<const void*>(page)));
  627. }
  628. private:
  629. unsigned char _page;
  630. unsigned char _flags;
  631. };
  632. PUGI__FN xml_memory_page* compact_get_page(const void* object, int header_offset)
  633. {
  634. const compact_header* header = reinterpret_cast<const compact_header*>(static_cast<const char*>(object) - header_offset);
  635. return header->get_page();
  636. }
  637. template <int header_offset, typename T> PUGI__FN_NO_INLINE T* compact_get_value(const void* object)
  638. {
  639. return static_cast<T*>(compact_get_page(object, header_offset)->allocator->_hash->find(object));
  640. }
  641. template <int header_offset, typename T> PUGI__FN_NO_INLINE void compact_set_value(const void* object, T* value)
  642. {
  643. compact_get_page(object, header_offset)->allocator->_hash->insert(object, value);
  644. }
  645. template <typename T, int header_offset, int start = -126> class compact_pointer
  646. {
  647. public:
  648. compact_pointer(): _data(0)
  649. {
  650. }
  651. void operator=(const compact_pointer& rhs)
  652. {
  653. *this = rhs + 0;
  654. }
  655. void operator=(T* value)
  656. {
  657. if (value)
  658. {
  659. // value is guaranteed to be compact-aligned; 'this' is not
  660. // our decoding is based on 'this' aligned to compact alignment downwards (see operator T*)
  661. // so for negative offsets (e.g. -3) we need to adjust the diff by compact_alignment - 1 to
  662. // compensate for arithmetic shift rounding for negative values
  663. ptrdiff_t diff = reinterpret_cast<char*>(value) - reinterpret_cast<char*>(this);
  664. ptrdiff_t offset = ((diff + int(compact_alignment - 1)) >> compact_alignment_log2) - start;
  665. if (static_cast<uintptr_t>(offset) <= 253)
  666. _data = static_cast<unsigned char>(offset + 1);
  667. else
  668. {
  669. compact_set_value<header_offset>(this, value);
  670. _data = 255;
  671. }
  672. }
  673. else
  674. _data = 0;
  675. }
  676. operator T*() const
  677. {
  678. if (_data)
  679. {
  680. if (_data < 255)
  681. {
  682. uintptr_t base = reinterpret_cast<uintptr_t>(this) & ~(compact_alignment - 1);
  683. return reinterpret_cast<T*>(base + (_data - 1 + start) * compact_alignment);
  684. }
  685. else
  686. return compact_get_value<header_offset, T>(this);
  687. }
  688. else
  689. return 0;
  690. }
  691. T* operator->() const
  692. {
  693. return *this;
  694. }
  695. private:
  696. unsigned char _data;
  697. };
  698. template <typename T, int header_offset> class compact_pointer_parent
  699. {
  700. public:
  701. compact_pointer_parent(): _data(0)
  702. {
  703. }
  704. void operator=(const compact_pointer_parent& rhs)
  705. {
  706. *this = rhs + 0;
  707. }
  708. void operator=(T* value)
  709. {
  710. if (value)
  711. {
  712. // value is guaranteed to be compact-aligned; 'this' is not
  713. // our decoding is based on 'this' aligned to compact alignment downwards (see operator T*)
  714. // so for negative offsets (e.g. -3) we need to adjust the diff by compact_alignment - 1 to
  715. // compensate for arithmetic shift behavior for negative values
  716. ptrdiff_t diff = reinterpret_cast<char*>(value) - reinterpret_cast<char*>(this);
  717. ptrdiff_t offset = ((diff + int(compact_alignment - 1)) >> compact_alignment_log2) + 65533;
  718. if (static_cast<uintptr_t>(offset) <= 65533)
  719. {
  720. _data = static_cast<unsigned short>(offset + 1);
  721. }
  722. else
  723. {
  724. xml_memory_page* page = compact_get_page(this, header_offset);
  725. if (PUGI__UNLIKELY(page->compact_shared_parent == 0))
  726. page->compact_shared_parent = value;
  727. if (page->compact_shared_parent == value)
  728. {
  729. _data = 65534;
  730. }
  731. else
  732. {
  733. compact_set_value<header_offset>(this, value);
  734. _data = 65535;
  735. }
  736. }
  737. }
  738. else
  739. {
  740. _data = 0;
  741. }
  742. }
  743. operator T*() const
  744. {
  745. if (_data)
  746. {
  747. if (_data < 65534)
  748. {
  749. uintptr_t base = reinterpret_cast<uintptr_t>(this) & ~(compact_alignment - 1);
  750. return reinterpret_cast<T*>(base + (_data - 1 - 65533) * compact_alignment);
  751. }
  752. else if (_data == 65534)
  753. return static_cast<T*>(compact_get_page(this, header_offset)->compact_shared_parent);
  754. else
  755. return compact_get_value<header_offset, T>(this);
  756. }
  757. else
  758. return 0;
  759. }
  760. T* operator->() const
  761. {
  762. return *this;
  763. }
  764. private:
  765. uint16_t _data;
  766. };
  767. template <int header_offset, int base_offset> class compact_string
  768. {
  769. public:
  770. compact_string(): _data(0)
  771. {
  772. }
  773. void operator=(const compact_string& rhs)
  774. {
  775. *this = rhs + 0;
  776. }
  777. void operator=(char_t* value)
  778. {
  779. if (value)
  780. {
  781. xml_memory_page* page = compact_get_page(this, header_offset);
  782. if (PUGI__UNLIKELY(page->compact_string_base == 0))
  783. page->compact_string_base = value;
  784. ptrdiff_t offset = value - page->compact_string_base;
  785. if (static_cast<uintptr_t>(offset) < (65535 << 7))
  786. {
  787. // round-trip through void* to silence 'cast increases required alignment of target type' warnings
  788. uint16_t* base = reinterpret_cast<uint16_t*>(static_cast<void*>(reinterpret_cast<char*>(this) - base_offset));
  789. if (*base == 0)
  790. {
  791. *base = static_cast<uint16_t>((offset >> 7) + 1);
  792. _data = static_cast<unsigned char>((offset & 127) + 1);
  793. }
  794. else
  795. {
  796. ptrdiff_t remainder = offset - ((*base - 1) << 7);
  797. if (static_cast<uintptr_t>(remainder) <= 253)
  798. {
  799. _data = static_cast<unsigned char>(remainder + 1);
  800. }
  801. else
  802. {
  803. compact_set_value<header_offset>(this, value);
  804. _data = 255;
  805. }
  806. }
  807. }
  808. else
  809. {
  810. compact_set_value<header_offset>(this, value);
  811. _data = 255;
  812. }
  813. }
  814. else
  815. {
  816. _data = 0;
  817. }
  818. }
  819. operator char_t*() const
  820. {
  821. if (_data)
  822. {
  823. if (_data < 255)
  824. {
  825. xml_memory_page* page = compact_get_page(this, header_offset);
  826. // round-trip through void* to silence 'cast increases required alignment of target type' warnings
  827. const uint16_t* base = reinterpret_cast<const uint16_t*>(static_cast<const void*>(reinterpret_cast<const char*>(this) - base_offset));
  828. assert(*base);
  829. ptrdiff_t offset = ((*base - 1) << 7) + (_data - 1);
  830. return page->compact_string_base + offset;
  831. }
  832. else
  833. {
  834. return compact_get_value<header_offset, char_t>(this);
  835. }
  836. }
  837. else
  838. return 0;
  839. }
  840. private:
  841. unsigned char _data;
  842. };
  843. PUGI__NS_END
  844. #endif
  845. #ifdef PUGIXML_COMPACT
  846. namespace pugi
  847. {
  848. struct xml_attribute_struct
  849. {
  850. xml_attribute_struct(impl::xml_memory_page* page): header(page, 0), namevalue_base(0)
  851. {
  852. PUGI__STATIC_ASSERT(sizeof(xml_attribute_struct) == 8);
  853. }
  854. impl::compact_header header;
  855. uint16_t namevalue_base;
  856. impl::compact_string<4, 2> name;
  857. impl::compact_string<5, 3> value;
  858. impl::compact_pointer<xml_attribute_struct, 6> prev_attribute_c;
  859. impl::compact_pointer<xml_attribute_struct, 7, 0> next_attribute;
  860. };
  861. struct xml_node_struct
  862. {
  863. xml_node_struct(impl::xml_memory_page* page, xml_node_type type): header(page, type), namevalue_base(0)
  864. {
  865. PUGI__STATIC_ASSERT(sizeof(xml_node_struct) == 12);
  866. }
  867. impl::compact_header header;
  868. uint16_t namevalue_base;
  869. impl::compact_string<4, 2> name;
  870. impl::compact_string<5, 3> value;
  871. impl::compact_pointer_parent<xml_node_struct, 6> parent;
  872. impl::compact_pointer<xml_node_struct, 8, 0> first_child;
  873. impl::compact_pointer<xml_node_struct, 9> prev_sibling_c;
  874. impl::compact_pointer<xml_node_struct, 10, 0> next_sibling;
  875. impl::compact_pointer<xml_attribute_struct, 11, 0> first_attribute;
  876. };
  877. }
  878. #else
  879. namespace pugi
  880. {
  881. struct xml_attribute_struct
  882. {
  883. xml_attribute_struct(impl::xml_memory_page* page): name(0), value(0), prev_attribute_c(0), next_attribute(0)
  884. {
  885. header = PUGI__GETHEADER_IMPL(this, page, 0);
  886. }
  887. uintptr_t header;
  888. char_t* name;
  889. char_t* value;
  890. xml_attribute_struct* prev_attribute_c;
  891. xml_attribute_struct* next_attribute;
  892. };
  893. struct xml_node_struct
  894. {
  895. xml_node_struct(impl::xml_memory_page* page, xml_node_type type): name(0), value(0), parent(0), first_child(0), prev_sibling_c(0), next_sibling(0), first_attribute(0)
  896. {
  897. header = PUGI__GETHEADER_IMPL(this, page, type);
  898. }
  899. uintptr_t header;
  900. char_t* name;
  901. char_t* value;
  902. xml_node_struct* parent;
  903. xml_node_struct* first_child;
  904. xml_node_struct* prev_sibling_c;
  905. xml_node_struct* next_sibling;
  906. xml_attribute_struct* first_attribute;
  907. };
  908. }
  909. #endif
  910. PUGI__NS_BEGIN
  911. struct xml_extra_buffer
  912. {
  913. char_t* buffer;
  914. xml_extra_buffer* next;
  915. };
  916. struct xml_document_struct: public xml_node_struct, public xml_allocator
  917. {
  918. xml_document_struct(xml_memory_page* page): xml_node_struct(page, node_document), xml_allocator(page), buffer(0), extra_buffers(0)
  919. {
  920. }
  921. const char_t* buffer;
  922. xml_extra_buffer* extra_buffers;
  923. #ifdef PUGIXML_COMPACT
  924. compact_hash_table hash;
  925. #endif
  926. };
  927. template <typename Object> inline xml_allocator& get_allocator(const Object* object)
  928. {
  929. assert(object);
  930. return *PUGI__GETPAGE(object)->allocator;
  931. }
  932. template <typename Object> inline xml_document_struct& get_document(const Object* object)
  933. {
  934. assert(object);
  935. return *static_cast<xml_document_struct*>(PUGI__GETPAGE(object)->allocator);
  936. }
  937. PUGI__NS_END
  938. // Low-level DOM operations
  939. PUGI__NS_BEGIN
  940. inline xml_attribute_struct* allocate_attribute(xml_allocator& alloc)
  941. {
  942. xml_memory_page* page;
  943. void* memory = alloc.allocate_object(sizeof(xml_attribute_struct), page);
  944. if (!memory) return 0;
  945. return new (memory) xml_attribute_struct(page);
  946. }
  947. inline xml_node_struct* allocate_node(xml_allocator& alloc, xml_node_type type)
  948. {
  949. xml_memory_page* page;
  950. void* memory = alloc.allocate_object(sizeof(xml_node_struct), page);
  951. if (!memory) return 0;
  952. return new (memory) xml_node_struct(page, type);
  953. }
  954. inline void destroy_attribute(xml_attribute_struct* a, xml_allocator& alloc)
  955. {
  956. if (a->header & impl::xml_memory_page_name_allocated_mask)
  957. alloc.deallocate_string(a->name);
  958. if (a->header & impl::xml_memory_page_value_allocated_mask)
  959. alloc.deallocate_string(a->value);
  960. alloc.deallocate_memory(a, sizeof(xml_attribute_struct), PUGI__GETPAGE(a));
  961. }
  962. inline void destroy_node(xml_node_struct* n, xml_allocator& alloc)
  963. {
  964. if (n->header & impl::xml_memory_page_name_allocated_mask)
  965. alloc.deallocate_string(n->name);
  966. if (n->header & impl::xml_memory_page_value_allocated_mask)
  967. alloc.deallocate_string(n->value);
  968. for (xml_attribute_struct* attr = n->first_attribute; attr; )
  969. {
  970. xml_attribute_struct* next = attr->next_attribute;
  971. destroy_attribute(attr, alloc);
  972. attr = next;
  973. }
  974. for (xml_node_struct* child = n->first_child; child; )
  975. {
  976. xml_node_struct* next = child->next_sibling;
  977. destroy_node(child, alloc);
  978. child = next;
  979. }
  980. alloc.deallocate_memory(n, sizeof(xml_node_struct), PUGI__GETPAGE(n));
  981. }
  982. inline void append_node(xml_node_struct* child, xml_node_struct* node)
  983. {
  984. child->parent = node;
  985. xml_node_struct* head = node->first_child;
  986. if (head)
  987. {
  988. xml_node_struct* tail = head->prev_sibling_c;
  989. tail->next_sibling = child;
  990. child->prev_sibling_c = tail;
  991. head->prev_sibling_c = child;
  992. }
  993. else
  994. {
  995. node->first_child = child;
  996. child->prev_sibling_c = child;
  997. }
  998. }
  999. inline void prepend_node(xml_node_struct* child, xml_node_struct* node)
  1000. {
  1001. child->parent = node;
  1002. xml_node_struct* head = node->first_child;
  1003. if (head)
  1004. {
  1005. child->prev_sibling_c = head->prev_sibling_c;
  1006. head->prev_sibling_c = child;
  1007. }
  1008. else
  1009. child->prev_sibling_c = child;
  1010. child->next_sibling = head;
  1011. node->first_child = child;
  1012. }
  1013. inline void insert_node_after(xml_node_struct* child, xml_node_struct* node)
  1014. {
  1015. xml_node_struct* parent = node->parent;
  1016. child->parent = parent;
  1017. if (node->next_sibling)
  1018. node->next_sibling->prev_sibling_c = child;
  1019. else
  1020. parent->first_child->prev_sibling_c = child;
  1021. child->next_sibling = node->next_sibling;
  1022. child->prev_sibling_c = node;
  1023. node->next_sibling = child;
  1024. }
  1025. inline void insert_node_before(xml_node_struct* child, xml_node_struct* node)
  1026. {
  1027. xml_node_struct* parent = node->parent;
  1028. child->parent = parent;
  1029. if (node->prev_sibling_c->next_sibling)
  1030. node->prev_sibling_c->next_sibling = child;
  1031. else
  1032. parent->first_child = child;
  1033. child->prev_sibling_c = node->prev_sibling_c;
  1034. child->next_sibling = node;
  1035. node->prev_sibling_c = child;
  1036. }
  1037. inline void remove_node(xml_node_struct* node)
  1038. {
  1039. xml_node_struct* parent = node->parent;
  1040. if (node->next_sibling)
  1041. node->next_sibling->prev_sibling_c = node->prev_sibling_c;
  1042. else
  1043. parent->first_child->prev_sibling_c = node->prev_sibling_c;
  1044. if (node->prev_sibling_c->next_sibling)
  1045. node->prev_sibling_c->next_sibling = node->next_sibling;
  1046. else
  1047. parent->first_child = node->next_sibling;
  1048. node->parent = 0;
  1049. node->prev_sibling_c = 0;
  1050. node->next_sibling = 0;
  1051. }
  1052. inline void append_attribute(xml_attribute_struct* attr, xml_node_struct* node)
  1053. {
  1054. xml_attribute_struct* head = node->first_attribute;
  1055. if (head)
  1056. {
  1057. xml_attribute_struct* tail = head->prev_attribute_c;
  1058. tail->next_attribute = attr;
  1059. attr->prev_attribute_c = tail;
  1060. head->prev_attribute_c = attr;
  1061. }
  1062. else
  1063. {
  1064. node->first_attribute = attr;
  1065. attr->prev_attribute_c = attr;
  1066. }
  1067. }
  1068. inline void prepend_attribute(xml_attribute_struct* attr, xml_node_struct* node)
  1069. {
  1070. xml_attribute_struct* head = node->first_attribute;
  1071. if (head)
  1072. {
  1073. attr->prev_attribute_c = head->prev_attribute_c;
  1074. head->prev_attribute_c = attr;
  1075. }
  1076. else
  1077. attr->prev_attribute_c = attr;
  1078. attr->next_attribute = head;
  1079. node->first_attribute = attr;
  1080. }
  1081. inline void insert_attribute_after(xml_attribute_struct* attr, xml_attribute_struct* place, xml_node_struct* node)
  1082. {
  1083. if (place->next_attribute)
  1084. place->next_attribute->prev_attribute_c = attr;
  1085. else
  1086. node->first_attribute->prev_attribute_c = attr;
  1087. attr->next_attribute = place->next_attribute;
  1088. attr->prev_attribute_c = place;
  1089. place->next_attribute = attr;
  1090. }
  1091. inline void insert_attribute_before(xml_attribute_struct* attr, xml_attribute_struct* place, xml_node_struct* node)
  1092. {
  1093. if (place->prev_attribute_c->next_attribute)
  1094. place->prev_attribute_c->next_attribute = attr;
  1095. else
  1096. node->first_attribute = attr;
  1097. attr->prev_attribute_c = place->prev_attribute_c;
  1098. attr->next_attribute = place;
  1099. place->prev_attribute_c = attr;
  1100. }
  1101. inline void remove_attribute(xml_attribute_struct* attr, xml_node_struct* node)
  1102. {
  1103. if (attr->next_attribute)
  1104. attr->next_attribute->prev_attribute_c = attr->prev_attribute_c;
  1105. else
  1106. node->first_attribute->prev_attribute_c = attr->prev_attribute_c;
  1107. if (attr->prev_attribute_c->next_attribute)
  1108. attr->prev_attribute_c->next_attribute = attr->next_attribute;
  1109. else
  1110. node->first_attribute = attr->next_attribute;
  1111. attr->prev_attribute_c = 0;
  1112. attr->next_attribute = 0;
  1113. }
  1114. PUGI__FN_NO_INLINE xml_node_struct* append_new_node(xml_node_struct* node, xml_allocator& alloc, xml_node_type type = node_element)
  1115. {
  1116. if (!alloc.reserve()) return 0;
  1117. xml_node_struct* child = allocate_node(alloc, type);
  1118. if (!child) return 0;
  1119. append_node(child, node);
  1120. return child;
  1121. }
  1122. PUGI__FN_NO_INLINE xml_attribute_struct* append_new_attribute(xml_node_struct* node, xml_allocator& alloc)
  1123. {
  1124. if (!alloc.reserve()) return 0;
  1125. xml_attribute_struct* attr = allocate_attribute(alloc);
  1126. if (!attr) return 0;
  1127. append_attribute(attr, node);
  1128. return attr;
  1129. }
  1130. PUGI__NS_END
  1131. // Helper classes for code generation
  1132. PUGI__NS_BEGIN
  1133. struct opt_false
  1134. {
  1135. enum { value = 0 };
  1136. };
  1137. struct opt_true
  1138. {
  1139. enum { value = 1 };
  1140. };
  1141. PUGI__NS_END
  1142. // Unicode utilities
  1143. PUGI__NS_BEGIN
  1144. inline uint16_t endian_swap(uint16_t value)
  1145. {
  1146. return static_cast<uint16_t>(((value & 0xff) << 8) | (value >> 8));
  1147. }
  1148. inline uint32_t endian_swap(uint32_t value)
  1149. {
  1150. return ((value & 0xff) << 24) | ((value & 0xff00) << 8) | ((value & 0xff0000) >> 8) | (value >> 24);
  1151. }
  1152. struct utf8_counter
  1153. {
  1154. typedef size_t value_type;
  1155. static value_type low(value_type result, uint32_t ch)
  1156. {
  1157. // U+0000..U+007F
  1158. if (ch < 0x80) return result + 1;
  1159. // U+0080..U+07FF
  1160. else if (ch < 0x800) return result + 2;
  1161. // U+0800..U+FFFF
  1162. else return result + 3;
  1163. }
  1164. static value_type high(value_type result, uint32_t)
  1165. {
  1166. // U+10000..U+10FFFF
  1167. return result + 4;
  1168. }
  1169. };
  1170. struct utf8_writer
  1171. {
  1172. typedef uint8_t* value_type;
  1173. static value_type low(value_type result, uint32_t ch)
  1174. {
  1175. // U+0000..U+007F
  1176. if (ch < 0x80)
  1177. {
  1178. *result = static_cast<uint8_t>(ch);
  1179. return result + 1;
  1180. }
  1181. // U+0080..U+07FF
  1182. else if (ch < 0x800)
  1183. {
  1184. result[0] = static_cast<uint8_t>(0xC0 | (ch >> 6));
  1185. result[1] = static_cast<uint8_t>(0x80 | (ch & 0x3F));
  1186. return result + 2;
  1187. }
  1188. // U+0800..U+FFFF
  1189. else
  1190. {
  1191. result[0] = static_cast<uint8_t>(0xE0 | (ch >> 12));
  1192. result[1] = static_cast<uint8_t>(0x80 | ((ch >> 6) & 0x3F));
  1193. result[2] = static_cast<uint8_t>(0x80 | (ch & 0x3F));
  1194. return result + 3;
  1195. }
  1196. }
  1197. static value_type high(value_type result, uint32_t ch)
  1198. {
  1199. // U+10000..U+10FFFF
  1200. result[0] = static_cast<uint8_t>(0xF0 | (ch >> 18));
  1201. result[1] = static_cast<uint8_t>(0x80 | ((ch >> 12) & 0x3F));
  1202. result[2] = static_cast<uint8_t>(0x80 | ((ch >> 6) & 0x3F));
  1203. result[3] = static_cast<uint8_t>(0x80 | (ch & 0x3F));
  1204. return result + 4;
  1205. }
  1206. static value_type any(value_type result, uint32_t ch)
  1207. {
  1208. return (ch < 0x10000) ? low(result, ch) : high(result, ch);
  1209. }
  1210. };
  1211. struct utf16_counter
  1212. {
  1213. typedef size_t value_type;
  1214. static value_type low(value_type result, uint32_t)
  1215. {
  1216. return result + 1;
  1217. }
  1218. static value_type high(value_type result, uint32_t)
  1219. {
  1220. return result + 2;
  1221. }
  1222. };
  1223. struct utf16_writer
  1224. {
  1225. typedef uint16_t* value_type;
  1226. static value_type low(value_type result, uint32_t ch)
  1227. {
  1228. *result = static_cast<uint16_t>(ch);
  1229. return result + 1;
  1230. }
  1231. static value_type high(value_type result, uint32_t ch)
  1232. {
  1233. uint32_t msh = static_cast<uint32_t>(ch - 0x10000) >> 10;
  1234. uint32_t lsh = static_cast<uint32_t>(ch - 0x10000) & 0x3ff;
  1235. result[0] = static_cast<uint16_t>(0xD800 + msh);
  1236. result[1] = static_cast<uint16_t>(0xDC00 + lsh);
  1237. return result + 2;
  1238. }
  1239. static value_type any(value_type result, uint32_t ch)
  1240. {
  1241. return (ch < 0x10000) ? low(result, ch) : high(result, ch);
  1242. }
  1243. };
  1244. struct utf32_counter
  1245. {
  1246. typedef size_t value_type;
  1247. static value_type low(value_type result, uint32_t)
  1248. {
  1249. return result + 1;
  1250. }
  1251. static value_type high(value_type result, uint32_t)
  1252. {
  1253. return result + 1;
  1254. }
  1255. };
  1256. struct utf32_writer
  1257. {
  1258. typedef uint32_t* value_type;
  1259. static value_type low(value_type result, uint32_t ch)
  1260. {
  1261. *result = ch;
  1262. return result + 1;
  1263. }
  1264. static value_type high(value_type result, uint32_t ch)
  1265. {
  1266. *result = ch;
  1267. return result + 1;
  1268. }
  1269. static value_type any(value_type result, uint32_t ch)
  1270. {
  1271. *result = ch;
  1272. return result + 1;
  1273. }
  1274. };
  1275. struct latin1_writer
  1276. {
  1277. typedef uint8_t* value_type;
  1278. static value_type low(value_type result, uint32_t ch)
  1279. {
  1280. *result = static_cast<uint8_t>(ch > 255 ? '?' : ch);
  1281. return result + 1;
  1282. }
  1283. static value_type high(value_type result, uint32_t ch)
  1284. {
  1285. (void)ch;
  1286. *result = '?';
  1287. return result + 1;
  1288. }
  1289. };
  1290. struct utf8_decoder
  1291. {
  1292. typedef uint8_t type;
  1293. template <typename Traits> static inline typename Traits::value_type process(const uint8_t* data, size_t size, typename Traits::value_type result, Traits)
  1294. {
  1295. const uint8_t utf8_byte_mask = 0x3f;
  1296. while (size)
  1297. {
  1298. uint8_t lead = *data;
  1299. // 0xxxxxxx -> U+0000..U+007F
  1300. if (lead < 0x80)
  1301. {
  1302. result = Traits::low(result, lead);
  1303. data += 1;
  1304. size -= 1;
  1305. // process aligned single-byte (ascii) blocks
  1306. if ((reinterpret_cast<uintptr_t>(data) & 3) == 0)
  1307. {
  1308. // round-trip through void* to silence 'cast increases required alignment of target type' warnings
  1309. while (size >= 4 && (*static_cast<const uint32_t*>(static_cast<const void*>(data)) & 0x80808080) == 0)
  1310. {
  1311. result = Traits::low(result, data[0]);
  1312. result = Traits::low(result, data[1]);
  1313. result = Traits::low(result, data[2]);
  1314. result = Traits::low(result, data[3]);
  1315. data += 4;
  1316. size -= 4;
  1317. }
  1318. }
  1319. }
  1320. // 110xxxxx -> U+0080..U+07FF
  1321. else if (static_cast<unsigned int>(lead - 0xC0) < 0x20 && size >= 2 && (data[1] & 0xc0) == 0x80)
  1322. {
  1323. result = Traits::low(result, ((lead & ~0xC0) << 6) | (data[1] & utf8_byte_mask));
  1324. data += 2;
  1325. size -= 2;
  1326. }
  1327. // 1110xxxx -> U+0800-U+FFFF
  1328. else if (static_cast<unsigned int>(lead - 0xE0) < 0x10 && size >= 3 && (data[1] & 0xc0) == 0x80 && (data[2] & 0xc0) == 0x80)
  1329. {
  1330. result = Traits::low(result, ((lead & ~0xE0) << 12) | ((data[1] & utf8_byte_mask) << 6) | (data[2] & utf8_byte_mask));
  1331. data += 3;
  1332. size -= 3;
  1333. }
  1334. // 11110xxx -> U+10000..U+10FFFF
  1335. else if (static_cast<unsigned int>(lead - 0xF0) < 0x08 && size >= 4 && (data[1] & 0xc0) == 0x80 && (data[2] & 0xc0) == 0x80 && (data[3] & 0xc0) == 0x80)
  1336. {
  1337. result = Traits::high(result, ((lead & ~0xF0) << 18) | ((data[1] & utf8_byte_mask) << 12) | ((data[2] & utf8_byte_mask) << 6) | (data[3] & utf8_byte_mask));
  1338. data += 4;
  1339. size -= 4;
  1340. }
  1341. // 10xxxxxx or 11111xxx -> invalid
  1342. else
  1343. {
  1344. data += 1;
  1345. size -= 1;
  1346. }
  1347. }
  1348. return result;
  1349. }
  1350. };
  1351. template <typename opt_swap> struct utf16_decoder
  1352. {
  1353. typedef uint16_t type;
  1354. template <typename Traits> static inline typename Traits::value_type process(const uint16_t* data, size_t size, typename Traits::value_type result, Traits)
  1355. {
  1356. while (size)
  1357. {
  1358. uint16_t lead = opt_swap::value ? endian_swap(*data) : *data;
  1359. // U+0000..U+D7FF
  1360. if (lead < 0xD800)
  1361. {
  1362. result = Traits::low(result, lead);
  1363. data += 1;
  1364. size -= 1;
  1365. }
  1366. // U+E000..U+FFFF
  1367. else if (static_cast<unsigned int>(lead - 0xE000) < 0x2000)
  1368. {
  1369. result = Traits::low(result, lead);
  1370. data += 1;
  1371. size -= 1;
  1372. }
  1373. // surrogate pair lead
  1374. else if (static_cast<unsigned int>(lead - 0xD800) < 0x400 && size >= 2)
  1375. {
  1376. uint16_t next = opt_swap::value ? endian_swap(data[1]) : data[1];
  1377. if (static_cast<unsigned int>(next - 0xDC00) < 0x400)
  1378. {
  1379. result = Traits::high(result, 0x10000 + ((lead & 0x3ff) << 10) + (next & 0x3ff));
  1380. data += 2;
  1381. size -= 2;
  1382. }
  1383. else
  1384. {
  1385. data += 1;
  1386. size -= 1;
  1387. }
  1388. }
  1389. else
  1390. {
  1391. data += 1;
  1392. size -= 1;
  1393. }
  1394. }
  1395. return result;
  1396. }
  1397. };
  1398. template <typename opt_swap> struct utf32_decoder
  1399. {
  1400. typedef uint32_t type;
  1401. template <typename Traits> static inline typename Traits::value_type process(const uint32_t* data, size_t size, typename Traits::value_type result, Traits)
  1402. {
  1403. while (size)
  1404. {
  1405. uint32_t lead = opt_swap::value ? endian_swap(*data) : *data;
  1406. // U+0000..U+FFFF
  1407. if (lead < 0x10000)
  1408. {
  1409. result = Traits::low(result, lead);
  1410. data += 1;
  1411. size -= 1;
  1412. }
  1413. // U+10000..U+10FFFF
  1414. else
  1415. {
  1416. result = Traits::high(result, lead);
  1417. data += 1;
  1418. size -= 1;
  1419. }
  1420. }
  1421. return result;
  1422. }
  1423. };
  1424. struct latin1_decoder
  1425. {
  1426. typedef uint8_t type;
  1427. template <typename Traits> static inline typename Traits::value_type process(const uint8_t* data, size_t size, typename Traits::value_type result, Traits)
  1428. {
  1429. while (size)
  1430. {
  1431. result = Traits::low(result, *data);
  1432. data += 1;
  1433. size -= 1;
  1434. }
  1435. return result;
  1436. }
  1437. };
  1438. template <size_t size> struct wchar_selector;
  1439. template <> struct wchar_selector<2>
  1440. {
  1441. typedef uint16_t type;
  1442. typedef utf16_counter counter;
  1443. typedef utf16_writer writer;
  1444. typedef utf16_decoder<opt_false> decoder;
  1445. };
  1446. template <> struct wchar_selector<4>
  1447. {
  1448. typedef uint32_t type;
  1449. typedef utf32_counter counter;
  1450. typedef utf32_writer writer;
  1451. typedef utf32_decoder<opt_false> decoder;
  1452. };
  1453. typedef wchar_selector<sizeof(wchar_t)>::counter wchar_counter;
  1454. typedef wchar_selector<sizeof(wchar_t)>::writer wchar_writer;
  1455. struct wchar_decoder
  1456. {
  1457. typedef wchar_t type;
  1458. template <typename Traits> static inline typename Traits::value_type process(const wchar_t* data, size_t size, typename Traits::value_type result, Traits traits)
  1459. {
  1460. typedef wchar_selector<sizeof(wchar_t)>::decoder decoder;
  1461. return decoder::process(reinterpret_cast<const typename decoder::type*>(data), size, result, traits);
  1462. }
  1463. };
  1464. #ifdef PUGIXML_WCHAR_MODE
  1465. PUGI__FN void convert_wchar_endian_swap(wchar_t* result, const wchar_t* data, size_t length)
  1466. {
  1467. for (size_t i = 0; i < length; ++i)
  1468. result[i] = static_cast<wchar_t>(endian_swap(static_cast<wchar_selector<sizeof(wchar_t)>::type>(data[i])));
  1469. }
  1470. #endif
  1471. PUGI__NS_END
  1472. PUGI__NS_BEGIN
  1473. enum chartype_t
  1474. {
  1475. ct_parse_pcdata = 1, // \0, &, \r, <
  1476. ct_parse_attr = 2, // \0, &, \r, ', "
  1477. ct_parse_attr_ws = 4, // \0, &, \r, ', ", \n, tab
  1478. ct_space = 8, // \r, \n, space, tab
  1479. ct_parse_cdata = 16, // \0, ], >, \r
  1480. ct_parse_comment = 32, // \0, -, >, \r
  1481. ct_symbol = 64, // Any symbol > 127, a-z, A-Z, 0-9, _, :, -, .
  1482. ct_start_symbol = 128 // Any symbol > 127, a-z, A-Z, _, :
  1483. };
// Character classification table indexed by character code (0..255).
// Each entry is a bitwise OR of chartype_t flags, e.g. entry 0 is 55 = ct_parse_pcdata | ct_parse_attr |
// ct_parse_attr_ws | ct_parse_cdata | ct_parse_comment and entry 9 (tab) is 12 = ct_parse_attr_ws | ct_space.
// Every entry from 128 upwards is 192 = ct_symbol | ct_start_symbol.
static const unsigned char chartype_table[256] =
{
    55, 0, 0, 0, 0, 0, 0, 0, 0, 12, 12, 0, 0, 63, 0, 0, // 0-15
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 16-31
    8, 0, 6, 0, 0, 0, 7, 6, 0, 0, 0, 0, 0, 96, 64, 0, // 32-47
    64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 192, 0, 1, 0, 48, 0, // 48-63
    0, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, // 64-79
    192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 0, 0, 16, 0, 192, // 80-95
    0, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, // 96-111
    192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 0, 0, 0, 0, 0, // 112-127
    192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, // 128+
    192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192,
    192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192,
    192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192,
    192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192,
    192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192,
    192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192,
    192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192
};
  1503. enum chartypex_t
  1504. {
  1505. ctx_special_pcdata = 1, // Any symbol >= 0 and < 32 (except \t, \r, \n), &, <, >
  1506. ctx_special_attr = 2, // Any symbol >= 0 and < 32, &, <, ", '
  1507. ctx_start_symbol = 4, // Any symbol > 127, a-z, A-Z, _
  1508. ctx_digit = 8, // 0-9
  1509. ctx_symbol = 16 // Any symbol > 127, a-z, A-Z, 0-9, _, -, .
  1510. };
// Extended character classification table indexed by character code (0..255).
// Each entry is a bitwise OR of chartypex_t flags, e.g. the digits '0'-'9' are 24 = ctx_digit | ctx_symbol
// and letters are 20 = ctx_start_symbol | ctx_symbol.
// Every entry from 128 upwards is 20 as well.
static const unsigned char chartypex_table[256] =
{
    3, 3, 3, 3, 3, 3, 3, 3, 3, 2, 2, 3, 3, 2, 3, 3, // 0-15
    3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, // 16-31
    0, 0, 2, 0, 0, 0, 3, 2, 0, 0, 0, 0, 0, 16, 16, 0, // 32-47
    24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 0, 0, 3, 0, 1, 0, // 48-63
    0, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, // 64-79
    20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 0, 0, 0, 0, 20, // 80-95
    0, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, // 96-111
    20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 0, 0, 0, 0, 0, // 112-127
    20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, // 128+
    20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,
    20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,
    20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,
    20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,
    20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,
    20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,
    20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20
};
  1530. #ifdef PUGIXML_WCHAR_MODE
  1531. #define PUGI__IS_CHARTYPE_IMPL(c, ct, table) ((static_cast<unsigned int>(c) < 128 ? table[static_cast<unsigned int>(c)] : table[128]) & (ct))
  1532. #else
  1533. #define PUGI__IS_CHARTYPE_IMPL(c, ct, table) (table[static_cast<unsigned char>(c)] & (ct))
  1534. #endif
  1535. #define PUGI__IS_CHARTYPE(c, ct) PUGI__IS_CHARTYPE_IMPL(c, ct, chartype_table)
  1536. #define PUGI__IS_CHARTYPEX(c, ct) PUGI__IS_CHARTYPE_IMPL(c, ct, chartypex_table)
  1537. PUGI__FN bool is_little_endian()
  1538. {
  1539. unsigned int ui = 1;
  1540. return *reinterpret_cast<unsigned char*>(&ui) == 1;
  1541. }
  1542. PUGI__FN xml_encoding get_wchar_encoding()
  1543. {
  1544. PUGI__STATIC_ASSERT(sizeof(wchar_t) == 2 || sizeof(wchar_t) == 4);
  1545. if (sizeof(wchar_t) == 2)
  1546. return is_little_endian() ? encoding_utf16_le : encoding_utf16_be;
  1547. else
  1548. return is_little_endian() ? encoding_utf32_le : encoding_utf32_be;
  1549. }
  1550. PUGI__FN bool parse_declaration_encoding(const uint8_t* data, size_t size, const uint8_t*& out_encoding, size_t& out_length)
  1551. {
  1552. #define PUGI__SCANCHAR(ch) { if (offset >= size || data[offset] != ch) return false; offset++; }
  1553. #define PUGI__SCANCHARTYPE(ct) { while (offset < size && PUGI__IS_CHARTYPE(data[offset], ct)) offset++; }
  1554. // check if we have a non-empty XML declaration
  1555. if (size < 6 || !((data[0] == '<') & (data[1] == '?') & (data[2] == 'x') & (data[3] == 'm') & (data[4] == 'l') && PUGI__IS_CHARTYPE(data[5], ct_space)))
  1556. return false;
  1557. // scan XML declaration until the encoding field
  1558. for (size_t i = 6; i + 1 < size; ++i)
  1559. {
  1560. // declaration can not contain ? in quoted values
  1561. if (data[i] == '?')
  1562. return false;
  1563. if (data[i] == 'e' && data[i + 1] == 'n')
  1564. {
  1565. size_t offset = i;
  1566. // encoding follows the version field which can't contain 'en' so this has to be the encoding if XML is well formed
  1567. PUGI__SCANCHAR('e'); PUGI__SCANCHAR('n'); PUGI__SCANCHAR('c'); PUGI__SCANCHAR('o');
  1568. PUGI__SCANCHAR('d'); PUGI__SCANCHAR('i'); PUGI__SCANCHAR('n'); PUGI__SCANCHAR('g');
  1569. // S? = S?
  1570. PUGI__SCANCHARTYPE(ct_space);
  1571. PUGI__SCANCHAR('=');
  1572. PUGI__SCANCHARTYPE(ct_space);
  1573. // the only two valid delimiters are ' and "
  1574. uint8_t delimiter = (offset < size && data[offset] == '"') ? '"' : '\'';
  1575. PUGI__SCANCHAR(delimiter);
  1576. size_t start = offset;
  1577. out_encoding = data + offset;
  1578. PUGI__SCANCHARTYPE(ct_symbol);
  1579. out_length = offset - start;
  1580. PUGI__SCANCHAR(delimiter);
  1581. return true;
  1582. }
  1583. }
  1584. return false;
  1585. #undef PUGI__SCANCHAR
  1586. #undef PUGI__SCANCHARTYPE
  1587. }
  1588. PUGI__FN xml_encoding guess_buffer_encoding(const uint8_t* data, size_t size)
  1589. {
  1590. // skip encoding autodetection if input buffer is too small
  1591. if (size < 4) return encoding_utf8;
  1592. uint8_t d0 = data[0], d1 = data[1], d2 = data[2], d3 = data[3];
  1593. // look for BOM in first few bytes
  1594. if (d0 == 0 && d1 == 0 && d2 == 0xfe && d3 == 0xff) return encoding_utf32_be;
  1595. if (d0 == 0xff && d1 == 0xfe && d2 == 0 && d3 == 0) return encoding_utf32_le;
  1596. if (d0 == 0xfe && d1 == 0xff) return encoding_utf16_be;
  1597. if (d0 == 0xff && d1 == 0xfe) return encoding_utf16_le;
  1598. if (d0 == 0xef && d1 == 0xbb && d2 == 0xbf) return encoding_utf8;
  1599. // look for <, <? or <?xm in various encodings
  1600. if (d0 == 0 && d1 == 0 && d2 == 0 && d3 == 0x3c) return encoding_utf32_be;
  1601. if (d0 == 0x3c && d1 == 0 && d2 == 0 && d3 == 0) return encoding_utf32_le;
  1602. if (d0 == 0 && d1 == 0x3c && d2 == 0 && d3 == 0x3f) return encoding_utf16_be;
  1603. if (d0 == 0x3c && d1 == 0 && d2 == 0x3f && d3 == 0) return encoding_utf16_le;
  1604. // look for utf16 < followed by node name (this may fail, but is better than utf8 since it's zero terminated so early)
  1605. if (d0 == 0 && d1 == 0x3c) return encoding_utf16_be;
  1606. if (d0 == 0x3c && d1 == 0) return encoding_utf16_le;
  1607. // no known BOM detected; parse declaration
  1608. const uint8_t* enc = 0;
  1609. size_t enc_length = 0;
  1610. if (d0 == 0x3c && d1 == 0x3f && d2 == 0x78 && d3 == 0x6d && parse_declaration_encoding(data, size, enc, enc_length))
  1611. {
  1612. // iso-8859-1 (case-insensitive)
  1613. if (enc_length == 10
  1614. && (enc[0] | ' ') == 'i' && (enc[1] | ' ') == 's' && (enc[2] | ' ') == 'o'
  1615. && enc[3] == '-' && enc[4] == '8' && enc[5] == '8' && enc[6] == '5' && enc[7] == '9'
  1616. && enc[8] == '-' && enc[9] == '1')
  1617. return encoding_latin1;
  1618. // latin1 (case-insensitive)
  1619. if (enc_length == 6
  1620. && (enc[0] | ' ') == 'l' && (enc[1] | ' ') == 'a' && (enc[2] | ' ') == 't'
  1621. && (enc[3] | ' ') == 'i' && (enc[4] | ' ') == 'n'
  1622. && enc[5] == '1')
  1623. return encoding_latin1;
  1624. }
  1625. return encoding_utf8;
  1626. }
  1627. PUGI__FN xml_encoding get_buffer_encoding(xml_encoding encoding, const void* contents, size_t size)
  1628. {
  1629. // replace wchar encoding with utf implementation
  1630. if (encoding == encoding_wchar) return get_wchar_encoding();
  1631. // replace utf16 encoding with utf16 with specific endianness
  1632. if (encoding == encoding_utf16) return is_little_endian() ? encoding_utf16_le : encoding_utf16_be;
  1633. // replace utf32 encoding with utf32 with specific endianness
  1634. if (encoding == encoding_utf32) return is_little_endian() ? encoding_utf32_le : encoding_utf32_be;
  1635. // only do autodetection if no explicit encoding is requested
  1636. if (encoding != encoding_auto) return encoding;
  1637. // try to guess encoding (based on XML specification, Appendix F.1)
  1638. const uint8_t* data = static_cast<const uint8_t*>(contents);
  1639. return guess_buffer_encoding(data, size);
  1640. }
  1641. PUGI__FN bool get_mutable_buffer(char_t*& out_buffer, size_t& out_length, const void* contents, size_t size, bool is_mutable)
  1642. {
  1643. size_t length = size / sizeof(char_t);
  1644. if (is_mutable)
  1645. {
  1646. out_buffer = static_cast<char_t*>(const_cast<void*>(contents));
  1647. out_length = length;
  1648. }
  1649. else
  1650. {
  1651. char_t* buffer = static_cast<char_t*>(xml_memory::allocate((length + 1) * sizeof(char_t)));
  1652. if (!buffer) return false;
  1653. if (contents)
  1654. memcpy(buffer, contents, length * sizeof(char_t));
  1655. else
  1656. assert(length == 0);
  1657. buffer[length] = 0;
  1658. out_buffer = buffer;
  1659. out_length = length + 1;
  1660. }
  1661. return true;
  1662. }
  1663. #ifdef PUGIXML_WCHAR_MODE
  1664. PUGI__FN bool need_endian_swap_utf(xml_encoding le, xml_encoding re)
  1665. {
  1666. return (le == encoding_utf16_be && re == encoding_utf16_le) || (le == encoding_utf16_le && re == encoding_utf16_be) ||
  1667. (le == encoding_utf32_be && re == encoding_utf32_le) || (le == encoding_utf32_le && re == encoding_utf32_be);
  1668. }
  1669. PUGI__FN bool convert_buffer_endian_swap(char_t*& out_buffer, size_t& out_length, const void* contents, size_t size, bool is_mutable)
  1670. {
  1671. const char_t* data = static_cast<const char_t*>(contents);
  1672. size_t length = size / sizeof(char_t);
  1673. if (is_mutable)
  1674. {
  1675. char_t* buffer = const_cast<char_t*>(data);
  1676. convert_wchar_endian_swap(buffer, data, length);
  1677. out_buffer = buffer;
  1678. out_length = length;
  1679. }
  1680. else
  1681. {
  1682. char_t* buffer = static_cast<char_t*>(xml_memory::allocate((length + 1) * sizeof(char_t)));
  1683. if (!buffer) return false;
  1684. convert_wchar_endian_swap(buffer, data, length);
  1685. buffer[length] = 0;
  1686. out_buffer = buffer;
  1687. out_length = length + 1;
  1688. }
  1689. return true;
  1690. }
  1691. template <typename D> PUGI__FN bool convert_buffer_generic(char_t*& out_buffer, size_t& out_length, const void* contents, size_t size, D)
  1692. {
  1693. const typename D::type* data = static_cast<const typename D::type*>(contents);
  1694. size_t data_length = size / sizeof(typename D::type);
  1695. // first pass: get length in wchar_t units
  1696. size_t length = D::process(data, data_length, 0, wchar_counter());
  1697. // allocate buffer of suitable length
  1698. char_t* buffer = static_cast<char_t*>(xml_memory::allocate((length + 1) * sizeof(char_t)));
  1699. if (!buffer) return false;
  1700. // second pass: convert utf16 input to wchar_t
  1701. wchar_writer::value_type obegin = reinterpret_cast<wchar_writer::value_type>(buffer);
  1702. wchar_writer::value_type oend = D::process(data, data_length, obegin, wchar_writer());
  1703. assert(oend == obegin + length);
  1704. *oend = 0;
  1705. out_buffer = buffer;
  1706. out_length = length + 1;
  1707. return true;
  1708. }
  1709. PUGI__FN bool convert_buffer(char_t*& out_buffer, size_t& out_length, xml_encoding encoding, const void* contents, size_t size, bool is_mutable)
  1710. {
  1711. // get native encoding
  1712. xml_encoding wchar_encoding = get_wchar_encoding();
  1713. // fast path: no conversion required
  1714. if (encoding == wchar_encoding)
  1715. return get_mutable_buffer(out_buffer, out_length, contents, size, is_mutable);
  1716. // only endian-swapping is required
  1717. if (need_endian_swap_utf(encoding, wchar_encoding))
  1718. return convert_buffer_endian_swap(out_buffer, out_length, contents, size, is_mutable);
  1719. // source encoding is utf8
  1720. if (encoding == encoding_utf8)
  1721. return convert_buffer_generic(out_buffer, out_length, contents, size, utf8_decoder());
  1722. // source encoding is utf16
  1723. if (encoding == encoding_utf16_be || encoding == encoding_utf16_le)
  1724. {
  1725. xml_encoding native_encoding = is_little_endian() ? encoding_utf16_le : encoding_utf16_be;
  1726. return (native_encoding == encoding) ?
  1727. convert_buffer_generic(out_buffer, out_length, contents, size, utf16_decoder<opt_false>()) :
  1728. convert_buffer_generic(out_buffer, out_length, contents, size, utf16_decoder<opt_true>());
  1729. }
  1730. // source encoding is utf32
  1731. if (encoding == encoding_utf32_be || encoding == encoding_utf32_le)
  1732. {
  1733. xml_encoding native_encoding = is_little_endian() ? encoding_utf32_le : encoding_utf32_be;
  1734. return (native_encoding == encoding) ?
  1735. convert_buffer_generic(out_buffer, out_length, contents, size, utf32_decoder<opt_false>()) :
  1736. convert_buffer_generic(out_buffer, out_length, contents, size, utf32_decoder<opt_true>());
  1737. }
  1738. // source encoding is latin1
  1739. if (encoding == encoding_latin1)
  1740. return convert_buffer_generic(out_buffer, out_length, contents, size, latin1_decoder());
  1741. assert(false && "Invalid encoding"); // unreachable
  1742. return false;
  1743. }
  1744. #else
  1745. template <typename D> PUGI__FN bool convert_buffer_generic(char_t*& out_buffer, size_t& out_length, const void* contents, size_t size, D)
  1746. {
  1747. const typename D::type* data = static_cast<const typename D::type*>(contents);
  1748. size_t data_length = size / sizeof(typename D::type);
  1749. // first pass: get length in utf8 units
  1750. size_t length = D::process(data, data_length, 0, utf8_counter());
  1751. // allocate buffer of suitable length
  1752. char_t* buffer = static_cast<char_t*>(xml_memory::allocate((length + 1) * sizeof(char_t)));
  1753. if (!buffer) return false;
  1754. // second pass: convert utf16 input to utf8
  1755. uint8_t* obegin = reinterpret_cast<uint8_t*>(buffer);
  1756. uint8_t* oend = D::process(data, data_length, obegin, utf8_writer());
  1757. assert(oend == obegin + length);
  1758. *oend = 0;
  1759. out_buffer = buffer;
  1760. out_length = length + 1;
  1761. return true;
  1762. }
  1763. PUGI__FN size_t get_latin1_7bit_prefix_length(const uint8_t* data, size_t size)
  1764. {
  1765. for (size_t i = 0; i < size; ++i)
  1766. if (data[i] > 127)
  1767. return i;
  1768. return size;
  1769. }
  1770. PUGI__FN bool convert_buffer_latin1(char_t*& out_buffer, size_t& out_length, const void* contents, size_t size, bool is_mutable)
  1771. {
  1772. const uint8_t* data = static_cast<const uint8_t*>(contents);
  1773. size_t data_length = size;
  1774. // get size of prefix that does not need utf8 conversion
  1775. size_t prefix_length = get_latin1_7bit_prefix_length(data, data_length);
  1776. assert(prefix_length <= data_length);
  1777. const uint8_t* postfix = data + prefix_length;
  1778. size_t postfix_length = data_length - prefix_length;
  1779. // if no conversion is needed, just return the original buffer
  1780. if (postfix_length == 0) return get_mutable_buffer(out_buffer, out_length, contents, size, is_mutable);
  1781. // first pass: get length in utf8 units
  1782. size_t length = prefix_length + latin1_decoder::process(postfix, postfix_length, 0, utf8_counter());
  1783. // allocate buffer of suitable length
  1784. char_t* buffer = static_cast<char_t*>(xml_memory::allocate((length + 1) * sizeof(char_t)));
  1785. if (!buffer) return false;
  1786. // second pass: convert latin1 input to utf8
  1787. memcpy(buffer, data, prefix_length);
  1788. uint8_t* obegin = reinterpret_cast<uint8_t*>(buffer);
  1789. uint8_t* oend = latin1_decoder::process(postfix, postfix_length, obegin + prefix_length, utf8_writer());
  1790. assert(oend == obegin + length);
  1791. *oend = 0;
  1792. out_buffer = buffer;
  1793. out_length = length + 1;
  1794. return true;
  1795. }
  1796. PUGI__FN bool convert_buffer(char_t*& out_buffer, size_t& out_length, xml_encoding encoding, const void* contents, size_t size, bool is_mutable)
  1797. {
  1798. // fast path: no conversion required
  1799. if (encoding == encoding_utf8)
  1800. return get_mutable_buffer(out_buffer, out_length, contents, size, is_mutable);
  1801. // source encoding is utf16
  1802. if (encoding == encoding_utf16_be || encoding == encoding_utf16_le)
  1803. {
  1804. xml_encoding native_encoding = is_little_endian() ? encoding_utf16_le : encoding_utf16_be;
  1805. return (native_encoding == encoding) ?
  1806. convert_buffer_generic(out_buffer, out_length, contents, size, utf16_decoder<opt_false>()) :
  1807. convert_buffer_generic(out_buffer, out_length, contents, size, utf16_decoder<opt_true>());
  1808. }
  1809. // source encoding is utf32
  1810. if (encoding == encoding_utf32_be || encoding == encoding_utf32_le)
  1811. {
  1812. xml_encoding native_encoding = is_little_endian() ? encoding_utf32_le : encoding_utf32_be;
  1813. return (native_encoding == encoding) ?
  1814. convert_buffer_generic(out_buffer, out_length, contents, size, utf32_decoder<opt_false>()) :
  1815. convert_buffer_generic(out_buffer, out_length, contents, size, utf32_decoder<opt_true>());
  1816. }
  1817. // source encoding is latin1
  1818. if (encoding == encoding_latin1)
  1819. return convert_buffer_latin1(out_buffer, out_length, contents, size, is_mutable);
  1820. assert(false && "Invalid encoding"); // unreachable
  1821. return false;
  1822. }
  1823. #endif
  1824. PUGI__FN size_t as_utf8_begin(const wchar_t* str, size_t length)
  1825. {
  1826. // get length in utf8 characters
  1827. return wchar_decoder::process(str, length, 0, utf8_counter());
  1828. }
  1829. PUGI__FN void as_utf8_end(char* buffer, size_t size, const wchar_t* str, size_t length)
  1830. {
  1831. // convert to utf8
  1832. uint8_t* begin = reinterpret_cast<uint8_t*>(buffer);
  1833. uint8_t* end = wchar_decoder::process(str, length, begin, utf8_writer());
  1834. assert(begin + size == end);
  1835. (void)!end;
  1836. (void)!size;
  1837. }
  1838. #ifndef PUGIXML_NO_STL
  1839. PUGI__FN std::string as_utf8_impl(const wchar_t* str, size_t length)
  1840. {
  1841. // first pass: get length in utf8 characters
  1842. size_t size = as_utf8_begin(str, length);
  1843. // allocate resulting string
  1844. std::string result;
  1845. result.resize(size);
  1846. // second pass: convert to utf8
  1847. if (size > 0) as_utf8_end(&result[0], size, str, length);
  1848. return result;
  1849. }
  1850. PUGI__FN std::basic_string<wchar_t> as_wide_impl(const char* str, size_t size)
  1851. {
  1852. const uint8_t* data = reinterpret_cast<const uint8_t*>(str);
  1853. // first pass: get length in wchar_t units
  1854. size_t length = utf8_decoder::process(data, size, 0, wchar_counter());
  1855. // allocate resulting string
  1856. std::basic_string<wchar_t> result;
  1857. result.resize(length);
  1858. // second pass: convert to wchar_t
  1859. if (length > 0)
  1860. {
  1861. wchar_writer::value_type begin = reinterpret_cast<wchar_writer::value_type>(&result[0]);
  1862. wchar_writer::value_type end = utf8_decoder::process(data, size, begin, wchar_writer());
  1863. assert(begin + length == end);
  1864. (void)!end;
  1865. }
  1866. return result;
  1867. }
  1868. #endif
  1869. template <typename Header>
  1870. inline bool strcpy_insitu_allow(size_t length, const Header& header, uintptr_t header_mask, char_t* target)
  1871. {
  1872. // never reuse shared memory
  1873. if (header & xml_memory_page_contents_shared_mask) return false;
  1874. size_t target_length = strlength(target);
  1875. // always reuse document buffer memory if possible
  1876. if ((header & header_mask) == 0) return target_length >= length;
  1877. // reuse heap memory if waste is not too great
  1878. const size_t reuse_threshold = 32;
  1879. return target_length >= length && (target_length < reuse_threshold || target_length - length < target_length / 2);
  1880. }
  1881. template <typename String, typename Header>
  1882. PUGI__FN bool strcpy_insitu(String& dest, Header& header, uintptr_t header_mask, const char_t* source, size_t source_length)
  1883. {
  1884. if (source_length == 0)
  1885. {
  1886. // empty string and null pointer are equivalent, so just deallocate old memory
  1887. xml_allocator* alloc = PUGI__GETPAGE_IMPL(header)->allocator;
  1888. if (header & header_mask) alloc->deallocate_string(dest);
  1889. // mark the string as not allocated
  1890. dest = 0;
  1891. header &= ~header_mask;
  1892. return true;
  1893. }
  1894. else if (dest && strcpy_insitu_allow(source_length, header, header_mask, dest))
  1895. {
  1896. // we can reuse old buffer, so just copy the new data (including zero terminator)
  1897. memcpy(dest, source, source_length * sizeof(char_t));
  1898. dest[source_length] = 0;
  1899. return true;
  1900. }
  1901. else
  1902. {
  1903. xml_allocator* alloc = PUGI__GETPAGE_IMPL(header)->allocator;
  1904. if (!alloc->reserve()) return false;
  1905. // allocate new buffer
  1906. char_t* buf = alloc->allocate_string(source_length + 1);
  1907. if (!buf) return false;
  1908. // copy the string (including zero terminator)
  1909. memcpy(buf, source, source_length * sizeof(char_t));
  1910. buf[source_length] = 0;
  1911. // deallocate old buffer (*after* the above to protect against overlapping memory and/or allocation failures)
  1912. if (header & header_mask) alloc->deallocate_string(dest);
  1913. // the string is now allocated, so set the flag
  1914. dest = buf;
  1915. header |= header_mask;
  1916. return true;
  1917. }
  1918. }
  1919. struct gap
  1920. {
  1921. char_t* end;
  1922. size_t size;
  1923. gap(): end(0), size(0)
  1924. {
  1925. }
  1926. // Push new gap, move s count bytes further (skipping the gap).
  1927. // Collapse previous gap.
  1928. void push(char_t*& s, size_t count)
  1929. {
  1930. if (end) // there was a gap already; collapse it
  1931. {
  1932. // Move [old_gap_end, new_gap_start) to [old_gap_start, ...)
  1933. assert(s >= end);
  1934. memmove(end - size, end, reinterpret_cast<char*>(s) - reinterpret_cast<char*>(end));
  1935. }
  1936. s += count; // end of current gap
  1937. // "merge" two gaps
  1938. end = s;
  1939. size += count;
  1940. }
  1941. // Collapse all gaps, return past-the-end pointer
  1942. char_t* flush(char_t* s)
  1943. {
  1944. if (end)
  1945. {
  1946. // Move [old_gap_end, current_pos) to [old_gap_start, ...)
  1947. assert(s >= end);
  1948. memmove(end - size, end, reinterpret_cast<char*>(s) - reinterpret_cast<char*>(end));
  1949. return s - size;
  1950. }
  1951. else return s;
  1952. }
  1953. };
  1954. PUGI__FN char_t* strconv_escape(char_t* s, gap& g)
  1955. {
  1956. char_t* stre = s + 1;
  1957. switch (*stre)
  1958. {
  1959. case '#': // &#...
  1960. {
  1961. unsigned int ucsc = 0;
  1962. if (stre[1] == 'x') // &#x... (hex code)
  1963. {
  1964. stre += 2;
  1965. char_t ch = *stre;
  1966. if (ch == ';') return stre;
  1967. for (;;)
  1968. {
  1969. if (static_cast<unsigned int>(ch - '0') <= 9)
  1970. ucsc = 16 * ucsc + (ch - '0');
  1971. else if (static_cast<unsigned int>((ch | ' ') - 'a') <= 5)
  1972. ucsc = 16 * ucsc + ((ch | ' ') - 'a' + 10);
  1973. else if (ch == ';')
  1974. break;
  1975. else // cancel
  1976. return stre;
  1977. ch = *++stre;
  1978. }
  1979. ++stre;
  1980. }
  1981. else // &#... (dec code)
  1982. {
  1983. char_t ch = *++stre;
  1984. if (ch == ';') return stre;
  1985. for (;;)
  1986. {
  1987. if (static_cast<unsigned int>(ch - '0') <= 9)
  1988. ucsc = 10 * ucsc + (ch - '0');
  1989. else if (ch == ';')
  1990. break;
  1991. else // cancel
  1992. return stre;
  1993. ch = *++stre;
  1994. }
  1995. ++stre;
  1996. }
  1997. #ifdef PUGIXML_WCHAR_MODE
  1998. s = reinterpret_cast<char_t*>(wchar_writer::any(reinterpret_cast<wchar_writer::value_type>(s), ucsc));
  1999. #else
  2000. s = reinterpret_cast<char_t*>(utf8_writer::any(reinterpret_cast<uint8_t*>(s), ucsc));
  2001. #endif
  2002. g.push(s, stre - s);
  2003. return stre;
  2004. }
  2005. case 'a': // &a
  2006. {
  2007. ++stre;
  2008. if (*stre == 'm') // &am
  2009. {
  2010. if (*++stre == 'p' && *++stre == ';') // &amp;
  2011. {
  2012. *s++ = '&';
  2013. ++stre;
  2014. g.push(s, stre - s);
  2015. return stre;
  2016. }
  2017. }
  2018. else if (*stre == 'p') // &ap
  2019. {
  2020. if (*++stre == 'o' && *++stre == 's' && *++stre == ';') // &apos;
  2021. {
  2022. *s++ = '\'';
  2023. ++stre;
  2024. g.push(s, stre - s);
  2025. return stre;
  2026. }
  2027. }
  2028. break;
  2029. }
  2030. case 'g': // &g
  2031. {
  2032. if (*++stre == 't' && *++stre == ';') // &gt;
  2033. {
  2034. *s++ = '>';
  2035. ++stre;
  2036. g.push(s, stre - s);
  2037. return stre;
  2038. }
  2039. break;
  2040. }
  2041. case 'l': // &l
  2042. {
  2043. if (*++stre == 't' && *++stre == ';') // &lt;
  2044. {
  2045. *s++ = '<';
  2046. ++stre;
  2047. g.push(s, stre - s);
  2048. return stre;
  2049. }
  2050. break;
  2051. }
  2052. case 'q': // &q
  2053. {
  2054. if (*++stre == 'u' && *++stre == 'o' && *++stre == 't' && *++stre == ';') // &quot;
  2055. {
  2056. *s++ = '"';
  2057. ++stre;
  2058. g.push(s, stre - s);
  2059. return stre;
  2060. }
  2061. break;
  2062. }
  2063. default:
  2064. break;
  2065. }
  2066. return stre;
  2067. }
  2068. // Parser utilities
  2069. #define PUGI__ENDSWITH(c, e) ((c) == (e) || ((c) == 0 && endch == (e)))
  2070. #define PUGI__SKIPWS() { while (PUGI__IS_CHARTYPE(*s, ct_space)) ++s; }
  2071. #define PUGI__OPTSET(OPT) ( optmsk & (OPT) )
  2072. #define PUGI__PUSHNODE(TYPE) { cursor = append_new_node(cursor, *alloc, TYPE); if (!cursor) PUGI__THROW_ERROR(status_out_of_memory, s); }
  2073. #define PUGI__POPNODE() { cursor = cursor->parent; }
  2074. #define PUGI__SCANFOR(X) { while (*s != 0 && !(X)) ++s; }
  2075. #define PUGI__SCANWHILE(X) { while (X) ++s; }
  2076. #define PUGI__SCANWHILE_UNROLL(X) { for (;;) { char_t ss = s[0]; if (PUGI__UNLIKELY(!(X))) { break; } ss = s[1]; if (PUGI__UNLIKELY(!(X))) { s += 1; break; } ss = s[2]; if (PUGI__UNLIKELY(!(X))) { s += 2; break; } ss = s[3]; if (PUGI__UNLIKELY(!(X))) { s += 3; break; } s += 4; } }
  2077. #define PUGI__ENDSEG() { ch = *s; *s = 0; ++s; }
  2078. #define PUGI__THROW_ERROR(err, m) return error_offset = m, error_status = err, static_cast<char_t*>(0)
  2079. #define PUGI__CHECK_ERROR(err, m) { if (*s == 0) PUGI__THROW_ERROR(err, m); }
  2080. PUGI__FN char_t* strconv_comment(char_t* s, char_t endch)
  2081. {
  2082. gap g;
  2083. while (true)
  2084. {
  2085. PUGI__SCANWHILE_UNROLL(!PUGI__IS_CHARTYPE(ss, ct_parse_comment));
  2086. if (*s == '\r') // Either a single 0x0d or 0x0d 0x0a pair
  2087. {
  2088. *s++ = '\n'; // replace first one with 0x0a
  2089. if (*s == '\n') g.push(s, 1);
  2090. }
  2091. else if (s[0] == '-' && s[1] == '-' && PUGI__ENDSWITH(s[2], '>')) // comment ends here
  2092. {
  2093. *g.flush(s) = 0;
  2094. return s + (s[2] == '>' ? 3 : 2);
  2095. }
  2096. else if (*s == 0)
  2097. {
  2098. return 0;
  2099. }
  2100. else ++s;
  2101. }
  2102. }
  2103. PUGI__FN char_t* strconv_cdata(char_t* s, char_t endch)
  2104. {
  2105. gap g;
  2106. while (true)
  2107. {
  2108. PUGI__SCANWHILE_UNROLL(!PUGI__IS_CHARTYPE(ss, ct_parse_cdata));
  2109. if (*s == '\r') // Either a single 0x0d or 0x0d 0x0a pair
  2110. {
  2111. *s++ = '\n'; // replace first one with 0x0a
  2112. if (*s == '\n') g.push(s, 1);
  2113. }
  2114. else if (s[0] == ']' && s[1] == ']' && PUGI__ENDSWITH(s[2], '>')) // CDATA ends here
  2115. {
  2116. *g.flush(s) = 0;
  2117. return s + 1;
  2118. }
  2119. else if (*s == 0)
  2120. {
  2121. return 0;
  2122. }
  2123. else ++s;
  2124. }
  2125. }
  2126. typedef char_t* (*strconv_pcdata_t)(char_t*);
  2127. template <typename opt_trim, typename opt_eol, typename opt_escape> struct strconv_pcdata_impl
  2128. {
  2129. static char_t* parse(char_t* s)
  2130. {
  2131. gap g;
  2132. char_t* begin = s;
  2133. while (true)
  2134. {
  2135. PUGI__SCANWHILE_UNROLL(!PUGI__IS_CHARTYPE(ss, ct_parse_pcdata));
  2136. if (*s == '<') // PCDATA ends here
  2137. {
  2138. char_t* end = g.flush(s);
  2139. if (opt_trim::value)
  2140. while (end > begin && PUGI__IS_CHARTYPE(end[-1], ct_space))
  2141. --end;
  2142. *end = 0;
  2143. return s + 1;
  2144. }
  2145. else if (opt_eol::value && *s == '\r') // Either a single 0x0d or 0x0d 0x0a pair
  2146. {
  2147. *s++ = '\n'; // replace first one with 0x0a
  2148. if (*s == '\n') g.push(s, 1);
  2149. }
  2150. else if (opt_escape::value && *s == '&')
  2151. {
  2152. s = strconv_escape(s, g);
  2153. }
  2154. else if (*s == 0)
  2155. {
  2156. char_t* end = g.flush(s);
  2157. if (opt_trim::value)
  2158. while (end > begin && PUGI__IS_CHARTYPE(end[-1], ct_space))
  2159. --end;
  2160. *end = 0;
  2161. return s;
  2162. }
  2163. else ++s;
  2164. }
  2165. }
  2166. };
  2167. PUGI__FN strconv_pcdata_t get_strconv_pcdata(unsigned int optmask)
  2168. {
  2169. PUGI__STATIC_ASSERT(parse_escapes == 0x10 && parse_eol == 0x20 && parse_trim_pcdata == 0x0800);
  2170. switch (((optmask >> 4) & 3) | ((optmask >> 9) & 4)) // get bitmask for flags (trim eol escapes); this simultaneously checks 3 options from assertion above
  2171. {
  2172. case 0: return strconv_pcdata_impl<opt_false, opt_false, opt_false>::parse;
  2173. case 1: return strconv_pcdata_impl<opt_false, opt_false, opt_true>::parse;
  2174. case 2: return strconv_pcdata_impl<opt_false, opt_true, opt_false>::parse;
  2175. case 3: return strconv_pcdata_impl<opt_false, opt_true, opt_true>::parse;
  2176. case 4: return strconv_pcdata_impl<opt_true, opt_false, opt_false>::parse;
  2177. case 5: return strconv_pcdata_impl<opt_true, opt_false, opt_true>::parse;
  2178. case 6: return strconv_pcdata_impl<opt_true, opt_true, opt_false>::parse;
  2179. case 7: return strconv_pcdata_impl<opt_true, opt_true, opt_true>::parse;
  2180. default: assert(false); return 0; // unreachable
  2181. }
  2182. }
  2183. typedef char_t* (*strconv_attribute_t)(char_t*, char_t);
  2184. template <typename opt_escape> struct strconv_attribute_impl
  2185. {
  2186. static char_t* parse_wnorm(char_t* s, char_t end_quote)
  2187. {
  2188. gap g;
  2189. // trim leading whitespaces
  2190. if (PUGI__IS_CHARTYPE(*s, ct_space))
  2191. {
  2192. char_t* str = s;
  2193. do ++str;
  2194. while (PUGI__IS_CHARTYPE(*str, ct_space));
  2195. g.push(s, str - s);
  2196. }
  2197. while (true)
  2198. {
  2199. PUGI__SCANWHILE_UNROLL(!PUGI__IS_CHARTYPE(ss, ct_parse_attr_ws | ct_space));
  2200. if (*s == end_quote)
  2201. {
  2202. char_t* str = g.flush(s);
  2203. do *str-- = 0;
  2204. while (PUGI__IS_CHARTYPE(*str, ct_space));
  2205. return s + 1;
  2206. }
  2207. else if (PUGI__IS_CHARTYPE(*s, ct_space))
  2208. {
  2209. *s++ = ' ';
  2210. if (PUGI__IS_CHARTYPE(*s, ct_space))
  2211. {
  2212. char_t* str = s + 1;
  2213. while (PUGI__IS_CHARTYPE(*str, ct_space)) ++str;
  2214. g.push(s, str - s);
  2215. }
  2216. }
  2217. else if (opt_escape::value && *s == '&')
  2218. {
  2219. s = strconv_escape(s, g);
  2220. }
  2221. else if (!*s)
  2222. {
  2223. return 0;
  2224. }
  2225. else ++s;
  2226. }
  2227. }
  2228. static char_t* parse_wconv(char_t* s, char_t end_quote)
  2229. {
  2230. gap g;
  2231. while (true)
  2232. {
  2233. PUGI__SCANWHILE_UNROLL(!PUGI__IS_CHARTYPE(ss, ct_parse_attr_ws));
  2234. if (*s == end_quote)
  2235. {
  2236. *g.flush(s) = 0;
  2237. return s + 1;
  2238. }
  2239. else if (PUGI__IS_CHARTYPE(*s, ct_space))
  2240. {
  2241. if (*s == '\r')
  2242. {
  2243. *s++ = ' ';
  2244. if (*s == '\n') g.push(s, 1);
  2245. }
  2246. else *s++ = ' ';
  2247. }
  2248. else if (opt_escape::value && *s == '&')
  2249. {
  2250. s = strconv_escape(s, g);
  2251. }
  2252. else if (!*s)
  2253. {
  2254. return 0;
  2255. }
  2256. else ++s;
  2257. }
  2258. }
  2259. static char_t* parse_eol(char_t* s, char_t end_quote)
  2260. {
  2261. gap g;
  2262. while (true)
  2263. {
  2264. PUGI__SCANWHILE_UNROLL(!PUGI__IS_CHARTYPE(ss, ct_parse_attr));
  2265. if (*s == end_quote)
  2266. {
  2267. *g.flush(s) = 0;
  2268. return s + 1;
  2269. }
  2270. else if (*s == '\r')
  2271. {
  2272. *s++ = '\n';
  2273. if (*s == '\n') g.push(s, 1);
  2274. }
  2275. else if (opt_escape::value && *s == '&')
  2276. {
  2277. s = strconv_escape(s, g);
  2278. }
  2279. else if (!*s)
  2280. {
  2281. return 0;
  2282. }
  2283. else ++s;
  2284. }
  2285. }
  2286. static char_t* parse_simple(char_t* s, char_t end_quote)
  2287. {
  2288. gap g;
  2289. while (true)
  2290. {
  2291. PUGI__SCANWHILE_UNROLL(!PUGI__IS_CHARTYPE(ss, ct_parse_attr));
  2292. if (*s == end_quote)
  2293. {
  2294. *g.flush(s) = 0;
  2295. return s + 1;
  2296. }
  2297. else if (opt_escape::value && *s == '&')
  2298. {
  2299. s = strconv_escape(s, g);
  2300. }
  2301. else if (!*s)
  2302. {
  2303. return 0;
  2304. }
  2305. else ++s;
  2306. }
  2307. }
  2308. };
  2309. PUGI__FN strconv_attribute_t get_strconv_attribute(unsigned int optmask)
  2310. {
  2311. PUGI__STATIC_ASSERT(parse_escapes == 0x10 && parse_eol == 0x20 && parse_wconv_attribute == 0x40 && parse_wnorm_attribute == 0x80);
  2312. switch ((optmask >> 4) & 15) // get bitmask for flags (wnorm wconv eol escapes); this simultaneously checks 4 options from assertion above
  2313. {
  2314. case 0: return strconv_attribute_impl<opt_false>::parse_simple;
  2315. case 1: return strconv_attribute_impl<opt_true>::parse_simple;
  2316. case 2: return strconv_attribute_impl<opt_false>::parse_eol;
  2317. case 3: return strconv_attribute_impl<opt_true>::parse_eol;
  2318. case 4: return strconv_attribute_impl<opt_false>::parse_wconv;
  2319. case 5: return strconv_attribute_impl<opt_true>::parse_wconv;
  2320. case 6: return strconv_attribute_impl<opt_false>::parse_wconv;
  2321. case 7: return strconv_attribute_impl<opt_true>::parse_wconv;
  2322. case 8: return strconv_attribute_impl<opt_false>::parse_wnorm;
  2323. case 9: return strconv_attribute_impl<opt_true>::parse_wnorm;
  2324. case 10: return strconv_attribute_impl<opt_false>::parse_wnorm;
  2325. case 11: return strconv_attribute_impl<opt_true>::parse_wnorm;
  2326. case 12: return strconv_attribute_impl<opt_false>::parse_wnorm;
  2327. case 13: return strconv_attribute_impl<opt_true>::parse_wnorm;
  2328. case 14: return strconv_attribute_impl<opt_false>::parse_wnorm;
  2329. case 15: return strconv_attribute_impl<opt_true>::parse_wnorm;
  2330. default: assert(false); return 0; // unreachable
  2331. }
  2332. }
  2333. inline xml_parse_result make_parse_result(xml_parse_status status, ptrdiff_t offset = 0)
  2334. {
  2335. xml_parse_result result;
  2336. result.status = status;
  2337. result.offset = offset;
  2338. return result;
  2339. }
  2340. struct xml_parser
  2341. {
  2342. xml_allocator* alloc;
  2343. char_t* error_offset;
  2344. xml_parse_status error_status;
  2345. xml_parser(xml_allocator* alloc_): alloc(alloc_), error_offset(0), error_status(status_ok)
  2346. {
  2347. }
  2348. // DOCTYPE consists of nested sections of the following possible types:
  2349. // <!-- ... -->, <? ... ?>, "...", '...'
  2350. // <![...]]>
  2351. // <!...>
  2352. // First group can not contain nested groups
  2353. // Second group can contain nested groups of the same type
  2354. // Third group can contain all other groups
  2355. char_t* parse_doctype_primitive(char_t* s)
  2356. {
  2357. if (*s == '"' || *s == '\'')
  2358. {
  2359. // quoted string
  2360. char_t ch = *s++;
  2361. PUGI__SCANFOR(*s == ch);
  2362. if (!*s) PUGI__THROW_ERROR(status_bad_doctype, s);
  2363. s++;
  2364. }
  2365. else if (s[0] == '<' && s[1] == '?')
  2366. {
  2367. // <? ... ?>
  2368. s += 2;
  2369. PUGI__SCANFOR(s[0] == '?' && s[1] == '>'); // no need for ENDSWITH because ?> can't terminate proper doctype
  2370. if (!*s) PUGI__THROW_ERROR(status_bad_doctype, s);
  2371. s += 2;
  2372. }
  2373. else if (s[0] == '<' && s[1] == '!' && s[2] == '-' && s[3] == '-')
  2374. {
  2375. s += 4;
  2376. PUGI__SCANFOR(s[0] == '-' && s[1] == '-' && s[2] == '>'); // no need for ENDSWITH because --> can't terminate proper doctype
  2377. if (!*s) PUGI__THROW_ERROR(status_bad_doctype, s);
  2378. s += 3;
  2379. }
  2380. else PUGI__THROW_ERROR(status_bad_doctype, s);
  2381. return s;
  2382. }
  2383. char_t* parse_doctype_ignore(char_t* s)
  2384. {
  2385. size_t depth = 0;
  2386. assert(s[0] == '<' && s[1] == '!' && s[2] == '[');
  2387. s += 3;
  2388. while (*s)
  2389. {
  2390. if (s[0] == '<' && s[1] == '!' && s[2] == '[')
  2391. {
  2392. // nested ignore section
  2393. s += 3;
  2394. depth++;
  2395. }
  2396. else if (s[0] == ']' && s[1] == ']' && s[2] == '>')
  2397. {
  2398. // ignore section end
  2399. s += 3;
  2400. if (depth == 0)
  2401. return s;
  2402. depth--;
  2403. }
  2404. else s++;
  2405. }
  2406. PUGI__THROW_ERROR(status_bad_doctype, s);
  2407. }
  2408. char_t* parse_doctype_group(char_t* s, char_t endch)
  2409. {
  2410. size_t depth = 0;
  2411. assert((s[0] == '<' || s[0] == 0) && s[1] == '!');
  2412. s += 2;
  2413. while (*s)
  2414. {
  2415. if (s[0] == '<' && s[1] == '!' && s[2] != '-')
  2416. {
  2417. if (s[2] == '[')
  2418. {
  2419. // ignore
  2420. s = parse_doctype_ignore(s);
  2421. if (!s) return s;
  2422. }
  2423. else
  2424. {
  2425. // some control group
  2426. s += 2;
  2427. depth++;
  2428. }
  2429. }
  2430. else if (s[0] == '<' || s[0] == '"' || s[0] == '\'')
  2431. {
  2432. // unknown tag (forbidden), or some primitive group
  2433. s = parse_doctype_primitive(s);
  2434. if (!s) return s;
  2435. }
  2436. else if (*s == '>')
  2437. {
  2438. if (depth == 0)
  2439. return s;
  2440. depth--;
  2441. s++;
  2442. }
  2443. else s++;
  2444. }
  2445. if (depth != 0 || endch != '>') PUGI__THROW_ERROR(status_bad_doctype, s);
  2446. return s;
  2447. }
  2448. char_t* parse_exclamation(char_t* s, xml_node_struct* cursor, unsigned int optmsk, char_t endch)
  2449. {
  2450. // parse node contents, starting with exclamation mark
  2451. ++s;
  2452. if (*s == '-') // '<!-...'
  2453. {
  2454. ++s;
  2455. if (*s == '-') // '<!--...'
  2456. {
  2457. ++s;
  2458. if (PUGI__OPTSET(parse_comments))
  2459. {
  2460. PUGI__PUSHNODE(node_comment); // Append a new node on the tree.
  2461. cursor->value = s; // Save the offset.
  2462. }
  2463. if (PUGI__OPTSET(parse_eol) && PUGI__OPTSET(parse_comments))
  2464. {
  2465. s = strconv_comment(s, endch);
  2466. if (!s) PUGI__THROW_ERROR(status_bad_comment, cursor->value);
  2467. }
  2468. else
  2469. {
  2470. // Scan for terminating '-->'.
  2471. PUGI__SCANFOR(s[0] == '-' && s[1] == '-' && PUGI__ENDSWITH(s[2], '>'));
  2472. PUGI__CHECK_ERROR(status_bad_comment, s);
  2473. if (PUGI__OPTSET(parse_comments))
  2474. *s = 0; // Zero-terminate this segment at the first terminating '-'.
  2475. s += (s[2] == '>' ? 3 : 2); // Step over the '\0->'.
  2476. }
  2477. }
  2478. else PUGI__THROW_ERROR(status_bad_comment, s);
  2479. }
  2480. else if (*s == '[')
  2481. {
  2482. // '<![CDATA[...'
  2483. if (*++s=='C' && *++s=='D' && *++s=='A' && *++s=='T' && *++s=='A' && *++s == '[')
  2484. {
  2485. ++s;
  2486. if (PUGI__OPTSET(parse_cdata))
  2487. {
  2488. PUGI__PUSHNODE(node_cdata); // Append a new node on the tree.
  2489. cursor->value = s; // Save the offset.
  2490. if (PUGI__OPTSET(parse_eol))
  2491. {
  2492. s = strconv_cdata(s, endch);
  2493. if (!s) PUGI__THROW_ERROR(status_bad_cdata, cursor->value);
  2494. }
  2495. else
  2496. {
  2497. // Scan for terminating ']]>'.
  2498. PUGI__SCANFOR(s[0] == ']' && s[1] == ']' && PUGI__ENDSWITH(s[2], '>'));
  2499. PUGI__CHECK_ERROR(status_bad_cdata, s);
  2500. *s++ = 0; // Zero-terminate this segment.
  2501. }
  2502. }
  2503. else // Flagged for discard, but we still have to scan for the terminator.
  2504. {
  2505. // Scan for terminating ']]>'.
  2506. PUGI__SCANFOR(s[0] == ']' && s[1] == ']' && PUGI__ENDSWITH(s[2], '>'));
  2507. PUGI__CHECK_ERROR(status_bad_cdata, s);
  2508. ++s;
  2509. }
  2510. s += (s[1] == '>' ? 2 : 1); // Step over the last ']>'.
  2511. }
  2512. else PUGI__THROW_ERROR(status_bad_cdata, s);
  2513. }
  2514. else if (s[0] == 'D' && s[1] == 'O' && s[2] == 'C' && s[3] == 'T' && s[4] == 'Y' && s[5] == 'P' && PUGI__ENDSWITH(s[6], 'E'))
  2515. {
  2516. s -= 2;
  2517. if (cursor->parent) PUGI__THROW_ERROR(status_bad_doctype, s);
  2518. char_t* mark = s + 9;
  2519. s = parse_doctype_group(s, endch);
  2520. if (!s) return s;
  2521. assert((*s == 0 && endch == '>') || *s == '>');
  2522. if (*s) *s++ = 0;
  2523. if (PUGI__OPTSET(parse_doctype))
  2524. {
  2525. while (PUGI__IS_CHARTYPE(*mark, ct_space)) ++mark;
  2526. PUGI__PUSHNODE(node_doctype);
  2527. cursor->value = mark;
  2528. }
  2529. }
  2530. else if (*s == 0 && endch == '-') PUGI__THROW_ERROR(status_bad_comment, s);
  2531. else if (*s == 0 && endch == '[') PUGI__THROW_ERROR(status_bad_cdata, s);
  2532. else PUGI__THROW_ERROR(status_unrecognized_tag, s);
  2533. return s;
  2534. }
  2535. char_t* parse_question(char_t* s, xml_node_struct*& ref_cursor, unsigned int optmsk, char_t endch)
  2536. {
  2537. // load into registers
  2538. xml_node_struct* cursor = ref_cursor;
  2539. char_t ch = 0;
  2540. // parse node contents, starting with question mark
  2541. ++s;
  2542. // read PI target
  2543. char_t* target = s;
  2544. if (!PUGI__IS_CHARTYPE(*s, ct_start_symbol)) PUGI__THROW_ERROR(status_bad_pi, s);
  2545. PUGI__SCANWHILE(PUGI__IS_CHARTYPE(*s, ct_symbol));
  2546. PUGI__CHECK_ERROR(status_bad_pi, s);
  2547. // determine node type; stricmp / strcasecmp is not portable
  2548. bool declaration = (target[0] | ' ') == 'x' && (target[1] | ' ') == 'm' && (target[2] | ' ') == 'l' && target + 3 == s;
  2549. if (declaration ? PUGI__OPTSET(parse_declaration) : PUGI__OPTSET(parse_pi))
  2550. {
  2551. if (declaration)
  2552. {
  2553. // disallow non top-level declarations
  2554. if (cursor->parent) PUGI__THROW_ERROR(status_bad_pi, s);
  2555. PUGI__PUSHNODE(node_declaration);
  2556. }
  2557. else
  2558. {
  2559. PUGI__PUSHNODE(node_pi);
  2560. }
  2561. cursor->name = target;
  2562. PUGI__ENDSEG();
  2563. // parse value/attributes
  2564. if (ch == '?')
  2565. {
  2566. // empty node
  2567. if (!PUGI__ENDSWITH(*s, '>')) PUGI__THROW_ERROR(status_bad_pi, s);
  2568. s += (*s == '>');
  2569. PUGI__POPNODE();
  2570. }
  2571. else if (PUGI__IS_CHARTYPE(ch, ct_space))
  2572. {
  2573. PUGI__SKIPWS();
  2574. // scan for tag end
  2575. char_t* value = s;
  2576. PUGI__SCANFOR(s[0] == '?' && PUGI__ENDSWITH(s[1], '>'));
  2577. PUGI__CHECK_ERROR(status_bad_pi, s);
  2578. if (declaration)
  2579. {
  2580. // replace ending ? with / so that 'element' terminates properly
  2581. *s = '/';
  2582. // we exit from this function with cursor at node_declaration, which is a signal to parse() to go to LOC_ATTRIBUTES
  2583. s = value;
  2584. }
  2585. else
  2586. {
  2587. // store value and step over >
  2588. cursor->value = value;
  2589. PUGI__POPNODE();
  2590. PUGI__ENDSEG();
  2591. s += (*s == '>');
  2592. }
  2593. }
  2594. else PUGI__THROW_ERROR(status_bad_pi, s);
  2595. }
  2596. else
  2597. {
  2598. // scan for tag end
  2599. PUGI__SCANFOR(s[0] == '?' && PUGI__ENDSWITH(s[1], '>'));
  2600. PUGI__CHECK_ERROR(status_bad_pi, s);
  2601. s += (s[1] == '>' ? 2 : 1);
  2602. }
  2603. // store from registers
  2604. ref_cursor = cursor;
  2605. return s;
  2606. }
  2607. char_t* parse_tree(char_t* s, xml_node_struct* root, unsigned int optmsk, char_t endch)
  2608. {
  2609. strconv_attribute_t strconv_attribute = get_strconv_attribute(optmsk);
  2610. strconv_pcdata_t strconv_pcdata = get_strconv_pcdata(optmsk);
  2611. char_t ch = 0;
  2612. xml_node_struct* cursor = root;
  2613. char_t* mark = s;
  2614. while (*s != 0)
  2615. {
  2616. if (*s == '<')
  2617. {
  2618. ++s;
  2619. LOC_TAG:
  2620. if (PUGI__IS_CHARTYPE(*s, ct_start_symbol)) // '<#...'
  2621. {
  2622. PUGI__PUSHNODE(node_element); // Append a new node to the tree.
  2623. cursor->name = s;
  2624. PUGI__SCANWHILE_UNROLL(PUGI__IS_CHARTYPE(ss, ct_symbol)); // Scan for a terminator.
  2625. PUGI__ENDSEG(); // Save char in 'ch', terminate & step over.
  2626. if (ch == '>')
  2627. {
  2628. // end of tag
  2629. }
  2630. else if (PUGI__IS_CHARTYPE(ch, ct_space))
  2631. {
  2632. LOC_ATTRIBUTES:
  2633. while (true)
  2634. {
  2635. PUGI__SKIPWS(); // Eat any whitespace.
  2636. if (PUGI__IS_CHARTYPE(*s, ct_start_symbol)) // <... #...
  2637. {
  2638. xml_attribute_struct* a = append_new_attribute(cursor, *alloc); // Make space for this attribute.
  2639. if (!a) PUGI__THROW_ERROR(status_out_of_memory, s);
  2640. a->name = s; // Save the offset.
  2641. PUGI__SCANWHILE_UNROLL(PUGI__IS_CHARTYPE(ss, ct_symbol)); // Scan for a terminator.
  2642. PUGI__ENDSEG(); // Save char in 'ch', terminate & step over.
  2643. if (PUGI__IS_CHARTYPE(ch, ct_space))
  2644. {
  2645. PUGI__SKIPWS(); // Eat any whitespace.
  2646. ch = *s;
  2647. ++s;
  2648. }
  2649. if (ch == '=') // '<... #=...'
  2650. {
  2651. PUGI__SKIPWS(); // Eat any whitespace.
  2652. if (*s == '"' || *s == '\'') // '<... #="...'
  2653. {
  2654. ch = *s; // Save quote char to avoid breaking on "''" -or- '""'.
  2655. ++s; // Step over the quote.
  2656. a->value = s; // Save the offset.
  2657. s = strconv_attribute(s, ch);
  2658. if (!s) PUGI__THROW_ERROR(status_bad_attribute, a->value);
  2659. // After this line the loop continues from the start;
  2660. // Whitespaces, / and > are ok, symbols and EOF are wrong,
  2661. // everything else will be detected
  2662. if (PUGI__IS_CHARTYPE(*s, ct_start_symbol)) PUGI__THROW_ERROR(status_bad_attribute, s);
  2663. }
  2664. else PUGI__THROW_ERROR(status_bad_attribute, s);
  2665. }
  2666. else PUGI__THROW_ERROR(status_bad_attribute, s);
  2667. }
  2668. else if (*s == '/')
  2669. {
  2670. ++s;
  2671. if (*s == '>')
  2672. {
  2673. PUGI__POPNODE();
  2674. s++;
  2675. break;
  2676. }
  2677. else if (*s == 0 && endch == '>')
  2678. {
  2679. PUGI__POPNODE();
  2680. break;
  2681. }
  2682. else PUGI__THROW_ERROR(status_bad_start_element, s);
  2683. }
  2684. else if (*s == '>')
  2685. {
  2686. ++s;
  2687. break;
  2688. }
  2689. else if (*s == 0 && endch == '>')
  2690. {
  2691. break;
  2692. }
  2693. else PUGI__THROW_ERROR(status_bad_start_element, s);
  2694. }
  2695. // !!!
  2696. }
  2697. else if (ch == '/') // '<#.../'
  2698. {
  2699. if (!PUGI__ENDSWITH(*s, '>')) PUGI__THROW_ERROR(status_bad_start_element, s);
  2700. PUGI__POPNODE(); // Pop.
  2701. s += (*s == '>');
  2702. }
  2703. else if (ch == 0)
  2704. {
  2705. // we stepped over null terminator, backtrack & handle closing tag
  2706. --s;
  2707. if (endch != '>') PUGI__THROW_ERROR(status_bad_start_element, s);
  2708. }
  2709. else PUGI__THROW_ERROR(status_bad_start_element, s);
  2710. }
  2711. else if (*s == '/')
  2712. {
  2713. ++s;
  2714. mark = s;
  2715. char_t* name = cursor->name;
  2716. if (!name) PUGI__THROW_ERROR(status_end_element_mismatch, mark);
  2717. while (PUGI__IS_CHARTYPE(*s, ct_symbol))
  2718. {
  2719. if (*s++ != *name++) PUGI__THROW_ERROR(status_end_element_mismatch, mark);
  2720. }
  2721. if (*name)
  2722. {
  2723. if (*s == 0 && name[0] == endch && name[1] == 0) PUGI__THROW_ERROR(status_bad_end_element, s);
  2724. else PUGI__THROW_ERROR(status_end_element_mismatch, mark);
  2725. }
  2726. PUGI__POPNODE(); // Pop.
  2727. PUGI__SKIPWS();
  2728. if (*s == 0)
  2729. {
  2730. if (endch != '>') PUGI__THROW_ERROR(status_bad_end_element, s);
  2731. }
  2732. else
  2733. {
  2734. if (*s != '>') PUGI__THROW_ERROR(status_bad_end_element, s);
  2735. ++s;
  2736. }
  2737. }
  2738. else if (*s == '?') // '<?...'
  2739. {
  2740. s = parse_question(s, cursor, optmsk, endch);
  2741. if (!s) return s;
  2742. assert(cursor);
  2743. if (PUGI__NODETYPE(cursor) == node_declaration) goto LOC_ATTRIBUTES;
  2744. }
  2745. else if (*s == '!') // '<!...'
  2746. {
  2747. s = parse_exclamation(s, cursor, optmsk, endch);
  2748. if (!s) return s;
  2749. }
  2750. else if (*s == 0 && endch == '?') PUGI__THROW_ERROR(status_bad_pi, s);
  2751. else PUGI__THROW_ERROR(status_unrecognized_tag, s);
  2752. }
  2753. else
  2754. {
  2755. mark = s; // Save this offset while searching for a terminator.
  2756. PUGI__SKIPWS(); // Eat whitespace if no genuine PCDATA here.
  2757. if (*s == '<' || !*s)
  2758. {
  2759. // We skipped some whitespace characters because otherwise we would take the tag branch instead of PCDATA one
  2760. assert(mark != s);
  2761. if (!PUGI__OPTSET(parse_ws_pcdata | parse_ws_pcdata_single) || PUGI__OPTSET(parse_trim_pcdata))
  2762. {
  2763. continue;
  2764. }
  2765. else if (PUGI__OPTSET(parse_ws_pcdata_single))
  2766. {
  2767. if (s[0] != '<' || s[1] != '/' || cursor->first_child) continue;
  2768. }
  2769. }
  2770. if (!PUGI__OPTSET(parse_trim_pcdata))
  2771. s = mark;
  2772. if (cursor->parent || PUGI__OPTSET(parse_fragment))
  2773. {
  2774. if (PUGI__OPTSET(parse_embed_pcdata) && cursor->parent && !cursor->first_child && !cursor->value)
  2775. {
  2776. cursor->value = s; // Save the offset.
  2777. }
  2778. else
  2779. {
  2780. PUGI__PUSHNODE(node_pcdata); // Append a new node on the tree.
  2781. cursor->value = s; // Save the offset.
  2782. PUGI__POPNODE(); // Pop since this is a standalone.
  2783. }
  2784. s = strconv_pcdata(s);
  2785. if (!*s) break;
  2786. }
  2787. else
  2788. {
  2789. PUGI__SCANFOR(*s == '<'); // '...<'
  2790. if (!*s) break;
  2791. ++s;
  2792. }
  2793. // We're after '<'
  2794. goto LOC_TAG;
  2795. }
  2796. }
  2797. // check that last tag is closed
  2798. if (cursor != root) PUGI__THROW_ERROR(status_end_element_mismatch, s);
  2799. return s;
  2800. }
  2801. #ifdef PUGIXML_WCHAR_MODE
  2802. static char_t* parse_skip_bom(char_t* s)
  2803. {
  2804. unsigned int bom = 0xfeff;
  2805. return (s[0] == static_cast<wchar_t>(bom)) ? s + 1 : s;
  2806. }
  2807. #else
  2808. static char_t* parse_skip_bom(char_t* s)
  2809. {
  2810. return (s[0] == '\xef' && s[1] == '\xbb' && s[2] == '\xbf') ? s + 3 : s;
  2811. }
  2812. #endif
  2813. static bool has_element_node_siblings(xml_node_struct* node)
  2814. {
  2815. while (node)
  2816. {
  2817. if (PUGI__NODETYPE(node) == node_element) return true;
  2818. node = node->next_sibling;
  2819. }
  2820. return false;
  2821. }
  2822. static xml_parse_result parse(char_t* buffer, size_t length, xml_document_struct* xmldoc, xml_node_struct* root, unsigned int optmsk)
  2823. {
  2824. // early-out for empty documents
  2825. if (length == 0)
  2826. return make_parse_result(PUGI__OPTSET(parse_fragment) ? status_ok : status_no_document_element);
  2827. // get last child of the root before parsing
  2828. xml_node_struct* last_root_child = root->first_child ? root->first_child->prev_sibling_c + 0 : 0;
  2829. // create parser on stack
  2830. xml_parser parser(static_cast<xml_allocator*>(xmldoc));
  2831. // save last character and make buffer zero-terminated (speeds up parsing)
  2832. char_t endch = buffer[length - 1];
  2833. buffer[length - 1] = 0;
  2834. // skip BOM to make sure it does not end up as part of parse output
  2835. char_t* buffer_data = parse_skip_bom(buffer);
  2836. // perform actual parsing
  2837. parser.parse_tree(buffer_data, root, optmsk, endch);
  2838. xml_parse_result result = make_parse_result(parser.error_status, parser.error_offset ? parser.error_offset - buffer : 0);
  2839. assert(result.offset >= 0 && static_cast<size_t>(result.offset) <= length);
  2840. if (result)
  2841. {
  2842. // since we removed last character, we have to handle the only possible false positive (stray <)
  2843. if (endch == '<')
  2844. return make_parse_result(status_unrecognized_tag, length - 1);
  2845. // check if there are any element nodes parsed
  2846. xml_node_struct* first_root_child_parsed = last_root_child ? last_root_child->next_sibling + 0 : root->first_child+ 0;
  2847. if (!PUGI__OPTSET(parse_fragment) && !has_element_node_siblings(first_root_child_parsed))
  2848. return make_parse_result(status_no_document_element, length - 1);
  2849. }
  2850. else
  2851. {
  2852. // roll back offset if it occurs on a null terminator in the source buffer
  2853. if (result.offset > 0 && static_cast<size_t>(result.offset) == length - 1 && endch == 0)
  2854. result.offset--;
  2855. }
  2856. return result;
  2857. }
  2858. };
  2859. // Output facilities
  2860. PUGI__FN xml_encoding get_write_native_encoding()
  2861. {
  2862. #ifdef PUGIXML_WCHAR_MODE
  2863. return get_wchar_encoding();
  2864. #else
  2865. return encoding_utf8;
  2866. #endif
  2867. }
  2868. PUGI__FN xml_encoding get_write_encoding(xml_encoding encoding)
  2869. {
  2870. // replace wchar encoding with utf implementation
  2871. if (encoding == encoding_wchar) return get_wchar_encoding();
  2872. // replace utf16 encoding with utf16 with specific endianness
  2873. if (encoding == encoding_utf16) return is_little_endian() ? encoding_utf16_le : encoding_utf16_be;
  2874. // replace utf32 encoding with utf32 with specific endianness
  2875. if (encoding == encoding_utf32) return is_little_endian() ? encoding_utf32_le : encoding_utf32_be;
  2876. // only do autodetection if no explicit encoding is requested
  2877. if (encoding != encoding_auto) return encoding;
  2878. // assume utf8 encoding
  2879. return encoding_utf8;
  2880. }
  2881. template <typename D, typename T> PUGI__FN size_t convert_buffer_output_generic(typename T::value_type dest, const char_t* data, size_t length, D, T)
  2882. {
  2883. PUGI__STATIC_ASSERT(sizeof(char_t) == sizeof(typename D::type));
  2884. typename T::value_type end = D::process(reinterpret_cast<const typename D::type*>(data), length, dest, T());
  2885. return static_cast<size_t>(end - dest) * sizeof(*dest);
  2886. }
  2887. template <typename D, typename T> PUGI__FN size_t convert_buffer_output_generic(typename T::value_type dest, const char_t* data, size_t length, D, T, bool opt_swap)
  2888. {
  2889. PUGI__STATIC_ASSERT(sizeof(char_t) == sizeof(typename D::type));
  2890. typename T::value_type end = D::process(reinterpret_cast<const typename D::type*>(data), length, dest, T());
  2891. if (opt_swap)
  2892. {
  2893. for (typename T::value_type i = dest; i != end; ++i)
  2894. *i = endian_swap(*i);
  2895. }
  2896. return static_cast<size_t>(end - dest) * sizeof(*dest);
  2897. }
  2898. #ifdef PUGIXML_WCHAR_MODE
  2899. PUGI__FN size_t get_valid_length(const char_t* data, size_t length)
  2900. {
  2901. if (length < 1) return 0;
  2902. // discard last character if it's the lead of a surrogate pair
  2903. return (sizeof(wchar_t) == 2 && static_cast<unsigned int>(static_cast<uint16_t>(data[length - 1]) - 0xD800) < 0x400) ? length - 1 : length;
  2904. }
  2905. PUGI__FN size_t convert_buffer_output(char_t* r_char, uint8_t* r_u8, uint16_t* r_u16, uint32_t* r_u32, const char_t* data, size_t length, xml_encoding encoding)
  2906. {
  2907. // only endian-swapping is required
  2908. if (need_endian_swap_utf(encoding, get_wchar_encoding()))
  2909. {
  2910. convert_wchar_endian_swap(r_char, data, length);
  2911. return length * sizeof(char_t);
  2912. }
  2913. // convert to utf8
  2914. if (encoding == encoding_utf8)
  2915. return convert_buffer_output_generic(r_u8, data, length, wchar_decoder(), utf8_writer());
  2916. // convert to utf16
  2917. if (encoding == encoding_utf16_be || encoding == encoding_utf16_le)
  2918. {
  2919. xml_encoding native_encoding = is_little_endian() ? encoding_utf16_le : encoding_utf16_be;
  2920. return convert_buffer_output_generic(r_u16, data, length, wchar_decoder(), utf16_writer(), native_encoding != encoding);
  2921. }
  2922. // convert to utf32
  2923. if (encoding == encoding_utf32_be || encoding == encoding_utf32_le)
  2924. {
  2925. xml_encoding native_encoding = is_little_endian() ? encoding_utf32_le : encoding_utf32_be;
  2926. return convert_buffer_output_generic(r_u32, data, length, wchar_decoder(), utf32_writer(), native_encoding != encoding);
  2927. }
  2928. // convert to latin1
  2929. if (encoding == encoding_latin1)
  2930. return convert_buffer_output_generic(r_u8, data, length, wchar_decoder(), latin1_writer());
  2931. assert(false && "Invalid encoding"); // unreachable
  2932. return 0;
  2933. }
  2934. #else
  2935. PUGI__FN size_t get_valid_length(const char_t* data, size_t length)
  2936. {
  2937. if (length < 5) return 0;
  2938. for (size_t i = 1; i <= 4; ++i)
  2939. {
  2940. uint8_t ch = static_cast<uint8_t>(data[length - i]);
  2941. // either a standalone character or a leading one
  2942. if ((ch & 0xc0) != 0x80) return length - i;
  2943. }
  2944. // there are four non-leading characters at the end, sequence tail is broken so might as well process the whole chunk
  2945. return length;
  2946. }
  2947. PUGI__FN size_t convert_buffer_output(char_t* /* r_char */, uint8_t* r_u8, uint16_t* r_u16, uint32_t* r_u32, const char_t* data, size_t length, xml_encoding encoding)
  2948. {
  2949. if (encoding == encoding_utf16_be || encoding == encoding_utf16_le)
  2950. {
  2951. xml_encoding native_encoding = is_little_endian() ? encoding_utf16_le : encoding_utf16_be;
  2952. return convert_buffer_output_generic(r_u16, data, length, utf8_decoder(), utf16_writer(), native_encoding != encoding);
  2953. }
  2954. if (encoding == encoding_utf32_be || encoding == encoding_utf32_le)
  2955. {
  2956. xml_encoding native_encoding = is_little_endian() ? encoding_utf32_le : encoding_utf32_be;
  2957. return convert_buffer_output_generic(r_u32, data, length, utf8_decoder(), utf32_writer(), native_encoding != encoding);
  2958. }
  2959. if (encoding == encoding_latin1)
  2960. return convert_buffer_output_generic(r_u8, data, length, utf8_decoder(), latin1_writer());
  2961. assert(false && "Invalid encoding"); // unreachable
  2962. return 0;
  2963. }
  2964. #endif
  2965. class xml_buffered_writer
  2966. {
  2967. xml_buffered_writer(const xml_buffered_writer&);
  2968. xml_buffered_writer& operator=(const xml_buffered_writer&);
  2969. public:
  2970. xml_buffered_writer(xml_writer& writer_, xml_encoding user_encoding): writer(writer_), bufsize(0), encoding(get_write_encoding(user_encoding))
  2971. {
  2972. PUGI__STATIC_ASSERT(bufcapacity >= 8);
  2973. }
  2974. size_t flush()
  2975. {
  2976. flush(buffer, bufsize);
  2977. bufsize = 0;
  2978. return 0;
  2979. }
  2980. void flush(const char_t* data, size_t size)
  2981. {
  2982. if (size == 0) return;
  2983. // fast path, just write data
  2984. if (encoding == get_write_native_encoding())
  2985. writer.write(data, size * sizeof(char_t));
  2986. else
  2987. {
  2988. // convert chunk
  2989. size_t result = convert_buffer_output(scratch.data_char, scratch.data_u8, scratch.data_u16, scratch.data_u32, data, size, encoding);
  2990. assert(result <= sizeof(scratch));
  2991. // write data
  2992. writer.write(scratch.data_u8, result);
  2993. }
  2994. }
  2995. void write_direct(const char_t* data, size_t length)
  2996. {
  2997. // flush the remaining buffer contents
  2998. flush();
  2999. // handle large chunks
  3000. if (length > bufcapacity)
  3001. {
  3002. if (encoding == get_write_native_encoding())
  3003. {
  3004. // fast path, can just write data chunk
  3005. writer.write(data, length * sizeof(char_t));
  3006. return;
  3007. }
  3008. // need to convert in suitable chunks
  3009. while (length > bufcapacity)
  3010. {
  3011. // get chunk size by selecting such number of characters that are guaranteed to fit into scratch buffer
  3012. // and form a complete codepoint sequence (i.e. discard start of last codepoint if necessary)
  3013. size_t chunk_size = get_valid_length(data, bufcapacity);
  3014. assert(chunk_size);
  3015. // convert chunk and write
  3016. flush(data, chunk_size);
  3017. // iterate
  3018. data += chunk_size;
  3019. length -= chunk_size;
  3020. }
  3021. // small tail is copied below
  3022. bufsize = 0;
  3023. }
  3024. memcpy(buffer + bufsize, data, length * sizeof(char_t));
  3025. bufsize += length;
  3026. }
  3027. void write_buffer(const char_t* data, size_t length)
  3028. {
  3029. size_t offset = bufsize;
  3030. if (offset + length <= bufcapacity)
  3031. {
  3032. memcpy(buffer + offset, data, length * sizeof(char_t));
  3033. bufsize = offset + length;
  3034. }
  3035. else
  3036. {
  3037. write_direct(data, length);
  3038. }
  3039. }
  3040. void write_string(const char_t* data)
  3041. {
  3042. // write the part of the string that fits in the buffer
  3043. size_t offset = bufsize;
  3044. while (*data && offset < bufcapacity)
  3045. buffer[offset++] = *data++;
  3046. // write the rest
  3047. if (offset < bufcapacity)
  3048. {
  3049. bufsize = offset;
  3050. }
  3051. else
  3052. {
  3053. // backtrack a bit if we have split the codepoint
  3054. size_t length = offset - bufsize;
  3055. size_t extra = length - get_valid_length(data - length, length);
  3056. bufsize = offset - extra;
  3057. write_direct(data - extra, strlength(data) + extra);
  3058. }
  3059. }
  3060. void write(char_t d0)
  3061. {
  3062. size_t offset = bufsize;
  3063. if (offset > bufcapacity - 1) offset = flush();
  3064. buffer[offset + 0] = d0;
  3065. bufsize = offset + 1;
  3066. }
  3067. void write(char_t d0, char_t d1)
  3068. {
  3069. size_t offset = bufsize;
  3070. if (offset > bufcapacity - 2) offset = flush();
  3071. buffer[offset + 0] = d0;
  3072. buffer[offset + 1] = d1;
  3073. bufsize = offset + 2;
  3074. }
  3075. void write(char_t d0, char_t d1, char_t d2)
  3076. {
  3077. size_t offset = bufsize;
  3078. if (offset > bufcapacity - 3) offset = flush();
  3079. buffer[offset + 0] = d0;
  3080. buffer[offset + 1] = d1;
  3081. buffer[offset + 2] = d2;
  3082. bufsize = offset + 3;
  3083. }
  3084. void write(char_t d0, char_t d1, char_t d2, char_t d3)
  3085. {
  3086. size_t offset = bufsize;
  3087. if (offset > bufcapacity - 4) offset = flush();
  3088. buffer[offset + 0] = d0;
  3089. buffer[offset + 1] = d1;
  3090. buffer[offset + 2] = d2;
  3091. buffer[offset + 3] = d3;
  3092. bufsize = offset + 4;
  3093. }
  3094. void write(char_t d0, char_t d1, char_t d2, char_t d3, char_t d4)
  3095. {
  3096. size_t offset = bufsize;
  3097. if (offset > bufcapacity - 5) offset = flush();
  3098. buffer[offset + 0] = d0;
  3099. buffer[offset + 1] = d1;
  3100. buffer[offset + 2] = d2;
  3101. buffer[offset + 3] = d3;
  3102. buffer[offset + 4] = d4;
  3103. bufsize = offset + 5;
  3104. }
  3105. void write(char_t d0, char_t d1, char_t d2, char_t d3, char_t d4, char_t d5)
  3106. {
  3107. size_t offset = bufsize;
  3108. if (offset > bufcapacity - 6) offset = flush();
  3109. buffer[offset + 0] = d0;
  3110. buffer[offset + 1] = d1;
  3111. buffer[offset + 2] = d2;
  3112. buffer[offset + 3] = d3;
  3113. buffer[offset + 4] = d4;
  3114. buffer[offset + 5] = d5;
  3115. bufsize = offset + 6;
  3116. }
  3117. // utf8 maximum expansion: x4 (-> utf32)
  3118. // utf16 maximum expansion: x2 (-> utf32)
  3119. // utf32 maximum expansion: x1
  3120. enum
  3121. {
  3122. bufcapacitybytes =
  3123. #ifdef PUGIXML_MEMORY_OUTPUT_STACK
  3124. PUGIXML_MEMORY_OUTPUT_STACK
  3125. #else
  3126. 10240
  3127. #endif
  3128. ,
  3129. bufcapacity = bufcapacitybytes / (sizeof(char_t) + 4)
  3130. };
  3131. char_t buffer[bufcapacity];
  3132. union
  3133. {
  3134. uint8_t data_u8[4 * bufcapacity];
  3135. uint16_t data_u16[2 * bufcapacity];
  3136. uint32_t data_u32[bufcapacity];
  3137. char_t data_char[bufcapacity];
  3138. } scratch;
  3139. xml_writer& writer;
  3140. size_t bufsize;
  3141. xml_encoding encoding;
  3142. };
  3143. PUGI__FN void text_output_escaped(xml_buffered_writer& writer, const char_t* s, chartypex_t type, unsigned int flags)
  3144. {
  3145. while (*s)
  3146. {
  3147. const char_t* prev = s;
  3148. // While *s is a usual symbol
  3149. PUGI__SCANWHILE_UNROLL(!PUGI__IS_CHARTYPEX(ss, type));
  3150. writer.write_buffer(prev, static_cast<size_t>(s - prev));
  3151. switch (*s)
  3152. {
  3153. case 0: break;
  3154. case '&':
  3155. writer.write('&', 'a', 'm', 'p', ';');
  3156. ++s;
  3157. break;
  3158. case '<':
  3159. writer.write('&', 'l', 't', ';');
  3160. ++s;
  3161. break;
  3162. case '>':
  3163. writer.write('&', 'g', 't', ';');
  3164. ++s;
  3165. break;
  3166. case '"':
  3167. if (flags & format_attribute_single_quote)
  3168. writer.write('"');
  3169. else
  3170. writer.write('&', 'q', 'u', 'o', 't', ';');
  3171. ++s;
  3172. break;
  3173. case '\'':
  3174. if (flags & format_attribute_single_quote)
  3175. writer.write('&', 'a', 'p', 'o', 's', ';');
  3176. else
  3177. writer.write('\'');
  3178. ++s;
  3179. break;
  3180. default: // s is not a usual symbol
  3181. {
  3182. unsigned int ch = static_cast<unsigned int>(*s++);
  3183. assert(ch < 32);
  3184. if (!(flags & format_skip_control_chars))
  3185. writer.write('&', '#', static_cast<char_t>((ch / 10) + '0'), static_cast<char_t>((ch % 10) + '0'), ';');
  3186. }
  3187. }
  3188. }
  3189. }
  3190. PUGI__FN void text_output(xml_buffered_writer& writer, const char_t* s, chartypex_t type, unsigned int flags)
  3191. {
  3192. if (flags & format_no_escapes)
  3193. writer.write_string(s);
  3194. else
  3195. text_output_escaped(writer, s, type, flags);
  3196. }
  3197. PUGI__FN void text_output_cdata(xml_buffered_writer& writer, const char_t* s)
  3198. {
  3199. do
  3200. {
  3201. writer.write('<', '!', '[', 'C', 'D');
  3202. writer.write('A', 'T', 'A', '[');
  3203. const char_t* prev = s;
  3204. // look for ]]> sequence - we can't output it as is since it terminates CDATA
  3205. while (*s && !(s[0] == ']' && s[1] == ']' && s[2] == '>')) ++s;
  3206. // skip ]] if we stopped at ]]>, > will go to the next CDATA section
  3207. if (*s) s += 2;
  3208. writer.write_buffer(prev, static_cast<size_t>(s - prev));
  3209. writer.write(']', ']', '>');
  3210. }
  3211. while (*s);
  3212. }
  3213. PUGI__FN void text_output_indent(xml_buffered_writer& writer, const char_t* indent, size_t indent_length, unsigned int depth)
  3214. {
  3215. switch (indent_length)
  3216. {
  3217. case 1:
  3218. {
  3219. for (unsigned int i = 0; i < depth; ++i)
  3220. writer.write(indent[0]);
  3221. break;
  3222. }
  3223. case 2:
  3224. {
  3225. for (unsigned int i = 0; i < depth; ++i)
  3226. writer.write(indent[0], indent[1]);
  3227. break;
  3228. }
  3229. case 3:
  3230. {
  3231. for (unsigned int i = 0; i < depth; ++i)
  3232. writer.write(indent[0], indent[1], indent[2]);
  3233. break;
  3234. }
  3235. case 4:
  3236. {
  3237. for (unsigned int i = 0; i < depth; ++i)
  3238. writer.write(indent[0], indent[1], indent[2], indent[3]);
  3239. break;
  3240. }
  3241. default:
  3242. {
  3243. for (unsigned int i = 0; i < depth; ++i)
  3244. writer.write_buffer(indent, indent_length);
  3245. }
  3246. }
  3247. }
  3248. PUGI__FN void node_output_comment(xml_buffered_writer& writer, const char_t* s)
  3249. {
  3250. writer.write('<', '!', '-', '-');
  3251. while (*s)
  3252. {
  3253. const char_t* prev = s;
  3254. // look for -\0 or -- sequence - we can't output it since -- is illegal in comment body
  3255. while (*s && !(s[0] == '-' && (s[1] == '-' || s[1] == 0))) ++s;
  3256. writer.write_buffer(prev, static_cast<size_t>(s - prev));
  3257. if (*s)
  3258. {
  3259. assert(*s == '-');
  3260. writer.write('-', ' ');
  3261. ++s;
  3262. }
  3263. }
  3264. writer.write('-', '-', '>');
  3265. }
  3266. PUGI__FN void node_output_pi_value(xml_buffered_writer& writer, const char_t* s)
  3267. {
  3268. while (*s)
  3269. {
  3270. const char_t* prev = s;
  3271. // look for ?> sequence - we can't output it since ?> terminates PI
  3272. while (*s && !(s[0] == '?' && s[1] == '>')) ++s;
  3273. writer.write_buffer(prev, static_cast<size_t>(s - prev));
  3274. if (*s)
  3275. {
  3276. assert(s[0] == '?' && s[1] == '>');
  3277. writer.write('?', ' ', '>');
  3278. s += 2;
  3279. }
  3280. }
  3281. }
  3282. PUGI__FN void node_output_attributes(xml_buffered_writer& writer, xml_node_struct* node, const char_t* indent, size_t indent_length, unsigned int flags, unsigned int depth)
  3283. {
  3284. const char_t* default_name = PUGIXML_TEXT(":anonymous");
  3285. const char_t enquotation_char = (flags & format_attribute_single_quote) ? '\'' : '"';
  3286. for (xml_attribute_struct* a = node->first_attribute; a; a = a->next_attribute)
  3287. {
  3288. if ((flags & (format_indent_attributes | format_raw)) == format_indent_attributes)
  3289. {
  3290. writer.write('\n');
  3291. text_output_indent(writer, indent, indent_length, depth + 1);
  3292. }
  3293. else
  3294. {
  3295. writer.write(' ');
  3296. }
  3297. writer.write_string(a->name ? a->name + 0 : default_name);
  3298. writer.write('=', enquotation_char);
  3299. if (a->value)
  3300. text_output(writer, a->value, ctx_special_attr, flags);
  3301. writer.write(enquotation_char);
  3302. }
  3303. }
  3304. PUGI__FN bool node_output_start(xml_buffered_writer& writer, xml_node_struct* node, const char_t* indent, size_t indent_length, unsigned int flags, unsigned int depth)
  3305. {
  3306. const char_t* default_name = PUGIXML_TEXT(":anonymous");
  3307. const char_t* name = node->name ? node->name + 0 : default_name;
  3308. writer.write('<');
  3309. writer.write_string(name);
  3310. if (node->first_attribute)
  3311. node_output_attributes(writer, node, indent, indent_length, flags, depth);
  3312. // element nodes can have value if parse_embed_pcdata was used
  3313. if (!node->value)
  3314. {
  3315. if (!node->first_child)
  3316. {
  3317. if (flags & format_no_empty_element_tags)
  3318. {
  3319. writer.write('>', '<', '/');
  3320. writer.write_string(name);
  3321. writer.write('>');
  3322. return false;
  3323. }
  3324. else
  3325. {
  3326. if ((flags & format_raw) == 0)
  3327. writer.write(' ');
  3328. writer.write('/', '>');
  3329. return false;
  3330. }
  3331. }
  3332. else
  3333. {
  3334. writer.write('>');
  3335. return true;
  3336. }
  3337. }
  3338. else
  3339. {
  3340. writer.write('>');
  3341. text_output(writer, node->value, ctx_special_pcdata, flags);
  3342. if (!node->first_child)
  3343. {
  3344. writer.write('<', '/');
  3345. writer.write_string(name);
  3346. writer.write('>');
  3347. return false;
  3348. }
  3349. else
  3350. {
  3351. return true;
  3352. }
  3353. }
  3354. }
  3355. PUGI__FN void node_output_end(xml_buffered_writer& writer, xml_node_struct* node)
  3356. {
  3357. const char_t* default_name = PUGIXML_TEXT(":anonymous");
  3358. const char_t* name = node->name ? node->name + 0 : default_name;
  3359. writer.write('<', '/');
  3360. writer.write_string(name);
  3361. writer.write('>');
  3362. }
  3363. PUGI__FN void node_output_simple(xml_buffered_writer& writer, xml_node_struct* node, unsigned int flags)
  3364. {
  3365. const char_t* default_name = PUGIXML_TEXT(":anonymous");
  3366. switch (PUGI__NODETYPE(node))
  3367. {
  3368. case node_pcdata:
  3369. text_output(writer, node->value ? node->value + 0 : PUGIXML_TEXT(""), ctx_special_pcdata, flags);
  3370. break;
  3371. case node_cdata:
  3372. text_output_cdata(writer, node->value ? node->value + 0 : PUGIXML_TEXT(""));
  3373. break;
  3374. case node_comment:
  3375. node_output_comment(writer, node->value ? node->value + 0 : PUGIXML_TEXT(""));
  3376. break;
  3377. case node_pi:
  3378. writer.write('<', '?');
  3379. writer.write_string(node->name ? node->name + 0 : default_name);
  3380. if (node->value)
  3381. {
  3382. writer.write(' ');
  3383. node_output_pi_value(writer, node->value);
  3384. }
  3385. writer.write('?', '>');
  3386. break;
  3387. case node_declaration:
  3388. writer.write('<', '?');
  3389. writer.write_string(node->name ? node->name + 0 : default_name);
  3390. node_output_attributes(writer, node, PUGIXML_TEXT(""), 0, flags | format_raw, 0);
  3391. writer.write('?', '>');
  3392. break;
  3393. case node_doctype:
  3394. writer.write('<', '!', 'D', 'O', 'C');
  3395. writer.write('T', 'Y', 'P', 'E');
  3396. if (node->value)
  3397. {
  3398. writer.write(' ');
  3399. writer.write_string(node->value);
  3400. }
  3401. writer.write('>');
  3402. break;
  3403. default:
  3404. assert(false && "Invalid node type"); // unreachable
  3405. }
  3406. }
  // Bit flags used by node_output to record which whitespace must precede the next thing written.
  enum indent_flags_t
  {
      indent_newline = 1 << 0, // write a line break first (suppressed under format_raw)
      indent_indent = 1 << 1   // write indentation for the current depth first
  };
  3412. PUGI__FN void node_output(xml_buffered_writer& writer, xml_node_struct* root, const char_t* indent, unsigned int flags, unsigned int depth)
  3413. {
  3414. size_t indent_length = ((flags & (format_indent | format_indent_attributes)) && (flags & format_raw) == 0) ? strlength(indent) : 0;
  3415. unsigned int indent_flags = indent_indent;
  3416. xml_node_struct* node = root;
  3417. do
  3418. {
  3419. assert(node);
  3420. // begin writing current node
  3421. if (PUGI__NODETYPE(node) == node_pcdata || PUGI__NODETYPE(node) == node_cdata)
  3422. {
  3423. node_output_simple(writer, node, flags);
  3424. indent_flags = 0;
  3425. }
  3426. else
  3427. {
  3428. if ((indent_flags & indent_newline) && (flags & format_raw) == 0)
  3429. writer.write('\n');
  3430. if ((indent_flags & indent_indent) && indent_length)
  3431. text_output_indent(writer, indent, indent_length, depth);
  3432. if (PUGI__NODETYPE(node) == node_element)
  3433. {
  3434. indent_flags = indent_newline | indent_indent;
  3435. if (node_output_start(writer, node, indent, indent_length, flags, depth))
  3436. {
  3437. // element nodes can have value if parse_embed_pcdata was used
  3438. if (node->value)
  3439. indent_flags = 0;
  3440. node = node->first_child;
  3441. depth++;
  3442. continue;
  3443. }
  3444. }
  3445. else if (PUGI__NODETYPE(node) == node_document)
  3446. {
  3447. indent_flags = indent_indent;
  3448. if (node->first_child)
  3449. {
  3450. node = node->first_child;
  3451. continue;
  3452. }
  3453. }
  3454. else
  3455. {
  3456. node_output_simple(writer, node, flags);
  3457. indent_flags = indent_newline | indent_indent;
  3458. }
  3459. }
  3460. // continue to the next node
  3461. while (node != root)
  3462. {
  3463. if (node->next_sibling)
  3464. {
  3465. node = node->next_sibling;
  3466. break;
  3467. }
  3468. node = node->parent;
  3469. // write closing node
  3470. if (PUGI__NODETYPE(node) == node_element)
  3471. {
  3472. depth--;
  3473. if ((indent_flags & indent_newline) && (flags & format_raw) == 0)
  3474. writer.write('\n');
  3475. if ((indent_flags & indent_indent) && indent_length)
  3476. text_output_indent(writer, indent, indent_length, depth);
  3477. node_output_end(writer, node);
  3478. indent_flags = indent_newline | indent_indent;
  3479. }
  3480. }
  3481. }
  3482. while (node != root);
  3483. if ((indent_flags & indent_newline) && (flags & format_raw) == 0)
  3484. writer.write('\n');
  3485. }
  3486. PUGI__FN bool has_declaration(xml_node_struct* node)
  3487. {
  3488. for (xml_node_struct* child = node->first_child; child; child = child->next_sibling)
  3489. {
  3490. xml_node_type type = PUGI__NODETYPE(child);
  3491. if (type == node_declaration) return true;
  3492. if (type == node_element) return false;
  3493. }
  3494. return false;
  3495. }
  3496. PUGI__FN bool is_attribute_of(xml_attribute_struct* attr, xml_node_struct* node)
  3497. {
  3498. for (xml_attribute_struct* a = node->first_attribute; a; a = a->next_attribute)
  3499. if (a == attr)
  3500. return true;
  3501. return false;
  3502. }
  3503. PUGI__FN bool allow_insert_attribute(xml_node_type parent)
  3504. {
  3505. return parent == node_element || parent == node_declaration;
  3506. }
  3507. PUGI__FN bool allow_insert_child(xml_node_type parent, xml_node_type child)
  3508. {
  3509. if (parent != node_document && parent != node_element) return false;
  3510. if (child == node_document || child == node_null) return false;
  3511. if (parent != node_document && (child == node_declaration || child == node_doctype)) return false;
  3512. return true;
  3513. }
  3514. PUGI__FN bool allow_move(xml_node parent, xml_node child)
  3515. {
  3516. // check that child can be a child of parent
  3517. if (!allow_insert_child(parent.type(), child.type()))
  3518. return false;
  3519. // check that node is not moved between documents
  3520. if (parent.root() != child.root())
  3521. return false;
  3522. // check that new parent is not in the child subtree
  3523. xml_node cur = parent;
  3524. while (cur)
  3525. {
  3526. if (cur == child)
  3527. return false;
  3528. cur = cur.parent();
  3529. }
  3530. return true;
  3531. }
  3532. template <typename String, typename Header>
  3533. PUGI__FN void node_copy_string(String& dest, Header& header, uintptr_t header_mask, char_t* source, Header& source_header, xml_allocator* alloc)
  3534. {
  3535. assert(!dest && (header & header_mask) == 0);
  3536. if (source)
  3537. {
  3538. if (alloc && (source_header & header_mask) == 0)
  3539. {
  3540. dest = source;
  3541. // since strcpy_insitu can reuse document buffer memory we need to mark both source and dest as shared
  3542. header |= xml_memory_page_contents_shared_mask;
  3543. source_header |= xml_memory_page_contents_shared_mask;
  3544. }
  3545. else
  3546. strcpy_insitu(dest, header, header_mask, source, strlength(source));
  3547. }
  3548. }
  3549. PUGI__FN void node_copy_contents(xml_node_struct* dn, xml_node_struct* sn, xml_allocator* shared_alloc)
  3550. {
  3551. node_copy_string(dn->name, dn->header, xml_memory_page_name_allocated_mask, sn->name, sn->header, shared_alloc);
  3552. node_copy_string(dn->value, dn->header, xml_memory_page_value_allocated_mask, sn->value, sn->header, shared_alloc);
  3553. for (xml_attribute_struct* sa = sn->first_attribute; sa; sa = sa->next_attribute)
  3554. {
  3555. xml_attribute_struct* da = append_new_attribute(dn, get_allocator(dn));
  3556. if (da)
  3557. {
  3558. node_copy_string(da->name, da->header, xml_memory_page_name_allocated_mask, sa->name, sa->header, shared_alloc);
  3559. node_copy_string(da->value, da->header, xml_memory_page_value_allocated_mask, sa->value, sa->header, shared_alloc);
  3560. }
  3561. }
  3562. }
  3563. PUGI__FN void node_copy_tree(xml_node_struct* dn, xml_node_struct* sn)
  3564. {
  3565. xml_allocator& alloc = get_allocator(dn);
  3566. xml_allocator* shared_alloc = (&alloc == &get_allocator(sn)) ? &alloc : 0;
  3567. node_copy_contents(dn, sn, shared_alloc);
  3568. xml_node_struct* dit = dn;
  3569. xml_node_struct* sit = sn->first_child;
  3570. while (sit && sit != sn)
  3571. {
  3572. // loop invariant: dit is inside the subtree rooted at dn
  3573. assert(dit);
  3574. // when a tree is copied into one of the descendants, we need to skip that subtree to avoid an infinite loop
  3575. if (sit != dn)
  3576. {
  3577. xml_node_struct* copy = append_new_node(dit, alloc, PUGI__NODETYPE(sit));
  3578. if (copy)
  3579. {
  3580. node_copy_contents(copy, sit, shared_alloc);
  3581. if (sit->first_child)
  3582. {
  3583. dit = copy;
  3584. sit = sit->first_child;
  3585. continue;
  3586. }
  3587. }
  3588. }
  3589. // continue to the next node
  3590. do
  3591. {
  3592. if (sit->next_sibling)
  3593. {
  3594. sit = sit->next_sibling;
  3595. break;
  3596. }
  3597. sit = sit->parent;
  3598. dit = dit->parent;
  3599. // loop invariant: dit is inside the subtree rooted at dn while sit is inside sn
  3600. assert(sit == sn || dit);
  3601. }
  3602. while (sit != sn);
  3603. }
  3604. assert(!sit || dit == dn->parent);
  3605. }
  3606. PUGI__FN void node_copy_attribute(xml_attribute_struct* da, xml_attribute_struct* sa)
  3607. {
  3608. xml_allocator& alloc = get_allocator(da);
  3609. xml_allocator* shared_alloc = (&alloc == &get_allocator(sa)) ? &alloc : 0;
  3610. node_copy_string(da->name, da->header, xml_memory_page_name_allocated_mask, sa->name, sa->header, shared_alloc);
  3611. node_copy_string(da->value, da->header, xml_memory_page_value_allocated_mask, sa->value, sa->header, shared_alloc);
  3612. }
  3613. inline bool is_text_node(xml_node_struct* node)
  3614. {
  3615. xml_node_type type = PUGI__NODETYPE(node);
  3616. return type == node_pcdata || type == node_cdata;
  3617. }
  3618. // get value with conversion functions
  3619. template <typename U> PUGI__FN PUGI__UNSIGNED_OVERFLOW U string_to_integer(const char_t* value, U minv, U maxv)
  3620. {
  3621. U result = 0;
  3622. const char_t* s = value;
  3623. while (PUGI__IS_CHARTYPE(*s, ct_space))
  3624. s++;
  3625. bool negative = (*s == '-');
  3626. s += (*s == '+' || *s == '-');
  3627. bool overflow = false;
  3628. if (s[0] == '0' && (s[1] | ' ') == 'x')
  3629. {
  3630. s += 2;
  3631. // since overflow detection relies on length of the sequence skip leading zeros
  3632. while (*s == '0')
  3633. s++;
  3634. const char_t* start = s;
  3635. for (;;)
  3636. {
  3637. if (static_cast<unsigned>(*s - '0') < 10)
  3638. result = result * 16 + (*s - '0');
  3639. else if (static_cast<unsigned>((*s | ' ') - 'a') < 6)
  3640. result = result * 16 + ((*s | ' ') - 'a' + 10);
  3641. else
  3642. break;
  3643. s++;
  3644. }
  3645. size_t digits = static_cast<size_t>(s - start);
  3646. overflow = digits > sizeof(U) * 2;
  3647. }
  3648. else
  3649. {
  3650. // since overflow detection relies on length of the sequence skip leading zeros
  3651. while (*s == '0')
  3652. s++;
  3653. const char_t* start = s;
  3654. for (;;)
  3655. {
  3656. if (static_cast<unsigned>(*s - '0') < 10)
  3657. result = result * 10 + (*s - '0');
  3658. else
  3659. break;
  3660. s++;
  3661. }
  3662. size_t digits = static_cast<size_t>(s - start);
  3663. PUGI__STATIC_ASSERT(sizeof(U) == 8 || sizeof(U) == 4 || sizeof(U) == 2);
  3664. const size_t max_digits10 = sizeof(U) == 8 ? 20 : sizeof(U) == 4 ? 10 : 5;
  3665. const char_t max_lead = sizeof(U) == 8 ? '1' : sizeof(U) == 4 ? '4' : '6';
  3666. const size_t high_bit = sizeof(U) * 8 - 1;
  3667. overflow = digits >= max_digits10 && !(digits == max_digits10 && (*start < max_lead || (*start == max_lead && result >> high_bit)));
  3668. }
  3669. if (negative)
  3670. {
  3671. // Workaround for crayc++ CC-3059: Expected no overflow in routine.
  3672. #ifdef _CRAYC
  3673. return (overflow || result > ~minv + 1) ? minv : ~result + 1;
  3674. #else
  3675. return (overflow || result > 0 - minv) ? minv : 0 - result;
  3676. #endif
  3677. }
  3678. else
  3679. return (overflow || result > maxv) ? maxv : result;
  3680. }
  3681. PUGI__FN int get_value_int(const char_t* value)
  3682. {
  3683. return string_to_integer<unsigned int>(value, static_cast<unsigned int>(INT_MIN), INT_MAX);
  3684. }
  3685. PUGI__FN unsigned int get_value_uint(const char_t* value)
  3686. {
  3687. return string_to_integer<unsigned int>(value, 0, UINT_MAX);
  3688. }
  3689. PUGI__FN double get_value_double(const char_t* value)
  3690. {
  3691. #ifdef PUGIXML_WCHAR_MODE
  3692. return wcstod(value, 0);
  3693. #else
  3694. return strtod(value, 0);
  3695. #endif
  3696. }
  3697. PUGI__FN float get_value_float(const char_t* value)
  3698. {
  3699. #ifdef PUGIXML_WCHAR_MODE
  3700. return static_cast<float>(wcstod(value, 0));
  3701. #else
  3702. return static_cast<float>(strtod(value, 0));
  3703. #endif
  3704. }
  3705. PUGI__FN bool get_value_bool(const char_t* value)
  3706. {
  3707. // only look at first char
  3708. char_t first = *value;
  3709. // 1*, t* (true), T* (True), y* (yes), Y* (YES)
  3710. return (first == '1' || first == 't' || first == 'T' || first == 'y' || first == 'Y');
  3711. }
  3712. #ifdef PUGIXML_HAS_LONG_LONG
  3713. PUGI__FN long long get_value_llong(const char_t* value)
  3714. {
  3715. return string_to_integer<unsigned long long>(value, static_cast<unsigned long long>(LLONG_MIN), LLONG_MAX);
  3716. }
  3717. PUGI__FN unsigned long long get_value_ullong(const char_t* value)
  3718. {
  3719. return string_to_integer<unsigned long long>(value, 0, ULLONG_MAX);
  3720. }
  3721. #endif
  3722. template <typename U> PUGI__FN PUGI__UNSIGNED_OVERFLOW char_t* integer_to_string(char_t* begin, char_t* end, U value, bool negative)
  3723. {
  3724. char_t* result = end - 1;
  3725. U rest = negative ? 0 - value : value;
  3726. do
  3727. {
  3728. *result-- = static_cast<char_t>('0' + (rest % 10));
  3729. rest /= 10;
  3730. }
  3731. while (rest);
  3732. assert(result >= begin);
  3733. (void)begin;
  3734. *result = '-';
  3735. return result + !negative;
  3736. }
  3737. // set value with conversion functions
  3738. template <typename String, typename Header>
  3739. PUGI__FN bool set_value_ascii(String& dest, Header& header, uintptr_t header_mask, char* buf)
  3740. {
  3741. #ifdef PUGIXML_WCHAR_MODE
  3742. char_t wbuf[128];
  3743. assert(strlen(buf) < sizeof(wbuf) / sizeof(wbuf[0]));
  3744. size_t offset = 0;
  3745. for (; buf[offset]; ++offset) wbuf[offset] = buf[offset];
  3746. return strcpy_insitu(dest, header, header_mask, wbuf, offset);
  3747. #else
  3748. return strcpy_insitu(dest, header, header_mask, buf, strlen(buf));
  3749. #endif
  3750. }
  3751. template <typename U, typename String, typename Header>
  3752. PUGI__FN bool set_value_integer(String& dest, Header& header, uintptr_t header_mask, U value, bool negative)
  3753. {
  3754. char_t buf[64];
  3755. char_t* end = buf + sizeof(buf) / sizeof(buf[0]);
  3756. char_t* begin = integer_to_string(buf, end, value, negative);
  3757. return strcpy_insitu(dest, header, header_mask, begin, end - begin);
  3758. }
  3759. template <typename String, typename Header>
  3760. PUGI__FN bool set_value_convert(String& dest, Header& header, uintptr_t header_mask, float value, int precision)
  3761. {
  3762. char buf[128];
  3763. PUGI__SNPRINTF(buf, "%.*g", precision, double(value));
  3764. return set_value_ascii(dest, header, header_mask, buf);
  3765. }
  3766. template <typename String, typename Header>
  3767. PUGI__FN bool set_value_convert(String& dest, Header& header, uintptr_t header_mask, double value, int precision)
  3768. {
  3769. char buf[128];
  3770. PUGI__SNPRINTF(buf, "%.*g", precision, value);
  3771. return set_value_ascii(dest, header, header_mask, buf);
  3772. }
  3773. template <typename String, typename Header>
  3774. PUGI__FN bool set_value_bool(String& dest, Header& header, uintptr_t header_mask, bool value)
  3775. {
  3776. return strcpy_insitu(dest, header, header_mask, value ? PUGIXML_TEXT("true") : PUGIXML_TEXT("false"), value ? 4 : 5);
  3777. }
  3778. PUGI__FN xml_parse_result load_buffer_impl(xml_document_struct* doc, xml_node_struct* root, void* contents, size_t size, unsigned int options, xml_encoding encoding, bool is_mutable, bool own, char_t** out_buffer)
  3779. {
  3780. // check input buffer
  3781. if (!contents && size) return make_parse_result(status_io_error);
  3782. // get actual encoding
  3783. xml_encoding buffer_encoding = impl::get_buffer_encoding(encoding, contents, size);
  3784. // get private buffer
  3785. char_t* buffer = 0;
  3786. size_t length = 0;
  3787. // coverity[var_deref_model]
  3788. if (!impl::convert_buffer(buffer, length, buffer_encoding, contents, size, is_mutable)) return impl::make_parse_result(status_out_of_memory);
  3789. // delete original buffer if we performed a conversion
  3790. if (own && buffer != contents && contents) impl::xml_memory::deallocate(contents);
  3791. // grab onto buffer if it's our buffer, user is responsible for deallocating contents himself
  3792. if (own || buffer != contents) *out_buffer = buffer;
  3793. // store buffer for offset_debug
  3794. doc->buffer = buffer;
  3795. // parse
  3796. xml_parse_result res = impl::xml_parser::parse(buffer, length, doc, root, options);
  3797. // remember encoding
  3798. res.encoding = buffer_encoding;
  3799. return res;
  3800. }
  3801. // we need to get length of entire file to load it in memory; the only (relatively) sane way to do it is via seek/tell trick
  3802. PUGI__FN xml_parse_status get_file_size(FILE* file, size_t& out_result)
  3803. {
  3804. #if defined(PUGI__MSVC_CRT_VERSION) && PUGI__MSVC_CRT_VERSION >= 1400 && !defined(_WIN32_WCE)
  3805. // there are 64-bit versions of fseek/ftell, let's use them
  3806. typedef __int64 length_type;
  3807. _fseeki64(file, 0, SEEK_END);
  3808. length_type length = _ftelli64(file);
  3809. _fseeki64(file, 0, SEEK_SET);
  3810. #elif defined(__MINGW32__) && !defined(__NO_MINGW_LFS) && (!defined(__STRICT_ANSI__) || defined(__MINGW64_VERSION_MAJOR))
  3811. // there are 64-bit versions of fseek/ftell, let's use them
  3812. typedef off64_t length_type;
  3813. fseeko64(file, 0, SEEK_END);
  3814. length_type length = ftello64(file);
  3815. fseeko64(file, 0, SEEK_SET);
  3816. #else
  3817. // if this is a 32-bit OS, long is enough; if this is a unix system, long is 64-bit, which is enough; otherwise we can't do anything anyway.
  3818. typedef long length_type;
  3819. fseek(file, 0, SEEK_END);
  3820. length_type length = ftell(file);
  3821. fseek(file, 0, SEEK_SET);
  3822. #endif
  3823. // check for I/O errors
  3824. if (length < 0) return status_io_error;
  3825. // check for overflow
  3826. size_t result = static_cast<size_t>(length);
  3827. if (static_cast<length_type>(result) != length) return status_out_of_memory;
  3828. // finalize
  3829. out_result = result;
  3830. return status_ok;
  3831. }
  3832. // This function assumes that buffer has extra sizeof(char_t) writable bytes after size
  3833. PUGI__FN size_t zero_terminate_buffer(void* buffer, size_t size, xml_encoding encoding)
  3834. {
  3835. // We only need to zero-terminate if encoding conversion does not do it for us
  3836. #ifdef PUGIXML_WCHAR_MODE
  3837. xml_encoding wchar_encoding = get_wchar_encoding();
  3838. if (encoding == wchar_encoding || need_endian_swap_utf(encoding, wchar_encoding))
  3839. {
  3840. size_t length = size / sizeof(char_t);
  3841. static_cast<char_t*>(buffer)[length] = 0;
  3842. return (length + 1) * sizeof(char_t);
  3843. }
  3844. #else
  3845. if (encoding == encoding_utf8)
  3846. {
  3847. static_cast<char*>(buffer)[size] = 0;
  3848. return size + 1;
  3849. }
  3850. #endif
  3851. return size;
  3852. }
  3853. PUGI__FN xml_parse_result load_file_impl(xml_document_struct* doc, FILE* file, unsigned int options, xml_encoding encoding, char_t** out_buffer)
  3854. {
  3855. if (!file) return make_parse_result(status_file_not_found);
  3856. // get file size (can result in I/O errors)
  3857. size_t size = 0;
  3858. xml_parse_status size_status = get_file_size(file, size);
  3859. if (size_status != status_ok) return make_parse_result(size_status);
  3860. size_t max_suffix_size = sizeof(char_t);
  3861. // allocate buffer for the whole file
  3862. char* contents = static_cast<char*>(xml_memory::allocate(size + max_suffix_size));
  3863. if (!contents) return make_parse_result(status_out_of_memory);
  3864. // read file in memory
  3865. size_t read_size = fread(contents, 1, size, file);
  3866. if (read_size != size)
  3867. {
  3868. xml_memory::deallocate(contents);
  3869. return make_parse_result(status_io_error);
  3870. }
  3871. xml_encoding real_encoding = get_buffer_encoding(encoding, contents, size);
  3872. return load_buffer_impl(doc, doc, contents, zero_terminate_buffer(contents, size, real_encoding), options, real_encoding, true, true, out_buffer);
  3873. }
  3874. PUGI__FN void close_file(FILE* file)
  3875. {
  3876. fclose(file);
  3877. }
  3878. #ifndef PUGIXML_NO_STL
  3879. template <typename T> struct xml_stream_chunk
  3880. {
  3881. static xml_stream_chunk* create()
  3882. {
  3883. void* memory = xml_memory::allocate(sizeof(xml_stream_chunk));
  3884. if (!memory) return 0;
  3885. return new (memory) xml_stream_chunk();
  3886. }
  3887. static void destroy(xml_stream_chunk* chunk)
  3888. {
  3889. // free chunk chain
  3890. while (chunk)
  3891. {
  3892. xml_stream_chunk* next_ = chunk->next;
  3893. xml_memory::deallocate(chunk);
  3894. chunk = next_;
  3895. }
  3896. }
  3897. xml_stream_chunk(): next(0), size(0)
  3898. {
  3899. }
  3900. xml_stream_chunk* next;
  3901. size_t size;
  3902. T data[xml_memory_page_size / sizeof(T)];
  3903. };
  3904. template <typename T> PUGI__FN xml_parse_status load_stream_data_noseek(std::basic_istream<T>& stream, void** out_buffer, size_t* out_size)
  3905. {
  3906. auto_deleter<xml_stream_chunk<T> > chunks(0, xml_stream_chunk<T>::destroy);
  3907. // read file to a chunk list
  3908. size_t total = 0;
  3909. xml_stream_chunk<T>* last = 0;
  3910. while (!stream.eof())
  3911. {
  3912. // allocate new chunk
  3913. xml_stream_chunk<T>* chunk = xml_stream_chunk<T>::create();
  3914. if (!chunk) return status_out_of_memory;
  3915. // append chunk to list
  3916. if (last) last = last->next = chunk;
  3917. else chunks.data = last = chunk;
  3918. // read data to chunk
  3919. stream.read(chunk->data, static_cast<std::streamsize>(sizeof(chunk->data) / sizeof(T)));
  3920. chunk->size = static_cast<size_t>(stream.gcount()) * sizeof(T);
  3921. // read may set failbit | eofbit in case gcount() is less than read length, so check for other I/O errors
  3922. if (stream.bad() || (!stream.eof() && stream.fail())) return status_io_error;
  3923. // guard against huge files (chunk size is small enough to make this overflow check work)
  3924. if (total + chunk->size < total) return status_out_of_memory;
  3925. total += chunk->size;
  3926. }
  3927. size_t max_suffix_size = sizeof(char_t);
  3928. // copy chunk list to a contiguous buffer
  3929. char* buffer = static_cast<char*>(xml_memory::allocate(total + max_suffix_size));
  3930. if (!buffer) return status_out_of_memory;
  3931. char* write = buffer;
  3932. for (xml_stream_chunk<T>* chunk = chunks.data; chunk; chunk = chunk->next)
  3933. {
  3934. assert(write + chunk->size <= buffer + total);
  3935. memcpy(write, chunk->data, chunk->size);
  3936. write += chunk->size;
  3937. }
  3938. assert(write == buffer + total);
  3939. // return buffer
  3940. *out_buffer = buffer;
  3941. *out_size = total;
  3942. return status_ok;
  3943. }
  3944. template <typename T> PUGI__FN xml_parse_status load_stream_data_seek(std::basic_istream<T>& stream, void** out_buffer, size_t* out_size)
  3945. {
  3946. // get length of remaining data in stream
  3947. typename std::basic_istream<T>::pos_type pos = stream.tellg();
  3948. stream.seekg(0, std::ios::end);
  3949. std::streamoff length = stream.tellg() - pos;
  3950. stream.seekg(pos);
  3951. if (stream.fail() || pos < 0) return status_io_error;
  3952. // guard against huge files
  3953. size_t read_length = static_cast<size_t>(length);
  3954. if (static_cast<std::streamsize>(read_length) != length || length < 0) return status_out_of_memory;
  3955. size_t max_suffix_size = sizeof(char_t);
  3956. // read stream data into memory (guard against stream exceptions with buffer holder)
  3957. auto_deleter<void> buffer(xml_memory::allocate(read_length * sizeof(T) + max_suffix_size), xml_memory::deallocate);
  3958. if (!buffer.data) return status_out_of_memory;
  3959. stream.read(static_cast<T*>(buffer.data), static_cast<std::streamsize>(read_length));
  3960. // read may set failbit | eofbit in case gcount() is less than read_length (i.e. line ending conversion), so check for other I/O errors
  3961. if (stream.bad() || (!stream.eof() && stream.fail())) return status_io_error;
  3962. // return buffer
  3963. size_t actual_length = static_cast<size_t>(stream.gcount());
  3964. assert(actual_length <= read_length);
  3965. *out_buffer = buffer.release();
  3966. *out_size = actual_length * sizeof(T);
  3967. return status_ok;
  3968. }
  3969. template <typename T> PUGI__FN xml_parse_result load_stream_impl(xml_document_struct* doc, std::basic_istream<T>& stream, unsigned int options, xml_encoding encoding, char_t** out_buffer)
  3970. {
  3971. void* buffer = 0;
  3972. size_t size = 0;
  3973. xml_parse_status status = status_ok;
  3974. // if stream has an error bit set, bail out (otherwise tellg() can fail and we'll clear error bits)
  3975. if (stream.fail()) return make_parse_result(status_io_error);
  3976. // load stream to memory (using seek-based implementation if possible, since it's faster and takes less memory)
  3977. if (stream.tellg() < 0)
  3978. {
  3979. stream.clear(); // clear error flags that could be set by a failing tellg
  3980. status = load_stream_data_noseek(stream, &buffer, &size);
  3981. }
  3982. else
  3983. status = load_stream_data_seek(stream, &buffer, &size);
  3984. if (status != status_ok) return make_parse_result(status);
  3985. xml_encoding real_encoding = get_buffer_encoding(encoding, buffer, size);
  3986. return load_buffer_impl(doc, doc, buffer, zero_terminate_buffer(buffer, size, real_encoding), options, real_encoding, true, true, out_buffer);
  3987. }
  3988. #endif
  3989. #if defined(PUGI__MSVC_CRT_VERSION) || defined(__BORLANDC__) || (defined(__MINGW32__) && (!defined(__STRICT_ANSI__) || defined(__MINGW64_VERSION_MAJOR)))
  3990. PUGI__FN FILE* open_file_wide(const wchar_t* path, const wchar_t* mode)
  3991. {
  3992. #if defined(PUGI__MSVC_CRT_VERSION) && PUGI__MSVC_CRT_VERSION >= 1400
  3993. FILE* file = 0;
  3994. return _wfopen_s(&file, path, mode) == 0 ? file : 0;
  3995. #else
  3996. return _wfopen(path, mode);
  3997. #endif
  3998. }
  3999. #else
  4000. PUGI__FN char* convert_path_heap(const wchar_t* str)
  4001. {
  4002. assert(str);
  4003. // first pass: get length in utf8 characters
  4004. size_t length = strlength_wide(str);
  4005. size_t size = as_utf8_begin(str, length);
  4006. // allocate resulting string
  4007. char* result = static_cast<char*>(xml_memory::allocate(size + 1));
  4008. if (!result) return 0;
  4009. // second pass: convert to utf8
  4010. as_utf8_end(result, size, str, length);
  4011. // zero-terminate
  4012. result[size] = 0;
  4013. return result;
  4014. }
  4015. PUGI__FN FILE* open_file_wide(const wchar_t* path, const wchar_t* mode)
  4016. {
  4017. // there is no standard function to open wide paths, so our best bet is to try utf8 path
  4018. char* path_utf8 = convert_path_heap(path);
  4019. if (!path_utf8) return 0;
  4020. // convert mode to ASCII (we mirror _wfopen interface)
  4021. char mode_ascii[4] = {0};
  4022. for (size_t i = 0; mode[i]; ++i) mode_ascii[i] = static_cast<char>(mode[i]);
  4023. // try to open the utf8 path
  4024. FILE* result = fopen(path_utf8, mode_ascii);
  4025. // free dummy buffer
  4026. xml_memory::deallocate(path_utf8);
  4027. return result;
  4028. }
  4029. #endif
  4030. PUGI__FN FILE* open_file(const char* path, const char* mode)
  4031. {
  4032. #if defined(PUGI__MSVC_CRT_VERSION) && PUGI__MSVC_CRT_VERSION >= 1400
  4033. FILE* file = 0;
  4034. return fopen_s(&file, path, mode) == 0 ? file : 0;
  4035. #else
  4036. return fopen(path, mode);
  4037. #endif
  4038. }
  4039. PUGI__FN bool save_file_impl(const xml_document& doc, FILE* file, const char_t* indent, unsigned int flags, xml_encoding encoding)
  4040. {
  4041. if (!file) return false;
  4042. xml_writer_file writer(file);
  4043. doc.save(writer, indent, flags, encoding);
  4044. return ferror(file) == 0;
  4045. }
  4046. struct name_null_sentry
  4047. {
  4048. xml_node_struct* node;
  4049. char_t* name;
  4050. name_null_sentry(xml_node_struct* node_): node(node_), name(node_->name)
  4051. {
  4052. node->name = 0;
  4053. }
  4054. ~name_null_sentry()
  4055. {
  4056. node->name = name;
  4057. }
  4058. };
  4059. PUGI__NS_END
  4060. namespace pugi
  4061. {
  4062. PUGI__FN xml_writer_file::xml_writer_file(void* file_): file(file_)
  4063. {
  4064. }
  4065. PUGI__FN void xml_writer_file::write(const void* data, size_t size)
  4066. {
  4067. size_t result = fwrite(data, 1, size, static_cast<FILE*>(file));
  4068. (void)!result; // unfortunately we can't do proper error handling here
  4069. }
  4070. #ifndef PUGIXML_NO_STL
  4071. PUGI__FN xml_writer_stream::xml_writer_stream(std::basic_ostream<char, std::char_traits<char> >& stream): narrow_stream(&stream), wide_stream(0)
  4072. {
  4073. }
  4074. PUGI__FN xml_writer_stream::xml_writer_stream(std::basic_ostream<wchar_t, std::char_traits<wchar_t> >& stream): narrow_stream(0), wide_stream(&stream)
  4075. {
  4076. }
  4077. PUGI__FN void xml_writer_stream::write(const void* data, size_t size)
  4078. {
  4079. if (narrow_stream)
  4080. {
  4081. assert(!wide_stream);
  4082. narrow_stream->write(reinterpret_cast<const char*>(data), static_cast<std::streamsize>(size));
  4083. }
  4084. else
  4085. {
  4086. assert(wide_stream);
  4087. assert(size % sizeof(wchar_t) == 0);
  4088. wide_stream->write(reinterpret_cast<const wchar_t*>(data), static_cast<std::streamsize>(size / sizeof(wchar_t)));
  4089. }
  4090. }
  4091. #endif
  4092. PUGI__FN xml_tree_walker::xml_tree_walker(): _depth(0)
  4093. {
  4094. }
  4095. PUGI__FN xml_tree_walker::~xml_tree_walker()
  4096. {
  4097. }
  4098. PUGI__FN int xml_tree_walker::depth() const
  4099. {
  4100. return _depth;
  4101. }
  4102. PUGI__FN bool xml_tree_walker::begin(xml_node&)
  4103. {
  4104. return true;
  4105. }
  4106. PUGI__FN bool xml_tree_walker::end(xml_node&)
  4107. {
  4108. return true;
  4109. }
  4110. PUGI__FN xml_attribute::xml_attribute(): _attr(0)
  4111. {
  4112. }
  4113. PUGI__FN xml_attribute::xml_attribute(xml_attribute_struct* attr): _attr(attr)
  4114. {
  4115. }
  4116. PUGI__FN static void unspecified_bool_xml_attribute(xml_attribute***)
  4117. {
  4118. }
  4119. PUGI__FN xml_attribute::operator xml_attribute::unspecified_bool_type() const
  4120. {
  4121. return _attr ? unspecified_bool_xml_attribute : 0;
  4122. }
  4123. PUGI__FN bool xml_attribute::operator!() const
  4124. {
  4125. return !_attr;
  4126. }
  4127. PUGI__FN bool xml_attribute::operator==(const xml_attribute& r) const
  4128. {
  4129. return (_attr == r._attr);
  4130. }
  4131. PUGI__FN bool xml_attribute::operator!=(const xml_attribute& r) const
  4132. {
  4133. return (_attr != r._attr);
  4134. }
  4135. PUGI__FN bool xml_attribute::operator<(const xml_attribute& r) const
  4136. {
  4137. return (_attr < r._attr);
  4138. }
  4139. PUGI__FN bool xml_attribute::operator>(const xml_attribute& r) const
  4140. {
  4141. return (_attr > r._attr);
  4142. }
  4143. PUGI__FN bool xml_attribute::operator<=(const xml_attribute& r) const
  4144. {
  4145. return (_attr <= r._attr);
  4146. }
  4147. PUGI__FN bool xml_attribute::operator>=(const xml_attribute& r) const
  4148. {
  4149. return (_attr >= r._attr);
  4150. }
  4151. PUGI__FN xml_attribute xml_attribute::next_attribute() const
  4152. {
  4153. return _attr ? xml_attribute(_attr->next_attribute) : xml_attribute();
  4154. }
  4155. PUGI__FN xml_attribute xml_attribute::previous_attribute() const
  4156. {
  4157. return _attr && _attr->prev_attribute_c->next_attribute ? xml_attribute(_attr->prev_attribute_c) : xml_attribute();
  4158. }
  4159. PUGI__FN const char_t* xml_attribute::as_string(const char_t* def) const
  4160. {
  4161. return (_attr && _attr->value) ? _attr->value + 0 : def;
  4162. }
  4163. PUGI__FN int xml_attribute::as_int(int def) const
  4164. {
  4165. return (_attr && _attr->value) ? impl::get_value_int(_attr->value) : def;
  4166. }
  4167. PUGI__FN unsigned int xml_attribute::as_uint(unsigned int def) const
  4168. {
  4169. return (_attr && _attr->value) ? impl::get_value_uint(_attr->value) : def;
  4170. }
  4171. PUGI__FN double xml_attribute::as_double(double def) const
  4172. {
  4173. return (_attr && _attr->value) ? impl::get_value_double(_attr->value) : def;
  4174. }
  4175. PUGI__FN float xml_attribute::as_float(float def) const
  4176. {
  4177. return (_attr && _attr->value) ? impl::get_value_float(_attr->value) : def;
  4178. }
  4179. PUGI__FN bool xml_attribute::as_bool(bool def) const
  4180. {
  4181. return (_attr && _attr->value) ? impl::get_value_bool(_attr->value) : def;
  4182. }
  4183. #ifdef PUGIXML_HAS_LONG_LONG
  4184. PUGI__FN long long xml_attribute::as_llong(long long def) const
  4185. {
  4186. return (_attr && _attr->value) ? impl::get_value_llong(_attr->value) : def;
  4187. }
  4188. PUGI__FN unsigned long long xml_attribute::as_ullong(unsigned long long def) const
  4189. {
  4190. return (_attr && _attr->value) ? impl::get_value_ullong(_attr->value) : def;
  4191. }
  4192. #endif
  4193. PUGI__FN bool xml_attribute::empty() const
  4194. {
  4195. return !_attr;
  4196. }
  4197. PUGI__FN const char_t* xml_attribute::name() const
  4198. {
  4199. return (_attr && _attr->name) ? _attr->name + 0 : PUGIXML_TEXT("");
  4200. }
  4201. PUGI__FN const char_t* xml_attribute::value() const
  4202. {
  4203. return (_attr && _attr->value) ? _attr->value + 0 : PUGIXML_TEXT("");
  4204. }
  4205. PUGI__FN size_t xml_attribute::hash_value() const
  4206. {
  4207. return static_cast<size_t>(reinterpret_cast<uintptr_t>(_attr) / sizeof(xml_attribute_struct));
  4208. }
  4209. PUGI__FN xml_attribute_struct* xml_attribute::internal_object() const
  4210. {
  4211. return _attr;
  4212. }
  4213. PUGI__FN xml_attribute& xml_attribute::operator=(const char_t* rhs)
  4214. {
  4215. set_value(rhs);
  4216. return *this;
  4217. }
  4218. PUGI__FN xml_attribute& xml_attribute::operator=(int rhs)
  4219. {
  4220. set_value(rhs);
  4221. return *this;
  4222. }
  4223. PUGI__FN xml_attribute& xml_attribute::operator=(unsigned int rhs)
  4224. {
  4225. set_value(rhs);
  4226. return *this;
  4227. }
  4228. PUGI__FN xml_attribute& xml_attribute::operator=(long rhs)
  4229. {
  4230. set_value(rhs);
  4231. return *this;
  4232. }
  4233. PUGI__FN xml_attribute& xml_attribute::operator=(unsigned long rhs)
  4234. {
  4235. set_value(rhs);
  4236. return *this;
  4237. }
  4238. PUGI__FN xml_attribute& xml_attribute::operator=(double rhs)
  4239. {
  4240. set_value(rhs);
  4241. return *this;
  4242. }
  4243. PUGI__FN xml_attribute& xml_attribute::operator=(float rhs)
  4244. {
  4245. set_value(rhs);
  4246. return *this;
  4247. }
  4248. PUGI__FN xml_attribute& xml_attribute::operator=(bool rhs)
  4249. {
  4250. set_value(rhs);
  4251. return *this;
  4252. }
  4253. #ifdef PUGIXML_HAS_LONG_LONG
  4254. PUGI__FN xml_attribute& xml_attribute::operator=(long long rhs)
  4255. {
  4256. set_value(rhs);
  4257. return *this;
  4258. }
  4259. PUGI__FN xml_attribute& xml_attribute::operator=(unsigned long long rhs)
  4260. {
  4261. set_value(rhs);
  4262. return *this;
  4263. }
  4264. #endif
  4265. PUGI__FN bool xml_attribute::set_name(const char_t* rhs)
  4266. {
  4267. if (!_attr) return false;
  4268. return impl::strcpy_insitu(_attr->name, _attr->header, impl::xml_memory_page_name_allocated_mask, rhs, impl::strlength(rhs));
  4269. }
  4270. PUGI__FN bool xml_attribute::set_value(const char_t* rhs)
  4271. {
  4272. if (!_attr) return false;
  4273. return impl::strcpy_insitu(_attr->value, _attr->header, impl::xml_memory_page_value_allocated_mask, rhs, impl::strlength(rhs));
  4274. }
  4275. PUGI__FN bool xml_attribute::set_value(int rhs)
  4276. {
  4277. if (!_attr) return false;
  4278. return impl::set_value_integer<unsigned int>(_attr->value, _attr->header, impl::xml_memory_page_value_allocated_mask, rhs, rhs < 0);
  4279. }
  4280. PUGI__FN bool xml_attribute::set_value(unsigned int rhs)
  4281. {
  4282. if (!_attr) return false;
  4283. return impl::set_value_integer<unsigned int>(_attr->value, _attr->header, impl::xml_memory_page_value_allocated_mask, rhs, false);
  4284. }
  4285. PUGI__FN bool xml_attribute::set_value(long rhs)
  4286. {
  4287. if (!_attr) return false;
  4288. return impl::set_value_integer<unsigned long>(_attr->value, _attr->header, impl::xml_memory_page_value_allocated_mask, rhs, rhs < 0);
  4289. }
  4290. PUGI__FN bool xml_attribute::set_value(unsigned long rhs)
  4291. {
  4292. if (!_attr) return false;
  4293. return impl::set_value_integer<unsigned long>(_attr->value, _attr->header, impl::xml_memory_page_value_allocated_mask, rhs, false);
  4294. }
  4295. PUGI__FN bool xml_attribute::set_value(double rhs)
  4296. {
  4297. if (!_attr) return false;
  4298. return impl::set_value_convert(_attr->value, _attr->header, impl::xml_memory_page_value_allocated_mask, rhs, default_double_precision);
  4299. }
  4300. PUGI__FN bool xml_attribute::set_value(double rhs, int precision)
  4301. {
  4302. if (!_attr) return false;
  4303. return impl::set_value_convert(_attr->value, _attr->header, impl::xml_memory_page_value_allocated_mask, rhs, precision);
  4304. }
  4305. PUGI__FN bool xml_attribute::set_value(float rhs)
  4306. {
  4307. if (!_attr) return false;
  4308. return impl::set_value_convert(_attr->value, _attr->header, impl::xml_memory_page_value_allocated_mask, rhs, default_float_precision);
  4309. }
  4310. PUGI__FN bool xml_attribute::set_value(float rhs, int precision)
  4311. {
  4312. if (!_attr) return false;
  4313. return impl::set_value_convert(_attr->value, _attr->header, impl::xml_memory_page_value_allocated_mask, rhs, precision);
  4314. }
  4315. PUGI__FN bool xml_attribute::set_value(bool rhs)
  4316. {
  4317. if (!_attr) return false;
  4318. return impl::set_value_bool(_attr->value, _attr->header, impl::xml_memory_page_value_allocated_mask, rhs);
  4319. }
  4320. #ifdef PUGIXML_HAS_LONG_LONG
  4321. PUGI__FN bool xml_attribute::set_value(long long rhs)
  4322. {
  4323. if (!_attr) return false;
  4324. return impl::set_value_integer<unsigned long long>(_attr->value, _attr->header, impl::xml_memory_page_value_allocated_mask, rhs, rhs < 0);
  4325. }
  4326. PUGI__FN bool xml_attribute::set_value(unsigned long long rhs)
  4327. {
  4328. if (!_attr) return false;
  4329. return impl::set_value_integer<unsigned long long>(_attr->value, _attr->header, impl::xml_memory_page_value_allocated_mask, rhs, false);
  4330. }
  4331. #endif
  4332. #ifdef __BORLANDC__
  4333. PUGI__FN bool operator&&(const xml_attribute& lhs, bool rhs)
  4334. {
  4335. return (bool)lhs && rhs;
  4336. }
  4337. PUGI__FN bool operator||(const xml_attribute& lhs, bool rhs)
  4338. {
  4339. return (bool)lhs || rhs;
  4340. }
  4341. #endif
  4342. PUGI__FN xml_node::xml_node(): _root(0)
  4343. {
  4344. }
  4345. PUGI__FN xml_node::xml_node(xml_node_struct* p): _root(p)
  4346. {
  4347. }
  4348. PUGI__FN static void unspecified_bool_xml_node(xml_node***)
  4349. {
  4350. }
  4351. PUGI__FN xml_node::operator xml_node::unspecified_bool_type() const
  4352. {
  4353. return _root ? unspecified_bool_xml_node : 0;
  4354. }
  4355. PUGI__FN bool xml_node::operator!() const
  4356. {
  4357. return !_root;
  4358. }
  4359. PUGI__FN xml_node::iterator xml_node::begin() const
  4360. {
  4361. return iterator(_root ? _root->first_child + 0 : 0, _root);
  4362. }
  4363. PUGI__FN xml_node::iterator xml_node::end() const
  4364. {
  4365. return iterator(0, _root);
  4366. }
  4367. PUGI__FN xml_node::attribute_iterator xml_node::attributes_begin() const
  4368. {
  4369. return attribute_iterator(_root ? _root->first_attribute + 0 : 0, _root);
  4370. }
  4371. PUGI__FN xml_node::attribute_iterator xml_node::attributes_end() const
  4372. {
  4373. return attribute_iterator(0, _root);
  4374. }
  4375. PUGI__FN xml_object_range<xml_node_iterator> xml_node::children() const
  4376. {
  4377. return xml_object_range<xml_node_iterator>(begin(), end());
  4378. }
  4379. PUGI__FN xml_object_range<xml_named_node_iterator> xml_node::children(const char_t* name_) const
  4380. {
  4381. return xml_object_range<xml_named_node_iterator>(xml_named_node_iterator(child(name_)._root, _root, name_), xml_named_node_iterator(0, _root, name_));
  4382. }
  4383. PUGI__FN xml_object_range<xml_attribute_iterator> xml_node::attributes() const
  4384. {
  4385. return xml_object_range<xml_attribute_iterator>(attributes_begin(), attributes_end());
  4386. }
  4387. PUGI__FN bool xml_node::operator==(const xml_node& r) const
  4388. {
  4389. return (_root == r._root);
  4390. }
  4391. PUGI__FN bool xml_node::operator!=(const xml_node& r) const
  4392. {
  4393. return (_root != r._root);
  4394. }
  4395. PUGI__FN bool xml_node::operator<(const xml_node& r) const
  4396. {
  4397. return (_root < r._root);
  4398. }
  4399. PUGI__FN bool xml_node::operator>(const xml_node& r) const
  4400. {
  4401. return (_root > r._root);
  4402. }
  4403. PUGI__FN bool xml_node::operator<=(const xml_node& r) const
  4404. {
  4405. return (_root <= r._root);
  4406. }
  4407. PUGI__FN bool xml_node::operator>=(const xml_node& r) const
  4408. {
  4409. return (_root >= r._root);
  4410. }
  4411. PUGI__FN bool xml_node::empty() const
  4412. {
  4413. return !_root;
  4414. }
  4415. PUGI__FN const char_t* xml_node::name() const
  4416. {
  4417. return (_root && _root->name) ? _root->name + 0 : PUGIXML_TEXT("");
  4418. }
  4419. PUGI__FN xml_node_type xml_node::type() const
  4420. {
  4421. return _root ? PUGI__NODETYPE(_root) : node_null;
  4422. }
  4423. PUGI__FN const char_t* xml_node::value() const
  4424. {
  4425. return (_root && _root->value) ? _root->value + 0 : PUGIXML_TEXT("");
  4426. }
  4427. PUGI__FN xml_node xml_node::child(const char_t* name_) const
  4428. {
  4429. if (!_root) return xml_node();
  4430. for (xml_node_struct* i = _root->first_child; i; i = i->next_sibling)
  4431. if (i->name && impl::strequal(name_, i->name)) return xml_node(i);
  4432. return xml_node();
  4433. }
  4434. PUGI__FN xml_attribute xml_node::attribute(const char_t* name_) const
  4435. {
  4436. if (!_root) return xml_attribute();
  4437. for (xml_attribute_struct* i = _root->first_attribute; i; i = i->next_attribute)
  4438. if (i->name && impl::strequal(name_, i->name))
  4439. return xml_attribute(i);
  4440. return xml_attribute();
  4441. }
  4442. PUGI__FN xml_node xml_node::next_sibling(const char_t* name_) const
  4443. {
  4444. if (!_root) return xml_node();
  4445. for (xml_node_struct* i = _root->next_sibling; i; i = i->next_sibling)
  4446. if (i->name && impl::strequal(name_, i->name)) return xml_node(i);
  4447. return xml_node();
  4448. }
  4449. PUGI__FN xml_node xml_node::next_sibling() const
  4450. {
  4451. return _root ? xml_node(_root->next_sibling) : xml_node();
  4452. }
  4453. PUGI__FN xml_node xml_node::previous_sibling(const char_t* name_) const
  4454. {
  4455. if (!_root) return xml_node();
  4456. for (xml_node_struct* i = _root->prev_sibling_c; i->next_sibling; i = i->prev_sibling_c)
  4457. if (i->name && impl::strequal(name_, i->name)) return xml_node(i);
  4458. return xml_node();
  4459. }
  4460. PUGI__FN xml_attribute xml_node::attribute(const char_t* name_, xml_attribute& hint_) const
  4461. {
  4462. xml_attribute_struct* hint = hint_._attr;
  4463. // if hint is not an attribute of node, behavior is not defined
  4464. assert(!hint || (_root && impl::is_attribute_of(hint, _root)));
  4465. if (!_root) return xml_attribute();
  4466. // optimistically search from hint up until the end
  4467. for (xml_attribute_struct* i = hint; i; i = i->next_attribute)
  4468. if (i->name && impl::strequal(name_, i->name))
  4469. {
  4470. // update hint to maximize efficiency of searching for consecutive attributes
  4471. hint_._attr = i->next_attribute;
  4472. return xml_attribute(i);
  4473. }
  4474. // wrap around and search from the first attribute until the hint
  4475. // 'j' null pointer check is technically redundant, but it prevents a crash in case the assertion above fails
  4476. for (xml_attribute_struct* j = _root->first_attribute; j && j != hint; j = j->next_attribute)
  4477. if (j->name && impl::strequal(name_, j->name))
  4478. {
  4479. // update hint to maximize efficiency of searching for consecutive attributes
  4480. hint_._attr = j->next_attribute;
  4481. return xml_attribute(j);
  4482. }
  4483. return xml_attribute();
  4484. }
  4485. PUGI__FN xml_node xml_node::previous_sibling() const
  4486. {
  4487. if (!_root) return xml_node();
  4488. if (_root->prev_sibling_c->next_sibling) return xml_node(_root->prev_sibling_c);
  4489. else return xml_node();
  4490. }
  4491. PUGI__FN xml_node xml_node::parent() const
  4492. {
  4493. return _root ? xml_node(_root->parent) : xml_node();
  4494. }
  4495. PUGI__FN xml_node xml_node::root() const
  4496. {
  4497. return _root ? xml_node(&impl::get_document(_root)) : xml_node();
  4498. }
  4499. PUGI__FN xml_text xml_node::text() const
  4500. {
  4501. return xml_text(_root);
  4502. }
  4503. PUGI__FN const char_t* xml_node::child_value() const
  4504. {
  4505. if (!_root) return PUGIXML_TEXT("");
  4506. // element nodes can have value if parse_embed_pcdata was used
  4507. if (PUGI__NODETYPE(_root) == node_element && _root->value)
  4508. return _root->value;
  4509. for (xml_node_struct* i = _root->first_child; i; i = i->next_sibling)
  4510. if (impl::is_text_node(i) && i->value)
  4511. return i->value;
  4512. return PUGIXML_TEXT("");
  4513. }
  4514. PUGI__FN const char_t* xml_node::child_value(const char_t* name_) const
  4515. {
  4516. return child(name_).child_value();
  4517. }
  4518. PUGI__FN xml_attribute xml_node::first_attribute() const
  4519. {
  4520. return _root ? xml_attribute(_root->first_attribute) : xml_attribute();
  4521. }
  4522. PUGI__FN xml_attribute xml_node::last_attribute() const
  4523. {
  4524. return _root && _root->first_attribute ? xml_attribute(_root->first_attribute->prev_attribute_c) : xml_attribute();
  4525. }
  4526. PUGI__FN xml_node xml_node::first_child() const
  4527. {
  4528. return _root ? xml_node(_root->first_child) : xml_node();
  4529. }
  4530. PUGI__FN xml_node xml_node::last_child() const
  4531. {
  4532. return _root && _root->first_child ? xml_node(_root->first_child->prev_sibling_c) : xml_node();
  4533. }
  4534. PUGI__FN bool xml_node::set_name(const char_t* rhs)
  4535. {
  4536. xml_node_type type_ = _root ? PUGI__NODETYPE(_root) : node_null;
  4537. if (type_ != node_element && type_ != node_pi && type_ != node_declaration)
  4538. return false;
  4539. return impl::strcpy_insitu(_root->name, _root->header, impl::xml_memory_page_name_allocated_mask, rhs, impl::strlength(rhs));
  4540. }
  4541. PUGI__FN bool xml_node::set_value(const char_t* rhs)
  4542. {
  4543. xml_node_type type_ = _root ? PUGI__NODETYPE(_root) : node_null;
  4544. if (type_ != node_pcdata && type_ != node_cdata && type_ != node_comment && type_ != node_pi && type_ != node_doctype)
  4545. return false;
  4546. return impl::strcpy_insitu(_root->value, _root->header, impl::xml_memory_page_value_allocated_mask, rhs, impl::strlength(rhs));
  4547. }
  4548. PUGI__FN xml_attribute xml_node::append_attribute(const char_t* name_)
  4549. {
  4550. if (!impl::allow_insert_attribute(type())) return xml_attribute();
  4551. impl::xml_allocator& alloc = impl::get_allocator(_root);
  4552. if (!alloc.reserve()) return xml_attribute();
  4553. xml_attribute a(impl::allocate_attribute(alloc));
  4554. if (!a) return xml_attribute();
  4555. impl::append_attribute(a._attr, _root);
  4556. a.set_name(name_);
  4557. return a;
  4558. }
  4559. PUGI__FN xml_attribute xml_node::prepend_attribute(const char_t* name_)
  4560. {
  4561. if (!impl::allow_insert_attribute(type())) return xml_attribute();
  4562. impl::xml_allocator& alloc = impl::get_allocator(_root);
  4563. if (!alloc.reserve()) return xml_attribute();
  4564. xml_attribute a(impl::allocate_attribute(alloc));
  4565. if (!a) return xml_attribute();
  4566. impl::prepend_attribute(a._attr, _root);
  4567. a.set_name(name_);
  4568. return a;
  4569. }
  4570. PUGI__FN xml_attribute xml_node::insert_attribute_after(const char_t* name_, const xml_attribute& attr)
  4571. {
  4572. if (!impl::allow_insert_attribute(type())) return xml_attribute();
  4573. if (!attr || !impl::is_attribute_of(attr._attr, _root)) return xml_attribute();
  4574. impl::xml_allocator& alloc = impl::get_allocator(_root);
  4575. if (!alloc.reserve()) return xml_attribute();
  4576. xml_attribute a(impl::allocate_attribute(alloc));
  4577. if (!a) return xml_attribute();
  4578. impl::insert_attribute_after(a._attr, attr._attr, _root);
  4579. a.set_name(name_);
  4580. return a;
  4581. }
  4582. PUGI__FN xml_attribute xml_node::insert_attribute_before(const char_t* name_, const xml_attribute& attr)
  4583. {
  4584. if (!impl::allow_insert_attribute(type())) return xml_attribute();
  4585. if (!attr || !impl::is_attribute_of(attr._attr, _root)) return xml_attribute();
  4586. impl::xml_allocator& alloc = impl::get_allocator(_root);
  4587. if (!alloc.reserve()) return xml_attribute();
  4588. xml_attribute a(impl::allocate_attribute(alloc));
  4589. if (!a) return xml_attribute();
  4590. impl::insert_attribute_before(a._attr, attr._attr, _root);
  4591. a.set_name(name_);
  4592. return a;
  4593. }
  4594. PUGI__FN xml_attribute xml_node::append_copy(const xml_attribute& proto)
  4595. {
  4596. if (!proto) return xml_attribute();
  4597. if (!impl::allow_insert_attribute(type())) return xml_attribute();
  4598. impl::xml_allocator& alloc = impl::get_allocator(_root);
  4599. if (!alloc.reserve()) return xml_attribute();
  4600. xml_attribute a(impl::allocate_attribute(alloc));
  4601. if (!a) return xml_attribute();
  4602. impl::append_attribute(a._attr, _root);
  4603. impl::node_copy_attribute(a._attr, proto._attr);
  4604. return a;
  4605. }
  4606. PUGI__FN xml_attribute xml_node::prepend_copy(const xml_attribute& proto)
  4607. {
  4608. if (!proto) return xml_attribute();
  4609. if (!impl::allow_insert_attribute(type())) return xml_attribute();
  4610. impl::xml_allocator& alloc = impl::get_allocator(_root);
  4611. if (!alloc.reserve()) return xml_attribute();
  4612. xml_attribute a(impl::allocate_attribute(alloc));
  4613. if (!a) return xml_attribute();
  4614. impl::prepend_attribute(a._attr, _root);
  4615. impl::node_copy_attribute(a._attr, proto._attr);
  4616. return a;
  4617. }
  4618. PUGI__FN xml_attribute xml_node::insert_copy_after(const xml_attribute& proto, const xml_attribute& attr)
  4619. {
  4620. if (!proto) return xml_attribute();
  4621. if (!impl::allow_insert_attribute(type())) return xml_attribute();
  4622. if (!attr || !impl::is_attribute_of(attr._attr, _root)) return xml_attribute();
  4623. impl::xml_allocator& alloc = impl::get_allocator(_root);
  4624. if (!alloc.reserve()) return xml_attribute();
  4625. xml_attribute a(impl::allocate_attribute(alloc));
  4626. if (!a) return xml_attribute();
  4627. impl::insert_attribute_after(a._attr, attr._attr, _root);
  4628. impl::node_copy_attribute(a._attr, proto._attr);
  4629. return a;
  4630. }
  4631. PUGI__FN xml_attribute xml_node::insert_copy_before(const xml_attribute& proto, const xml_attribute& attr)
  4632. {
  4633. if (!proto) return xml_attribute();
  4634. if (!impl::allow_insert_attribute(type())) return xml_attribute();
  4635. if (!attr || !impl::is_attribute_of(attr._attr, _root)) return xml_attribute();
  4636. impl::xml_allocator& alloc = impl::get_allocator(_root);
  4637. if (!alloc.reserve()) return xml_attribute();
  4638. xml_attribute a(impl::allocate_attribute(alloc));
  4639. if (!a) return xml_attribute();
  4640. impl::insert_attribute_before(a._attr, attr._attr, _root);
  4641. impl::node_copy_attribute(a._attr, proto._attr);
  4642. return a;
  4643. }
  4644. PUGI__FN xml_node xml_node::append_child(xml_node_type type_)
  4645. {
  4646. if (!impl::allow_insert_child(type(), type_)) return xml_node();
  4647. impl::xml_allocator& alloc = impl::get_allocator(_root);
  4648. if (!alloc.reserve()) return xml_node();
  4649. xml_node n(impl::allocate_node(alloc, type_));
  4650. if (!n) return xml_node();
  4651. impl::append_node(n._root, _root);
  4652. if (type_ == node_declaration) n.set_name(PUGIXML_TEXT("xml"));
  4653. return n;
  4654. }
  4655. PUGI__FN xml_node xml_node::prepend_child(xml_node_type type_)
  4656. {
  4657. if (!impl::allow_insert_child(type(), type_)) return xml_node();
  4658. impl::xml_allocator& alloc = impl::get_allocator(_root);
  4659. if (!alloc.reserve()) return xml_node();
  4660. xml_node n(impl::allocate_node(alloc, type_));
  4661. if (!n) return xml_node();
  4662. impl::prepend_node(n._root, _root);
  4663. if (type_ == node_declaration) n.set_name(PUGIXML_TEXT("xml"));
  4664. return n;
  4665. }
  4666. PUGI__FN xml_node xml_node::insert_child_before(xml_node_type type_, const xml_node& node)
  4667. {
  4668. if (!impl::allow_insert_child(type(), type_)) return xml_node();
  4669. if (!node._root || node._root->parent != _root) return xml_node();
  4670. impl::xml_allocator& alloc = impl::get_allocator(_root);
  4671. if (!alloc.reserve()) return xml_node();
  4672. xml_node n(impl::allocate_node(alloc, type_));
  4673. if (!n) return xml_node();
  4674. impl::insert_node_before(n._root, node._root);
  4675. if (type_ == node_declaration) n.set_name(PUGIXML_TEXT("xml"));
  4676. return n;
  4677. }
  4678. PUGI__FN xml_node xml_node::insert_child_after(xml_node_type type_, const xml_node& node)
  4679. {
  4680. if (!impl::allow_insert_child(type(), type_)) return xml_node();
  4681. if (!node._root || node._root->parent != _root) return xml_node();
  4682. impl::xml_allocator& alloc = impl::get_allocator(_root);
  4683. if (!alloc.reserve()) return xml_node();
  4684. xml_node n(impl::allocate_node(alloc, type_));
  4685. if (!n) return xml_node();
  4686. impl::insert_node_after(n._root, node._root);
  4687. if (type_ == node_declaration) n.set_name(PUGIXML_TEXT("xml"));
  4688. return n;
  4689. }
  4690. PUGI__FN xml_node xml_node::append_child(const char_t* name_)
  4691. {
  4692. xml_node result = append_child(node_element);
  4693. result.set_name(name_);
  4694. return result;
  4695. }
  4696. PUGI__FN xml_node xml_node::prepend_child(const char_t* name_)
  4697. {
  4698. xml_node result = prepend_child(node_element);
  4699. result.set_name(name_);
  4700. return result;
  4701. }
  4702. PUGI__FN xml_node xml_node::insert_child_after(const char_t* name_, const xml_node& node)
  4703. {
  4704. xml_node result = insert_child_after(node_element, node);
  4705. result.set_name(name_);
  4706. return result;
  4707. }
  4708. PUGI__FN xml_node xml_node::insert_child_before(const char_t* name_, const xml_node& node)
  4709. {
  4710. xml_node result = insert_child_before(node_element, node);
  4711. result.set_name(name_);
  4712. return result;
  4713. }
  4714. PUGI__FN xml_node xml_node::append_copy(const xml_node& proto)
  4715. {
  4716. xml_node_type type_ = proto.type();
  4717. if (!impl::allow_insert_child(type(), type_)) return xml_node();
  4718. impl::xml_allocator& alloc = impl::get_allocator(_root);
  4719. if (!alloc.reserve()) return xml_node();
  4720. xml_node n(impl::allocate_node(alloc, type_));
  4721. if (!n) return xml_node();
  4722. impl::append_node(n._root, _root);
  4723. impl::node_copy_tree(n._root, proto._root);
  4724. return n;
  4725. }
  4726. PUGI__FN xml_node xml_node::prepend_copy(const xml_node& proto)
  4727. {
  4728. xml_node_type type_ = proto.type();
  4729. if (!impl::allow_insert_child(type(), type_)) return xml_node();
  4730. impl::xml_allocator& alloc = impl::get_allocator(_root);
  4731. if (!alloc.reserve()) return xml_node();
  4732. xml_node n(impl::allocate_node(alloc, type_));
  4733. if (!n) return xml_node();
  4734. impl::prepend_node(n._root, _root);
  4735. impl::node_copy_tree(n._root, proto._root);
  4736. return n;
  4737. }
  4738. PUGI__FN xml_node xml_node::insert_copy_after(const xml_node& proto, const xml_node& node)
  4739. {
  4740. xml_node_type type_ = proto.type();
  4741. if (!impl::allow_insert_child(type(), type_)) return xml_node();
  4742. if (!node._root || node._root->parent != _root) return xml_node();
  4743. impl::xml_allocator& alloc = impl::get_allocator(_root);
  4744. if (!alloc.reserve()) return xml_node();
  4745. xml_node n(impl::allocate_node(alloc, type_));
  4746. if (!n) return xml_node();
  4747. impl::insert_node_after(n._root, node._root);
  4748. impl::node_copy_tree(n._root, proto._root);
  4749. return n;
  4750. }
  4751. PUGI__FN xml_node xml_node::insert_copy_before(const xml_node& proto, const xml_node& node)
  4752. {
  4753. xml_node_type type_ = proto.type();
  4754. if (!impl::allow_insert_child(type(), type_)) return xml_node();
  4755. if (!node._root || node._root->parent != _root) return xml_node();
  4756. impl::xml_allocator& alloc = impl::get_allocator(_root);
  4757. if (!alloc.reserve()) return xml_node();
  4758. xml_node n(impl::allocate_node(alloc, type_));
  4759. if (!n) return xml_node();
  4760. impl::insert_node_before(n._root, node._root);
  4761. impl::node_copy_tree(n._root, proto._root);
  4762. return n;
  4763. }
  4764. PUGI__FN xml_node xml_node::append_move(const xml_node& moved)
  4765. {
  4766. if (!impl::allow_move(*this, moved)) return xml_node();
  4767. impl::xml_allocator& alloc = impl::get_allocator(_root);
  4768. if (!alloc.reserve()) return xml_node();
  4769. // disable document_buffer_order optimization since moving nodes around changes document order without changing buffer pointers
  4770. impl::get_document(_root).header |= impl::xml_memory_page_contents_shared_mask;
  4771. impl::remove_node(moved._root);
  4772. impl::append_node(moved._root, _root);
  4773. return moved;
  4774. }
  4775. PUGI__FN xml_node xml_node::prepend_move(const xml_node& moved)
  4776. {
  4777. if (!impl::allow_move(*this, moved)) return xml_node();
  4778. impl::xml_allocator& alloc = impl::get_allocator(_root);
  4779. if (!alloc.reserve()) return xml_node();
  4780. // disable document_buffer_order optimization since moving nodes around changes document order without changing buffer pointers
  4781. impl::get_document(_root).header |= impl::xml_memory_page_contents_shared_mask;
  4782. impl::remove_node(moved._root);
  4783. impl::prepend_node(moved._root, _root);
  4784. return moved;
  4785. }
  4786. PUGI__FN xml_node xml_node::insert_move_after(const xml_node& moved, const xml_node& node)
  4787. {
  4788. if (!impl::allow_move(*this, moved)) return xml_node();
  4789. if (!node._root || node._root->parent != _root) return xml_node();
  4790. if (moved._root == node._root) return xml_node();
  4791. impl::xml_allocator& alloc = impl::get_allocator(_root);
  4792. if (!alloc.reserve()) return xml_node();
  4793. // disable document_buffer_order optimization since moving nodes around changes document order without changing buffer pointers
  4794. impl::get_document(_root).header |= impl::xml_memory_page_contents_shared_mask;
  4795. impl::remove_node(moved._root);
  4796. impl::insert_node_after(moved._root, node._root);
  4797. return moved;
  4798. }
  4799. PUGI__FN xml_node xml_node::insert_move_before(const xml_node& moved, const xml_node& node)
  4800. {
  4801. if (!impl::allow_move(*this, moved)) return xml_node();
  4802. if (!node._root || node._root->parent != _root) return xml_node();
  4803. if (moved._root == node._root) return xml_node();
  4804. impl::xml_allocator& alloc = impl::get_allocator(_root);
  4805. if (!alloc.reserve()) return xml_node();
  4806. // disable document_buffer_order optimization since moving nodes around changes document order without changing buffer pointers
  4807. impl::get_document(_root).header |= impl::xml_memory_page_contents_shared_mask;
  4808. impl::remove_node(moved._root);
  4809. impl::insert_node_before(moved._root, node._root);
  4810. return moved;
  4811. }
  4812. PUGI__FN bool xml_node::remove_attribute(const char_t* name_)
  4813. {
  4814. return remove_attribute(attribute(name_));
  4815. }
  4816. PUGI__FN bool xml_node::remove_attribute(const xml_attribute& a)
  4817. {
  4818. if (!_root || !a._attr) return false;
  4819. if (!impl::is_attribute_of(a._attr, _root)) return false;
  4820. impl::xml_allocator& alloc = impl::get_allocator(_root);
  4821. if (!alloc.reserve()) return false;
  4822. impl::remove_attribute(a._attr, _root);
  4823. impl::destroy_attribute(a._attr, alloc);
  4824. return true;
  4825. }
  4826. PUGI__FN bool xml_node::remove_attributes()
  4827. {
  4828. if (!_root) return false;
  4829. impl::xml_allocator& alloc = impl::get_allocator(_root);
  4830. if (!alloc.reserve()) return false;
  4831. for (xml_attribute_struct* attr = _root->first_attribute; attr; )
  4832. {
  4833. xml_attribute_struct* next = attr->next_attribute;
  4834. impl::destroy_attribute(attr, alloc);
  4835. attr = next;
  4836. }
  4837. _root->first_attribute = 0;
  4838. return true;
  4839. }
  4840. PUGI__FN bool xml_node::remove_child(const char_t* name_)
  4841. {
  4842. return remove_child(child(name_));
  4843. }
  4844. PUGI__FN bool xml_node::remove_child(const xml_node& n)
  4845. {
  4846. if (!_root || !n._root || n._root->parent != _root) return false;
  4847. impl::xml_allocator& alloc = impl::get_allocator(_root);
  4848. if (!alloc.reserve()) return false;
  4849. impl::remove_node(n._root);
  4850. impl::destroy_node(n._root, alloc);
  4851. return true;
  4852. }
  4853. PUGI__FN bool xml_node::remove_children()
  4854. {
  4855. if (!_root) return false;
  4856. impl::xml_allocator& alloc = impl::get_allocator(_root);
  4857. if (!alloc.reserve()) return false;
  4858. for (xml_node_struct* cur = _root->first_child; cur; )
  4859. {
  4860. xml_node_struct* next = cur->next_sibling;
  4861. impl::destroy_node(cur, alloc);
  4862. cur = next;
  4863. }
  4864. _root->first_child = 0;
  4865. return true;
  4866. }
  4867. PUGI__FN xml_parse_result xml_node::append_buffer(const void* contents, size_t size, unsigned int options, xml_encoding encoding)
  4868. {
  4869. // append_buffer is only valid for elements/documents
  4870. if (!impl::allow_insert_child(type(), node_element)) return impl::make_parse_result(status_append_invalid_root);
  4871. // get document node
  4872. impl::xml_document_struct* doc = &impl::get_document(_root);
  4873. // disable document_buffer_order optimization since in a document with multiple buffers comparing buffer pointers does not make sense
  4874. doc->header |= impl::xml_memory_page_contents_shared_mask;
  4875. // get extra buffer element (we'll store the document fragment buffer there so that we can deallocate it later)
  4876. impl::xml_memory_page* page = 0;
  4877. impl::xml_extra_buffer* extra = static_cast<impl::xml_extra_buffer*>(doc->allocate_memory(sizeof(impl::xml_extra_buffer) + sizeof(void*), page));
  4878. (void)page;
  4879. if (!extra) return impl::make_parse_result(status_out_of_memory);
  4880. #ifdef PUGIXML_COMPACT
  4881. // align the memory block to a pointer boundary; this is required for compact mode where memory allocations are only 4b aligned
  4882. // note that this requires up to sizeof(void*)-1 additional memory, which the allocation above takes into account
  4883. extra = reinterpret_cast<impl::xml_extra_buffer*>((reinterpret_cast<uintptr_t>(extra) + (sizeof(void*) - 1)) & ~(sizeof(void*) - 1));
  4884. #endif
  4885. // add extra buffer to the list
  4886. extra->buffer = 0;
  4887. extra->next = doc->extra_buffers;
  4888. doc->extra_buffers = extra;
  4889. // name of the root has to be NULL before parsing - otherwise closing node mismatches will not be detected at the top level
  4890. impl::name_null_sentry sentry(_root);
  4891. return impl::load_buffer_impl(doc, _root, const_cast<void*>(contents), size, options, encoding, false, false, &extra->buffer);
  4892. }
  4893. PUGI__FN xml_node xml_node::find_child_by_attribute(const char_t* name_, const char_t* attr_name, const char_t* attr_value) const
  4894. {
  4895. if (!_root) return xml_node();
  4896. for (xml_node_struct* i = _root->first_child; i; i = i->next_sibling)
  4897. if (i->name && impl::strequal(name_, i->name))
  4898. {
  4899. for (xml_attribute_struct* a = i->first_attribute; a; a = a->next_attribute)
  4900. if (a->name && impl::strequal(attr_name, a->name) && impl::strequal(attr_value, a->value ? a->value + 0 : PUGIXML_TEXT("")))
  4901. return xml_node(i);
  4902. }
  4903. return xml_node();
  4904. }
  4905. PUGI__FN xml_node xml_node::find_child_by_attribute(const char_t* attr_name, const char_t* attr_value) const
  4906. {
  4907. if (!_root) return xml_node();
  4908. for (xml_node_struct* i = _root->first_child; i; i = i->next_sibling)
  4909. for (xml_attribute_struct* a = i->first_attribute; a; a = a->next_attribute)
  4910. if (a->name && impl::strequal(attr_name, a->name) && impl::strequal(attr_value, a->value ? a->value + 0 : PUGIXML_TEXT("")))
  4911. return xml_node(i);
  4912. return xml_node();
  4913. }
  4914. #ifndef PUGIXML_NO_STL
  4915. PUGI__FN string_t xml_node::path(char_t delimiter) const
  4916. {
  4917. if (!_root) return string_t();
  4918. size_t offset = 0;
  4919. for (xml_node_struct* i = _root; i; i = i->parent)
  4920. {
  4921. offset += (i != _root);
  4922. offset += i->name ? impl::strlength(i->name) : 0;
  4923. }
  4924. string_t result;
  4925. result.resize(offset);
  4926. for (xml_node_struct* j = _root; j; j = j->parent)
  4927. {
  4928. if (j != _root)
  4929. result[--offset] = delimiter;
  4930. if (j->name)
  4931. {
  4932. size_t length = impl::strlength(j->name);
  4933. offset -= length;
  4934. memcpy(&result[offset], j->name, length * sizeof(char_t));
  4935. }
  4936. }
  4937. assert(offset == 0);
  4938. return result;
  4939. }
  4940. #endif
  4941. PUGI__FN xml_node xml_node::first_element_by_path(const char_t* path_, char_t delimiter) const
  4942. {
  4943. xml_node context = path_[0] == delimiter ? root() : *this;
  4944. if (!context._root) return xml_node();
  4945. const char_t* path_segment = path_;
  4946. while (*path_segment == delimiter) ++path_segment;
  4947. const char_t* path_segment_end = path_segment;
  4948. while (*path_segment_end && *path_segment_end != delimiter) ++path_segment_end;
  4949. if (path_segment == path_segment_end) return context;
  4950. const char_t* next_segment = path_segment_end;
  4951. while (*next_segment == delimiter) ++next_segment;
  4952. if (*path_segment == '.' && path_segment + 1 == path_segment_end)
  4953. return context.first_element_by_path(next_segment, delimiter);
  4954. else if (*path_segment == '.' && *(path_segment+1) == '.' && path_segment + 2 == path_segment_end)
  4955. return context.parent().first_element_by_path(next_segment, delimiter);
  4956. else
  4957. {
  4958. for (xml_node_struct* j = context._root->first_child; j; j = j->next_sibling)
  4959. {
  4960. if (j->name && impl::strequalrange(j->name, path_segment, static_cast<size_t>(path_segment_end - path_segment)))
  4961. {
  4962. xml_node subsearch = xml_node(j).first_element_by_path(next_segment, delimiter);
  4963. if (subsearch) return subsearch;
  4964. }
  4965. }
  4966. return xml_node();
  4967. }
  4968. }
  4969. PUGI__FN bool xml_node::traverse(xml_tree_walker& walker)
  4970. {
  4971. walker._depth = -1;
  4972. xml_node arg_begin(_root);
  4973. if (!walker.begin(arg_begin)) return false;
  4974. xml_node_struct* cur = _root ? _root->first_child + 0 : 0;
  4975. if (cur)
  4976. {
  4977. ++walker._depth;
  4978. do
  4979. {
  4980. xml_node arg_for_each(cur);
  4981. if (!walker.for_each(arg_for_each))
  4982. return false;
  4983. if (cur->first_child)
  4984. {
  4985. ++walker._depth;
  4986. cur = cur->first_child;
  4987. }
  4988. else if (cur->next_sibling)
  4989. cur = cur->next_sibling;
  4990. else
  4991. {
  4992. while (!cur->next_sibling && cur != _root && cur->parent)
  4993. {
  4994. --walker._depth;
  4995. cur = cur->parent;
  4996. }
  4997. if (cur != _root)
  4998. cur = cur->next_sibling;
  4999. }
  5000. }
  5001. while (cur && cur != _root);
  5002. }
  5003. assert(walker._depth == -1);
  5004. xml_node arg_end(_root);
  5005. return walker.end(arg_end);
  5006. }
  5007. PUGI__FN size_t xml_node::hash_value() const
  5008. {
  5009. return static_cast<size_t>(reinterpret_cast<uintptr_t>(_root) / sizeof(xml_node_struct));
  5010. }
  5011. PUGI__FN xml_node_struct* xml_node::internal_object() const
  5012. {
  5013. return _root;
  5014. }
  5015. PUGI__FN void xml_node::print(xml_writer& writer, const char_t* indent, unsigned int flags, xml_encoding encoding, unsigned int depth) const
  5016. {
  5017. if (!_root) return;
  5018. impl::xml_buffered_writer buffered_writer(writer, encoding);
  5019. impl::node_output(buffered_writer, _root, indent, flags, depth);
  5020. buffered_writer.flush();
  5021. }
  5022. #ifndef PUGIXML_NO_STL
  5023. PUGI__FN void xml_node::print(std::basic_ostream<char, std::char_traits<char> >& stream, const char_t* indent, unsigned int flags, xml_encoding encoding, unsigned int depth) const
  5024. {
  5025. xml_writer_stream writer(stream);
  5026. print(writer, indent, flags, encoding, depth);
  5027. }
  5028. PUGI__FN void xml_node::print(std::basic_ostream<wchar_t, std::char_traits<wchar_t> >& stream, const char_t* indent, unsigned int flags, unsigned int depth) const
  5029. {
  5030. xml_writer_stream writer(stream);
  5031. print(writer, indent, flags, encoding_wchar, depth);
  5032. }
  5033. #endif
  5034. PUGI__FN ptrdiff_t xml_node::offset_debug() const
  5035. {
  5036. if (!_root) return -1;
  5037. impl::xml_document_struct& doc = impl::get_document(_root);
  5038. // we can determine the offset reliably only if there is exactly once parse buffer
  5039. if (!doc.buffer || doc.extra_buffers) return -1;
  5040. switch (type())
  5041. {
  5042. case node_document:
  5043. return 0;
  5044. case node_element:
  5045. case node_declaration:
  5046. case node_pi:
  5047. return _root->name && (_root->header & impl::xml_memory_page_name_allocated_or_shared_mask) == 0 ? _root->name - doc.buffer : -1;
  5048. case node_pcdata:
  5049. case node_cdata:
  5050. case node_comment:
  5051. case node_doctype:
  5052. return _root->value && (_root->header & impl::xml_memory_page_value_allocated_or_shared_mask) == 0 ? _root->value - doc.buffer : -1;
  5053. default:
  5054. assert(false && "Invalid node type"); // unreachable
  5055. return -1;
  5056. }
  5057. }
  5058. #ifdef __BORLANDC__
  5059. PUGI__FN bool operator&&(const xml_node& lhs, bool rhs)
  5060. {
  5061. return (bool)lhs && rhs;
  5062. }
  5063. PUGI__FN bool operator||(const xml_node& lhs, bool rhs)
  5064. {
  5065. return (bool)lhs || rhs;
  5066. }
  5067. #endif
  5068. PUGI__FN xml_text::xml_text(xml_node_struct* root): _root(root)
  5069. {
  5070. }
  5071. PUGI__FN xml_node_struct* xml_text::_data() const
  5072. {
  5073. if (!_root || impl::is_text_node(_root)) return _root;
  5074. // element nodes can have value if parse_embed_pcdata was used
  5075. if (PUGI__NODETYPE(_root) == node_element && _root->value)
  5076. return _root;
  5077. for (xml_node_struct* node = _root->first_child; node; node = node->next_sibling)
  5078. if (impl::is_text_node(node))
  5079. return node;
  5080. return 0;
  5081. }
  5082. PUGI__FN xml_node_struct* xml_text::_data_new()
  5083. {
  5084. xml_node_struct* d = _data();
  5085. if (d) return d;
  5086. return xml_node(_root).append_child(node_pcdata).internal_object();
  5087. }
  5088. PUGI__FN xml_text::xml_text(): _root(0)
  5089. {
  5090. }
  5091. PUGI__FN static void unspecified_bool_xml_text(xml_text***)
  5092. {
  5093. }
  5094. PUGI__FN xml_text::operator xml_text::unspecified_bool_type() const
  5095. {
  5096. return _data() ? unspecified_bool_xml_text : 0;
  5097. }
  5098. PUGI__FN bool xml_text::operator!() const
  5099. {
  5100. return !_data();
  5101. }
  5102. PUGI__FN bool xml_text::empty() const
  5103. {
  5104. return _data() == 0;
  5105. }
  5106. PUGI__FN const char_t* xml_text::get() const
  5107. {
  5108. xml_node_struct* d = _data();
  5109. return (d && d->value) ? d->value + 0 : PUGIXML_TEXT("");
  5110. }
  5111. PUGI__FN const char_t* xml_text::as_string(const char_t* def) const
  5112. {
  5113. xml_node_struct* d = _data();
  5114. return (d && d->value) ? d->value + 0 : def;
  5115. }
  5116. PUGI__FN int xml_text::as_int(int def) const
  5117. {
  5118. xml_node_struct* d = _data();
  5119. return (d && d->value) ? impl::get_value_int(d->value) : def;
  5120. }
  5121. PUGI__FN unsigned int xml_text::as_uint(unsigned int def) const
  5122. {
  5123. xml_node_struct* d = _data();
  5124. return (d && d->value) ? impl::get_value_uint(d->value) : def;
  5125. }
  5126. PUGI__FN double xml_text::as_double(double def) const
  5127. {
  5128. xml_node_struct* d = _data();
  5129. return (d && d->value) ? impl::get_value_double(d->value) : def;
  5130. }
  5131. PUGI__FN float xml_text::as_float(float def) const
  5132. {
  5133. xml_node_struct* d = _data();
  5134. return (d && d->value) ? impl::get_value_float(d->value) : def;
  5135. }
  5136. PUGI__FN bool xml_text::as_bool(bool def) const
  5137. {
  5138. xml_node_struct* d = _data();
  5139. return (d && d->value) ? impl::get_value_bool(d->value) : def;
  5140. }
  5141. #ifdef PUGIXML_HAS_LONG_LONG
  5142. PUGI__FN long long xml_text::as_llong(long long def) const
  5143. {
  5144. xml_node_struct* d = _data();
  5145. return (d && d->value) ? impl::get_value_llong(d->value) : def;
  5146. }
  5147. PUGI__FN unsigned long long xml_text::as_ullong(unsigned long long def) const
  5148. {
  5149. xml_node_struct* d = _data();
  5150. return (d && d->value) ? impl::get_value_ullong(d->value) : def;
  5151. }
  5152. #endif
  5153. PUGI__FN bool xml_text::set(const char_t* rhs)
  5154. {
  5155. xml_node_struct* dn = _data_new();
  5156. return dn ? impl::strcpy_insitu(dn->value, dn->header, impl::xml_memory_page_value_allocated_mask, rhs, impl::strlength(rhs)) : false;
  5157. }
  5158. PUGI__FN bool xml_text::set(int rhs)
  5159. {
  5160. xml_node_struct* dn = _data_new();
  5161. return dn ? impl::set_value_integer<unsigned int>(dn->value, dn->header, impl::xml_memory_page_value_allocated_mask, rhs, rhs < 0) : false;
  5162. }
  5163. PUGI__FN bool xml_text::set(unsigned int rhs)
  5164. {
  5165. xml_node_struct* dn = _data_new();
  5166. return dn ? impl::set_value_integer<unsigned int>(dn->value, dn->header, impl::xml_memory_page_value_allocated_mask, rhs, false) : false;
  5167. }
  5168. PUGI__FN bool xml_text::set(long rhs)
  5169. {
  5170. xml_node_struct* dn = _data_new();
  5171. return dn ? impl::set_value_integer<unsigned long>(dn->value, dn->header, impl::xml_memory_page_value_allocated_mask, rhs, rhs < 0) : false;
  5172. }
  5173. PUGI__FN bool xml_text::set(unsigned long rhs)
  5174. {
  5175. xml_node_struct* dn = _data_new();
  5176. return dn ? impl::set_value_integer<unsigned long>(dn->value, dn->header, impl::xml_memory_page_value_allocated_mask, rhs, false) : false;
  5177. }
  5178. PUGI__FN bool xml_text::set(float rhs)
  5179. {
  5180. xml_node_struct* dn = _data_new();
  5181. return dn ? impl::set_value_convert(dn->value, dn->header, impl::xml_memory_page_value_allocated_mask, rhs, default_float_precision) : false;
  5182. }
  5183. PUGI__FN bool xml_text::set(float rhs, int precision)
  5184. {
  5185. xml_node_struct* dn = _data_new();
  5186. return dn ? impl::set_value_convert(dn->value, dn->header, impl::xml_memory_page_value_allocated_mask, rhs, precision) : false;
  5187. }
  5188. PUGI__FN bool xml_text::set(double rhs)
  5189. {
  5190. xml_node_struct* dn = _data_new();
  5191. return dn ? impl::set_value_convert(dn->value, dn->header, impl::xml_memory_page_value_allocated_mask, rhs, default_double_precision) : false;
  5192. }
  5193. PUGI__FN bool xml_text::set(double rhs, int precision)
  5194. {
  5195. xml_node_struct* dn = _data_new();
  5196. return dn ? impl::set_value_convert(dn->value, dn->header, impl::xml_memory_page_value_allocated_mask, rhs, precision) : false;
  5197. }
  5198. PUGI__FN bool xml_text::set(bool rhs)
  5199. {
  5200. xml_node_struct* dn = _data_new();
  5201. return dn ? impl::set_value_bool(dn->value, dn->header, impl::xml_memory_page_value_allocated_mask, rhs) : false;
  5202. }
  5203. #ifdef PUGIXML_HAS_LONG_LONG
  5204. PUGI__FN bool xml_text::set(long long rhs)
  5205. {
  5206. xml_node_struct* dn = _data_new();
  5207. return dn ? impl::set_value_integer<unsigned long long>(dn->value, dn->header, impl::xml_memory_page_value_allocated_mask, rhs, rhs < 0) : false;
  5208. }
  5209. PUGI__FN bool xml_text::set(unsigned long long rhs)
  5210. {
  5211. xml_node_struct* dn = _data_new();
  5212. return dn ? impl::set_value_integer<unsigned long long>(dn->value, dn->header, impl::xml_memory_page_value_allocated_mask, rhs, false) : false;
  5213. }
  5214. #endif
  5215. PUGI__FN xml_text& xml_text::operator=(const char_t* rhs)
  5216. {
  5217. set(rhs);
  5218. return *this;
  5219. }
  5220. PUGI__FN xml_text& xml_text::operator=(int rhs)
  5221. {
  5222. set(rhs);
  5223. return *this;
  5224. }
  5225. PUGI__FN xml_text& xml_text::operator=(unsigned int rhs)
  5226. {
  5227. set(rhs);
  5228. return *this;
  5229. }
  5230. PUGI__FN xml_text& xml_text::operator=(long rhs)
  5231. {
  5232. set(rhs);
  5233. return *this;
  5234. }
  5235. PUGI__FN xml_text& xml_text::operator=(unsigned long rhs)
  5236. {
  5237. set(rhs);
  5238. return *this;
  5239. }
  5240. PUGI__FN xml_text& xml_text::operator=(double rhs)
  5241. {
  5242. set(rhs);
  5243. return *this;
  5244. }
  5245. PUGI__FN xml_text& xml_text::operator=(float rhs)
  5246. {
  5247. set(rhs);
  5248. return *this;
  5249. }
  5250. PUGI__FN xml_text& xml_text::operator=(bool rhs)
  5251. {
  5252. set(rhs);
  5253. return *this;
  5254. }
  5255. #ifdef PUGIXML_HAS_LONG_LONG
  5256. PUGI__FN xml_text& xml_text::operator=(long long rhs)
  5257. {
  5258. set(rhs);
  5259. return *this;
  5260. }
  5261. PUGI__FN xml_text& xml_text::operator=(unsigned long long rhs)
  5262. {
  5263. set(rhs);
  5264. return *this;
  5265. }
  5266. #endif
  5267. PUGI__FN xml_node xml_text::data() const
  5268. {
  5269. return xml_node(_data());
  5270. }
  5271. #ifdef __BORLANDC__
  5272. PUGI__FN bool operator&&(const xml_text& lhs, bool rhs)
  5273. {
  5274. return (bool)lhs && rhs;
  5275. }
  5276. PUGI__FN bool operator||(const xml_text& lhs, bool rhs)
  5277. {
  5278. return (bool)lhs || rhs;
  5279. }
  5280. #endif
  5281. PUGI__FN xml_node_iterator::xml_node_iterator()
  5282. {
  5283. }
  5284. PUGI__FN xml_node_iterator::xml_node_iterator(const xml_node& node): _wrap(node), _parent(node.parent())
  5285. {
  5286. }
  5287. PUGI__FN xml_node_iterator::xml_node_iterator(xml_node_struct* ref, xml_node_struct* parent): _wrap(ref), _parent(parent)
  5288. {
  5289. }
  5290. PUGI__FN bool xml_node_iterator::operator==(const xml_node_iterator& rhs) const
  5291. {
  5292. return _wrap._root == rhs._wrap._root && _parent._root == rhs._parent._root;
  5293. }
  5294. PUGI__FN bool xml_node_iterator::operator!=(const xml_node_iterator& rhs) const
  5295. {
  5296. return _wrap._root != rhs._wrap._root || _parent._root != rhs._parent._root;
  5297. }
  5298. PUGI__FN xml_node& xml_node_iterator::operator*() const
  5299. {
  5300. assert(_wrap._root);
  5301. return _wrap;
  5302. }
  5303. PUGI__FN xml_node* xml_node_iterator::operator->() const
  5304. {
  5305. assert(_wrap._root);
  5306. return const_cast<xml_node*>(&_wrap); // BCC5 workaround
  5307. }
  5308. PUGI__FN const xml_node_iterator& xml_node_iterator::operator++()
  5309. {
  5310. assert(_wrap._root);
  5311. _wrap._root = _wrap._root->next_sibling;
  5312. return *this;
  5313. }
  5314. PUGI__FN xml_node_iterator xml_node_iterator::operator++(int)
  5315. {
  5316. xml_node_iterator temp = *this;
  5317. ++*this;
  5318. return temp;
  5319. }
  5320. PUGI__FN const xml_node_iterator& xml_node_iterator::operator--()
  5321. {
  5322. _wrap = _wrap._root ? _wrap.previous_sibling() : _parent.last_child();
  5323. return *this;
  5324. }
  5325. PUGI__FN xml_node_iterator xml_node_iterator::operator--(int)
  5326. {
  5327. xml_node_iterator temp = *this;
  5328. --*this;
  5329. return temp;
  5330. }
  5331. PUGI__FN xml_attribute_iterator::xml_attribute_iterator()
  5332. {
  5333. }
  5334. PUGI__FN xml_attribute_iterator::xml_attribute_iterator(const xml_attribute& attr, const xml_node& parent): _wrap(attr), _parent(parent)
  5335. {
  5336. }
  5337. PUGI__FN xml_attribute_iterator::xml_attribute_iterator(xml_attribute_struct* ref, xml_node_struct* parent): _wrap(ref), _parent(parent)
  5338. {
  5339. }
  5340. PUGI__FN bool xml_attribute_iterator::operator==(const xml_attribute_iterator& rhs) const
  5341. {
  5342. return _wrap._attr == rhs._wrap._attr && _parent._root == rhs._parent._root;
  5343. }
  5344. PUGI__FN bool xml_attribute_iterator::operator!=(const xml_attribute_iterator& rhs) const
  5345. {
  5346. return _wrap._attr != rhs._wrap._attr || _parent._root != rhs._parent._root;
  5347. }
  5348. PUGI__FN xml_attribute& xml_attribute_iterator::operator*() const
  5349. {
  5350. assert(_wrap._attr);
  5351. return _wrap;
  5352. }
  5353. PUGI__FN xml_attribute* xml_attribute_iterator::operator->() const
  5354. {
  5355. assert(_wrap._attr);
  5356. return const_cast<xml_attribute*>(&_wrap); // BCC5 workaround
  5357. }
  5358. PUGI__FN const xml_attribute_iterator& xml_attribute_iterator::operator++()
  5359. {
  5360. assert(_wrap._attr);
  5361. _wrap._attr = _wrap._attr->next_attribute;
  5362. return *this;
  5363. }
  5364. PUGI__FN xml_attribute_iterator xml_attribute_iterator::operator++(int)
  5365. {
  5366. xml_attribute_iterator temp = *this;
  5367. ++*this;
  5368. return temp;
  5369. }
  5370. PUGI__FN const xml_attribute_iterator& xml_attribute_iterator::operator--()
  5371. {
  5372. _wrap = _wrap._attr ? _wrap.previous_attribute() : _parent.last_attribute();
  5373. return *this;
  5374. }
  5375. PUGI__FN xml_attribute_iterator xml_attribute_iterator::operator--(int)
  5376. {
  5377. xml_attribute_iterator temp = *this;
  5378. --*this;
  5379. return temp;
  5380. }
  5381. PUGI__FN xml_named_node_iterator::xml_named_node_iterator(): _name(0)
  5382. {
  5383. }
  5384. PUGI__FN xml_named_node_iterator::xml_named_node_iterator(const xml_node& node, const char_t* name): _wrap(node), _parent(node.parent()), _name(name)
  5385. {
  5386. }
  5387. PUGI__FN xml_named_node_iterator::xml_named_node_iterator(xml_node_struct* ref, xml_node_struct* parent, const char_t* name): _wrap(ref), _parent(parent), _name(name)
  5388. {
  5389. }
  5390. PUGI__FN bool xml_named_node_iterator::operator==(const xml_named_node_iterator& rhs) const
  5391. {
  5392. return _wrap._root == rhs._wrap._root && _parent._root == rhs._parent._root;
  5393. }
  5394. PUGI__FN bool xml_named_node_iterator::operator!=(const xml_named_node_iterator& rhs) const
  5395. {
  5396. return _wrap._root != rhs._wrap._root || _parent._root != rhs._parent._root;
  5397. }
  5398. PUGI__FN xml_node& xml_named_node_iterator::operator*() const
  5399. {
  5400. assert(_wrap._root);
  5401. return _wrap;
  5402. }
  5403. PUGI__FN xml_node* xml_named_node_iterator::operator->() const
  5404. {
  5405. assert(_wrap._root);
  5406. return const_cast<xml_node*>(&_wrap); // BCC5 workaround
  5407. }
  5408. PUGI__FN const xml_named_node_iterator& xml_named_node_iterator::operator++()
  5409. {
  5410. assert(_wrap._root);
  5411. _wrap = _wrap.next_sibling(_name);
  5412. return *this;
  5413. }
  5414. PUGI__FN xml_named_node_iterator xml_named_node_iterator::operator++(int)
  5415. {
  5416. xml_named_node_iterator temp = *this;
  5417. ++*this;
  5418. return temp;
  5419. }
  5420. PUGI__FN const xml_named_node_iterator& xml_named_node_iterator::operator--()
  5421. {
  5422. if (_wrap._root)
  5423. _wrap = _wrap.previous_sibling(_name);
  5424. else
  5425. {
  5426. _wrap = _parent.last_child();
  5427. if (!impl::strequal(_wrap.name(), _name))
  5428. _wrap = _wrap.previous_sibling(_name);
  5429. }
  5430. return *this;
  5431. }
  5432. PUGI__FN xml_named_node_iterator xml_named_node_iterator::operator--(int)
  5433. {
  5434. xml_named_node_iterator temp = *this;
  5435. --*this;
  5436. return temp;
  5437. }
  5438. PUGI__FN xml_parse_result::xml_parse_result(): status(status_internal_error), offset(0), encoding(encoding_auto)
  5439. {
  5440. }
  5441. PUGI__FN xml_parse_result::operator bool() const
  5442. {
  5443. return status == status_ok;
  5444. }
  5445. PUGI__FN const char* xml_parse_result::description() const
  5446. {
  5447. switch (status)
  5448. {
  5449. case status_ok: return "No error";
  5450. case status_file_not_found: return "File was not found";
  5451. case status_io_error: return "Error reading from file/stream";
  5452. case status_out_of_memory: return "Could not allocate memory";
  5453. case status_internal_error: return "Internal error occurred";
  5454. case status_unrecognized_tag: return "Could not determine tag type";
  5455. case status_bad_pi: return "Error parsing document declaration/processing instruction";
  5456. case status_bad_comment: return "Error parsing comment";
  5457. case status_bad_cdata: return "Error parsing CDATA section";
  5458. case status_bad_doctype: return "Error parsing document type declaration";
  5459. case status_bad_pcdata: return "Error parsing PCDATA section";
  5460. case status_bad_start_element: return "Error parsing start element tag";
  5461. case status_bad_attribute: return "Error parsing element attribute";
  5462. case status_bad_end_element: return "Error parsing end element tag";
  5463. case status_end_element_mismatch: return "Start-end tags mismatch";
  5464. case status_append_invalid_root: return "Unable to append nodes: root is not an element or document";
  5465. case status_no_document_element: return "No document element found";
  5466. default: return "Unknown error";
  5467. }
  5468. }
  5469. PUGI__FN xml_document::xml_document(): _buffer(0)
  5470. {
  5471. _create();
  5472. }
  5473. PUGI__FN xml_document::~xml_document()
  5474. {
  5475. _destroy();
  5476. }
  5477. #ifdef PUGIXML_HAS_MOVE
  5478. PUGI__FN xml_document::xml_document(xml_document&& rhs) PUGIXML_NOEXCEPT_IF_NOT_COMPACT: _buffer(0)
  5479. {
  5480. _create();
  5481. _move(rhs);
  5482. }
  5483. PUGI__FN xml_document& xml_document::operator=(xml_document&& rhs) PUGIXML_NOEXCEPT_IF_NOT_COMPACT
  5484. {
  5485. if (this == &rhs) return *this;
  5486. _destroy();
  5487. _create();
  5488. _move(rhs);
  5489. return *this;
  5490. }
  5491. #endif
  5492. PUGI__FN void xml_document::reset()
  5493. {
  5494. _destroy();
  5495. _create();
  5496. }
  5497. PUGI__FN void xml_document::reset(const xml_document& proto)
  5498. {
  5499. reset();
  5500. impl::node_copy_tree(_root, proto._root);
  5501. }
  5502. PUGI__FN void xml_document::_create()
  5503. {
  5504. assert(!_root);
  5505. #ifdef PUGIXML_COMPACT
  5506. // space for page marker for the first page (uint32_t), rounded up to pointer size; assumes pointers are at least 32-bit
  5507. const size_t page_offset = sizeof(void*);
  5508. #else
  5509. const size_t page_offset = 0;
  5510. #endif
  5511. // initialize sentinel page
  5512. PUGI__STATIC_ASSERT(sizeof(impl::xml_memory_page) + sizeof(impl::xml_document_struct) + page_offset <= sizeof(_memory));
  5513. // prepare page structure
  5514. impl::xml_memory_page* page = impl::xml_memory_page::construct(_memory);
  5515. assert(page);
  5516. page->busy_size = impl::xml_memory_page_size;
  5517. // setup first page marker
  5518. #ifdef PUGIXML_COMPACT
  5519. // round-trip through void* to avoid 'cast increases required alignment of target type' warning
  5520. page->compact_page_marker = reinterpret_cast<uint32_t*>(static_cast<void*>(reinterpret_cast<char*>(page) + sizeof(impl::xml_memory_page)));
  5521. *page->compact_page_marker = sizeof(impl::xml_memory_page);
  5522. #endif
  5523. // allocate new root
  5524. _root = new (reinterpret_cast<char*>(page) + sizeof(impl::xml_memory_page) + page_offset) impl::xml_document_struct(page);
  5525. _root->prev_sibling_c = _root;
  5526. // setup sentinel page
  5527. page->allocator = static_cast<impl::xml_document_struct*>(_root);
  5528. // setup hash table pointer in allocator
  5529. #ifdef PUGIXML_COMPACT
  5530. page->allocator->_hash = &static_cast<impl::xml_document_struct*>(_root)->hash;
  5531. #endif
  5532. // verify the document allocation
  5533. assert(reinterpret_cast<char*>(_root) + sizeof(impl::xml_document_struct) <= _memory + sizeof(_memory));
  5534. }
  5535. PUGI__FN void xml_document::_destroy()
  5536. {
  5537. assert(_root);
  5538. // destroy static storage
  5539. if (_buffer)
  5540. {
  5541. impl::xml_memory::deallocate(_buffer);
  5542. _buffer = 0;
  5543. }
  5544. // destroy extra buffers (note: no need to destroy linked list nodes, they're allocated using document allocator)
  5545. for (impl::xml_extra_buffer* extra = static_cast<impl::xml_document_struct*>(_root)->extra_buffers; extra; extra = extra->next)
  5546. {
  5547. if (extra->buffer) impl::xml_memory::deallocate(extra->buffer);
  5548. }
  5549. // destroy dynamic storage, leave sentinel page (it's in static memory)
  5550. impl::xml_memory_page* root_page = PUGI__GETPAGE(_root);
  5551. assert(root_page && !root_page->prev);
  5552. assert(reinterpret_cast<char*>(root_page) >= _memory && reinterpret_cast<char*>(root_page) < _memory + sizeof(_memory));
  5553. for (impl::xml_memory_page* page = root_page->next; page; )
  5554. {
  5555. impl::xml_memory_page* next = page->next;
  5556. impl::xml_allocator::deallocate_page(page);
  5557. page = next;
  5558. }
  5559. #ifdef PUGIXML_COMPACT
  5560. // destroy hash table
  5561. static_cast<impl::xml_document_struct*>(_root)->hash.clear();
  5562. #endif
  5563. _root = 0;
  5564. }
  5565. #ifdef PUGIXML_HAS_MOVE
  5566. PUGI__FN void xml_document::_move(xml_document& rhs) PUGIXML_NOEXCEPT_IF_NOT_COMPACT
  5567. {
  5568. impl::xml_document_struct* doc = static_cast<impl::xml_document_struct*>(_root);
  5569. impl::xml_document_struct* other = static_cast<impl::xml_document_struct*>(rhs._root);
  5570. // save first child pointer for later; this needs hash access
  5571. xml_node_struct* other_first_child = other->first_child;
  5572. #ifdef PUGIXML_COMPACT
  5573. // reserve space for the hash table up front; this is the only operation that can fail
  5574. // if it does, we have no choice but to throw (if we have exceptions)
  5575. if (other_first_child)
  5576. {
  5577. size_t other_children = 0;
  5578. for (xml_node_struct* node = other_first_child; node; node = node->next_sibling)
  5579. other_children++;
  5580. // in compact mode, each pointer assignment could result in a hash table request
  5581. // during move, we have to relocate document first_child and parents of all children
  5582. // normally there's just one child and its parent has a pointerless encoding but
  5583. // we assume the worst here
  5584. if (!other->_hash->reserve(other_children + 1))
  5585. {
  5586. #ifdef PUGIXML_NO_EXCEPTIONS
  5587. return;
  5588. #else
  5589. throw std::bad_alloc();
  5590. #endif
  5591. }
  5592. }
  5593. #endif
  5594. // move allocation state
  5595. doc->_root = other->_root;
  5596. doc->_busy_size = other->_busy_size;
  5597. // move buffer state
  5598. doc->buffer = other->buffer;
  5599. doc->extra_buffers = other->extra_buffers;
  5600. _buffer = rhs._buffer;
  5601. #ifdef PUGIXML_COMPACT
  5602. // move compact hash; note that the hash table can have pointers to other but they will be "inactive", similarly to nodes removed with remove_child
  5603. doc->hash = other->hash;
  5604. doc->_hash = &doc->hash;
  5605. // make sure we don't access other hash up until the end when we reinitialize other document
  5606. other->_hash = 0;
  5607. #endif
  5608. // move page structure
  5609. impl::xml_memory_page* doc_page = PUGI__GETPAGE(doc);
  5610. assert(doc_page && !doc_page->prev && !doc_page->next);
  5611. impl::xml_memory_page* other_page = PUGI__GETPAGE(other);
  5612. assert(other_page && !other_page->prev);
  5613. // relink pages since root page is embedded into xml_document
  5614. if (impl::xml_memory_page* page = other_page->next)
  5615. {
  5616. assert(page->prev == other_page);
  5617. page->prev = doc_page;
  5618. doc_page->next = page;
  5619. other_page->next = 0;
  5620. }
  5621. // make sure pages point to the correct document state
  5622. for (impl::xml_memory_page* page = doc_page->next; page; page = page->next)
  5623. {
  5624. assert(page->allocator == other);
  5625. page->allocator = doc;
  5626. #ifdef PUGIXML_COMPACT
  5627. // this automatically migrates most children between documents and prevents ->parent assignment from allocating
  5628. if (page->compact_shared_parent == other)
  5629. page->compact_shared_parent = doc;
  5630. #endif
  5631. }
  5632. // move tree structure
  5633. assert(!doc->first_child);
  5634. doc->first_child = other_first_child;
  5635. for (xml_node_struct* node = other_first_child; node; node = node->next_sibling)
  5636. {
  5637. #ifdef PUGIXML_COMPACT
  5638. // most children will have migrated when we reassigned compact_shared_parent
  5639. assert(node->parent == other || node->parent == doc);
  5640. node->parent = doc;
  5641. #else
  5642. assert(node->parent == other);
  5643. node->parent = doc;
  5644. #endif
  5645. }
  5646. // reset other document
  5647. new (other) impl::xml_document_struct(PUGI__GETPAGE(other));
  5648. rhs._buffer = 0;
  5649. }
  5650. #endif
  5651. #ifndef PUGIXML_NO_STL
  5652. PUGI__FN xml_parse_result xml_document::load(std::basic_istream<char, std::char_traits<char> >& stream, unsigned int options, xml_encoding encoding)
  5653. {
  5654. reset();
  5655. return impl::load_stream_impl(static_cast<impl::xml_document_struct*>(_root), stream, options, encoding, &_buffer);
  5656. }
  5657. PUGI__FN xml_parse_result xml_document::load(std::basic_istream<wchar_t, std::char_traits<wchar_t> >& stream, unsigned int options)
  5658. {
  5659. reset();
  5660. return impl::load_stream_impl(static_cast<impl::xml_document_struct*>(_root), stream, options, encoding_wchar, &_buffer);
  5661. }
  5662. #endif
  5663. PUGI__FN xml_parse_result xml_document::load_string(const char_t* contents, unsigned int options)
  5664. {
  5665. // Force native encoding (skip autodetection)
  5666. #ifdef PUGIXML_WCHAR_MODE
  5667. xml_encoding encoding = encoding_wchar;
  5668. #else
  5669. xml_encoding encoding = encoding_utf8;
  5670. #endif
  5671. return load_buffer(contents, impl::strlength(contents) * sizeof(char_t), options, encoding);
  5672. }
  5673. PUGI__FN xml_parse_result xml_document::load(const char_t* contents, unsigned int options)
  5674. {
  5675. return load_string(contents, options);
  5676. }
  5677. PUGI__FN xml_parse_result xml_document::load_file(const char* path_, unsigned int options, xml_encoding encoding)
  5678. {
  5679. reset();
  5680. using impl::auto_deleter; // MSVC7 workaround
  5681. auto_deleter<FILE> file(impl::open_file(path_, "rb"), impl::close_file);
  5682. return impl::load_file_impl(static_cast<impl::xml_document_struct*>(_root), file.data, options, encoding, &_buffer);
  5683. }
  5684. PUGI__FN xml_parse_result xml_document::load_file(const wchar_t* path_, unsigned int options, xml_encoding encoding)
  5685. {
  5686. reset();
  5687. using impl::auto_deleter; // MSVC7 workaround
  5688. auto_deleter<FILE> file(impl::open_file_wide(path_, L"rb"), impl::close_file);
  5689. return impl::load_file_impl(static_cast<impl::xml_document_struct*>(_root), file.data, options, encoding, &_buffer);
  5690. }
  5691. PUGI__FN xml_parse_result xml_document::load_buffer(const void* contents, size_t size, unsigned int options, xml_encoding encoding)
  5692. {
  5693. reset();
  5694. return impl::load_buffer_impl(static_cast<impl::xml_document_struct*>(_root), _root, const_cast<void*>(contents), size, options, encoding, false, false, &_buffer);
  5695. }
  5696. PUGI__FN xml_parse_result xml_document::load_buffer_inplace(void* contents, size_t size, unsigned int options, xml_encoding encoding)
  5697. {
  5698. reset();
  5699. return impl::load_buffer_impl(static_cast<impl::xml_document_struct*>(_root), _root, contents, size, options, encoding, true, false, &_buffer);
  5700. }
  5701. PUGI__FN xml_parse_result xml_document::load_buffer_inplace_own(void* contents, size_t size, unsigned int options, xml_encoding encoding)
  5702. {
  5703. reset();
  5704. return impl::load_buffer_impl(static_cast<impl::xml_document_struct*>(_root), _root, contents, size, options, encoding, true, true, &_buffer);
  5705. }
  5706. PUGI__FN void xml_document::save(xml_writer& writer, const char_t* indent, unsigned int flags, xml_encoding encoding) const
  5707. {
  5708. impl::xml_buffered_writer buffered_writer(writer, encoding);
  5709. if ((flags & format_write_bom) && encoding != encoding_latin1)
  5710. {
  5711. // BOM always represents the codepoint U+FEFF, so just write it in native encoding
  5712. #ifdef PUGIXML_WCHAR_MODE
  5713. unsigned int bom = 0xfeff;
  5714. buffered_writer.write(static_cast<wchar_t>(bom));
  5715. #else
  5716. buffered_writer.write('\xef', '\xbb', '\xbf');
  5717. #endif
  5718. }
  5719. if (!(flags & format_no_declaration) && !impl::has_declaration(_root))
  5720. {
  5721. buffered_writer.write_string(PUGIXML_TEXT("<?xml version=\"1.0\""));
  5722. if (encoding == encoding_latin1) buffered_writer.write_string(PUGIXML_TEXT(" encoding=\"ISO-8859-1\""));
  5723. buffered_writer.write('?', '>');
  5724. if (!(flags & format_raw)) buffered_writer.write('\n');
  5725. }
  5726. impl::node_output(buffered_writer, _root, indent, flags, 0);
  5727. buffered_writer.flush();
  5728. }
  5729. #ifndef PUGIXML_NO_STL
  5730. PUGI__FN void xml_document::save(std::basic_ostream<char, std::char_traits<char> >& stream, const char_t* indent, unsigned int flags, xml_encoding encoding) const
  5731. {
  5732. xml_writer_stream writer(stream);
  5733. save(writer, indent, flags, encoding);
  5734. }
  5735. PUGI__FN void xml_document::save(std::basic_ostream<wchar_t, std::char_traits<wchar_t> >& stream, const char_t* indent, unsigned int flags) const
  5736. {
  5737. xml_writer_stream writer(stream);
  5738. save(writer, indent, flags, encoding_wchar);
  5739. }
  5740. #endif
  5741. PUGI__FN bool xml_document::save_file(const char* path_, const char_t* indent, unsigned int flags, xml_encoding encoding) const
  5742. {
  5743. using impl::auto_deleter; // MSVC7 workaround
  5744. auto_deleter<FILE> file(impl::open_file(path_, (flags & format_save_file_text) ? "w" : "wb"), impl::close_file);
  5745. return impl::save_file_impl(*this, file.data, indent, flags, encoding);
  5746. }
  5747. PUGI__FN bool xml_document::save_file(const wchar_t* path_, const char_t* indent, unsigned int flags, xml_encoding encoding) const
  5748. {
  5749. using impl::auto_deleter; // MSVC7 workaround
  5750. auto_deleter<FILE> file(impl::open_file_wide(path_, (flags & format_save_file_text) ? L"w" : L"wb"), impl::close_file);
  5751. return impl::save_file_impl(*this, file.data, indent, flags, encoding);
  5752. }
  5753. PUGI__FN xml_node xml_document::document_element() const
  5754. {
  5755. assert(_root);
  5756. for (xml_node_struct* i = _root->first_child; i; i = i->next_sibling)
  5757. if (PUGI__NODETYPE(i) == node_element)
  5758. return xml_node(i);
  5759. return xml_node();
  5760. }
  5761. #ifndef PUGIXML_NO_STL
  5762. PUGI__FN std::string PUGIXML_FUNCTION as_utf8(const wchar_t* str)
  5763. {
  5764. assert(str);
  5765. return impl::as_utf8_impl(str, impl::strlength_wide(str));
  5766. }
  5767. PUGI__FN std::string PUGIXML_FUNCTION as_utf8(const std::basic_string<wchar_t>& str)
  5768. {
  5769. return impl::as_utf8_impl(str.c_str(), str.size());
  5770. }
  5771. PUGI__FN std::basic_string<wchar_t> PUGIXML_FUNCTION as_wide(const char* str)
  5772. {
  5773. assert(str);
  5774. return impl::as_wide_impl(str, strlen(str));
  5775. }
  5776. PUGI__FN std::basic_string<wchar_t> PUGIXML_FUNCTION as_wide(const std::string& str)
  5777. {
  5778. return impl::as_wide_impl(str.c_str(), str.size());
  5779. }
  5780. #endif
  5781. PUGI__FN void PUGIXML_FUNCTION set_memory_management_functions(allocation_function allocate, deallocation_function deallocate)
  5782. {
  5783. impl::xml_memory::allocate = allocate;
  5784. impl::xml_memory::deallocate = deallocate;
  5785. }
  5786. PUGI__FN allocation_function PUGIXML_FUNCTION get_memory_allocation_function()
  5787. {
  5788. return impl::xml_memory::allocate;
  5789. }
  5790. PUGI__FN deallocation_function PUGIXML_FUNCTION get_memory_deallocation_function()
  5791. {
  5792. return impl::xml_memory::deallocate;
  5793. }
  5794. }
  5795. #if !defined(PUGIXML_NO_STL) && (defined(_MSC_VER) || defined(__ICC))
  5796. namespace std
  5797. {
  5798. // Workarounds for (non-standard) iterator category detection for older versions (MSVC7/IC8 and earlier)
  5799. PUGI__FN std::bidirectional_iterator_tag _Iter_cat(const pugi::xml_node_iterator&)
  5800. {
  5801. return std::bidirectional_iterator_tag();
  5802. }
  5803. PUGI__FN std::bidirectional_iterator_tag _Iter_cat(const pugi::xml_attribute_iterator&)
  5804. {
  5805. return std::bidirectional_iterator_tag();
  5806. }
  5807. PUGI__FN std::bidirectional_iterator_tag _Iter_cat(const pugi::xml_named_node_iterator&)
  5808. {
  5809. return std::bidirectional_iterator_tag();
  5810. }
  5811. }
  5812. #endif
  5813. #if !defined(PUGIXML_NO_STL) && defined(__SUNPRO_CC)
  5814. namespace std
  5815. {
  5816. // Workarounds for (non-standard) iterator category detection
  5817. PUGI__FN std::bidirectional_iterator_tag __iterator_category(const pugi::xml_node_iterator&)
  5818. {
  5819. return std::bidirectional_iterator_tag();
  5820. }
  5821. PUGI__FN std::bidirectional_iterator_tag __iterator_category(const pugi::xml_attribute_iterator&)
  5822. {
  5823. return std::bidirectional_iterator_tag();
  5824. }
  5825. PUGI__FN std::bidirectional_iterator_tag __iterator_category(const pugi::xml_named_node_iterator&)
  5826. {
  5827. return std::bidirectional_iterator_tag();
  5828. }
  5829. }
  5830. #endif
  5831. #ifndef PUGIXML_NO_XPATH
  5832. // STL replacements
  5833. PUGI__NS_BEGIN
  // Function object that compares two values of the same type with operator==.
  struct equal_to
  {
    template <typename T> bool operator()(const T& a, const T& b) const
    {
      const bool same = (a == b);

      return same;
    }
  };
  // Function object that compares two values of the same type with operator!=.
  struct not_equal_to
  {
    template <typename T> bool operator()(const T& a, const T& b) const
    {
      const bool different = (a != b);

      return different;
    }
  };
  // Function object that compares two values of the same type with operator<.
  struct less
  {
    template <typename T> bool operator()(const T& a, const T& b) const
    {
      const bool before = (a < b);

      return before;
    }
  };
  // Function object that compares two values of the same type with operator<=.
  struct less_equal
  {
    template <typename T> bool operator()(const T& a, const T& b) const
    {
      const bool not_after = (a <= b);

      return not_after;
    }
  };
  // Exchanges two values through a temporary copy (copy construction plus two assignments).
  template <typename T> inline void swap(T& lhs, T& rhs)
  {
    T saved = lhs;

    lhs = rhs;
    rhs = saved;
  }
  5868. template <typename I, typename Pred> PUGI__FN I min_element(I begin, I end, const Pred& pred)
  5869. {
  5870. I result = begin;
  5871. for (I it = begin + 1; it != end; ++it)
  5872. if (pred(*it, *result))
  5873. result = it;
  5874. return result;
  5875. }
  5876. template <typename I> PUGI__FN void reverse(I begin, I end)
  5877. {
  5878. while (end - begin > 1)
  5879. swap(*begin++, *--end);
  5880. }
  5881. template <typename I> PUGI__FN I unique(I begin, I end)
  5882. {
  5883. // fast skip head
  5884. while (end - begin > 1 && *begin != *(begin + 1))
  5885. begin++;
  5886. if (begin == end)
  5887. return begin;
  5888. // last written element
  5889. I write = begin++;
  5890. // merge unique elements
  5891. while (begin != end)
  5892. {
  5893. if (*begin != *write)
  5894. *++write = *begin++;
  5895. else
  5896. begin++;
  5897. }
  5898. // past-the-end (write points to live element)
  5899. return write + 1;
  5900. }
  5901. template <typename T, typename Pred> PUGI__FN void insertion_sort(T* begin, T* end, const Pred& pred)
  5902. {
  5903. if (begin == end)
  5904. return;
  5905. for (T* it = begin + 1; it != end; ++it)
  5906. {
  5907. T val = *it;
  5908. T* hole = it;
  5909. // move hole backwards
  5910. while (hole > begin && pred(val, *(hole - 1)))
  5911. {
  5912. *hole = *(hole - 1);
  5913. hole--;
  5914. }
  5915. // fill hole with element
  5916. *hole = val;
  5917. }
  5918. }
  // Returns the iterator (one of first, middle, last) that refers to the median of the three
  // referenced elements under pred. Only the local iterator copies are swapped; the elements
  // themselves are not moved.
  template <typename I, typename Pred> inline I median3(I first, I middle, I last, const Pred& pred)
  {
    // order the first two
    if (pred(*middle, *first))
    {
      swap(middle, first);
    }

    // move the largest of the three to last
    if (pred(*last, *middle))
    {
      swap(last, middle);
    }

    // re-order the first two after the previous step
    if (pred(*middle, *first))
    {
      swap(middle, first);
    }

    return middle;
  }
  5929. template <typename T, typename Pred> PUGI__FN void partition3(T* begin, T* end, T pivot, const Pred& pred, T** out_eqbeg, T** out_eqend)
  5930. {
  5931. // invariant: array is split into 4 groups: = < ? > (each variable denotes the boundary between the groups)
  5932. T* eq = begin;
  5933. T* lt = begin;
  5934. T* gt = end;
  5935. while (lt < gt)
  5936. {
  5937. if (pred(*lt, pivot))
  5938. lt++;
  5939. else if (*lt == pivot)
  5940. swap(*eq++, *lt++);
  5941. else
  5942. swap(*lt, *--gt);
  5943. }
  5944. // we now have just 4 groups: = < >; move equal elements to the middle
  5945. T* eqbeg = gt;
  5946. for (T* it = begin; it != eq; ++it)
  5947. swap(*it, *--eqbeg);
  5948. *out_eqbeg = eqbeg;
  5949. *out_eqend = gt;
  5950. }
  5951. template <typename I, typename Pred> PUGI__FN void sort(I begin, I end, const Pred& pred)
  5952. {
  5953. // sort large chunks
  5954. while (end - begin > 16)
  5955. {
  5956. // find median element
  5957. I middle = begin + (end - begin) / 2;
  5958. I median = median3(begin, middle, end - 1, pred);
  5959. // partition in three chunks (< = >)
  5960. I eqbeg, eqend;
  5961. partition3(begin, end, *median, pred, &eqbeg, &eqend);
  5962. // loop on larger half
  5963. if (eqbeg - begin > end - eqend)
  5964. {
  5965. sort(eqend, end, pred);
  5966. end = eqbeg;
  5967. }
  5968. else
  5969. {
  5970. sort(begin, eqbeg, pred);
  5971. begin = eqend;
  5972. }
  5973. }
  5974. // insertion sort small chunk
  5975. insertion_sort(begin, end, pred);
  5976. }
  5977. PUGI__FN bool hash_insert(const void** table, size_t size, const void* key)
  5978. {
  5979. assert(key);
  5980. unsigned int h = static_cast<unsigned int>(reinterpret_cast<uintptr_t>(key));
  5981. // MurmurHash3 32-bit finalizer
  5982. h ^= h >> 16;
  5983. h *= 0x85ebca6bu;
  5984. h ^= h >> 13;
  5985. h *= 0xc2b2ae35u;
  5986. h ^= h >> 16;
  5987. size_t hashmod = size - 1;
  5988. size_t bucket = h & hashmod;
  5989. for (size_t probe = 0; probe <= hashmod; ++probe)
  5990. {
  5991. if (table[bucket] == 0)
  5992. {
  5993. table[bucket] = key;
  5994. return true;
  5995. }
  5996. if (table[bucket] == key)
  5997. return false;
  5998. // hash collision, quadratic probing
  5999. bucket = (bucket + probe + 1) & hashmod;
  6000. }
  6001. assert(false && "Hash table is full"); // unreachable
  6002. return false;
  6003. }
  6004. PUGI__NS_END
  6005. // Allocator used for AST and evaluation stacks
  6006. PUGI__NS_BEGIN
// Size in bytes of the data area declared inside one xpath_memory_block.
// Can be overridden at build time by defining PUGIXML_MEMORY_XPATH_PAGE_SIZE; defaults to 4096.
static const size_t xpath_memory_page_size =
#ifdef PUGIXML_MEMORY_XPATH_PAGE_SIZE
	PUGIXML_MEMORY_XPATH_PAGE_SIZE
#else
	4096
#endif
	;

// Granularity that allocation sizes are rounded up to: the larger of sizeof(double) and sizeof(void*).
// The rounding code masks with (alignment - 1), which presumably assumes this value is a power of two.
static const uintptr_t xpath_memory_block_alignment = sizeof(double) > sizeof(void*) ? sizeof(double) : sizeof(void*);
// One page of XPath allocator memory; pages form a singly linked chain through 'next'.
// Blocks created by xpath_allocator::allocate are sized as offsetof(xpath_memory_block, data) + capacity,
// so 'capacity' (not the declared array length) is the authoritative size of the data area.
struct xpath_memory_block
{
	xpath_memory_block* next; // following page in the chain, or 0 for the last one
	size_t capacity; // number of usable bytes starting at data[0]

	union
	{
		char data[xpath_memory_page_size]; // payload storage
		double alignment; // not accessed in this section; presumably present only to give 'data' the alignment of double
	};
};
// Bump allocator over a chain of xpath_memory_block pages.
// Memory is handed out sequentially from the newest page (_root); individual objects are never freed,
// instead whole pages are dropped by revert() / release().
struct xpath_allocator
{
	xpath_memory_block* _root; // newest page; older pages are reachable through _root->next
	size_t _root_size; // number of bytes already handed out from _root
	bool* _error; // optional flag that is set to true when a page allocation fails (may be 0)

	xpath_allocator(xpath_memory_block* root, bool* error = 0): _root(root), _root_size(0), _error(error)
	{
	}

	// Returns a block of at least 'size' bytes, or 0 if a new page was needed and could not be allocated
	// (in which case *_error is set when _error is non-null).
	void* allocate(size_t size)
	{
		// round size up to block alignment boundary
		size = (size + xpath_memory_block_alignment - 1) & ~(xpath_memory_block_alignment - 1);

		if (_root_size + size <= _root->capacity)
		{
			// fast path: the request fits into the current page
			void* buf = &_root->data[0] + _root_size;
			_root_size += size;
			return buf;
		}
		else
		{
			// make sure we have at least 1/4th of the page free after allocation to satisfy subsequent allocation requests
			size_t block_capacity_base = sizeof(_root->data);
			size_t block_capacity_req = size + block_capacity_base / 4;
			size_t block_capacity = (block_capacity_base > block_capacity_req) ? block_capacity_base : block_capacity_req;

			// the page header precedes the data area
			size_t block_size = block_capacity + offsetof(xpath_memory_block, data);

			xpath_memory_block* block = static_cast<xpath_memory_block*>(xml_memory::allocate(block_size));
			if (!block)
			{
				if (_error) *_error = true;
				return 0;
			}

			// the new page becomes the head of the chain and the request is placed at its start
			block->next = _root;
			block->capacity = block_capacity;

			_root = block;
			_root_size = size;

			return block->data;
		}
	}

	// Grows the most recently allocated object from old_size to new_size bytes (ptr == 0 behaves like allocate).
	// Returns the possibly relocated object, or 0 if a new page could not be allocated.
	void* reallocate(void* ptr, size_t old_size, size_t new_size)
	{
		// round size up to block alignment boundary
		old_size = (old_size + xpath_memory_block_alignment - 1) & ~(xpath_memory_block_alignment - 1);
		new_size = (new_size + xpath_memory_block_alignment - 1) & ~(xpath_memory_block_alignment - 1);

		// we can only reallocate the last object
		assert(ptr == 0 || static_cast<char*>(ptr) + old_size == &_root->data[0] + _root_size);

		// try to reallocate the object inplace
		if (ptr && _root_size - old_size + new_size <= _root->capacity)
		{
			_root_size = _root_size - old_size + new_size;
			return ptr;
		}

		// allocate a new block
		void* result = allocate(new_size);
		if (!result) return 0;

		// we have a new block
		if (ptr)
		{
			// copy old data (we only support growing)
			assert(new_size >= old_size);
			memcpy(result, ptr, old_size);

			// free the previous page if it had no other objects
			assert(_root->data == result);
			assert(_root->next);

			// ptr sitting at the very start of the previous page means it was the only object there
			if (_root->next->data == ptr)
			{
				// deallocate the whole page, unless it was the first one
				xpath_memory_block* next = _root->next->next;

				if (next)
				{
					xml_memory::deallocate(_root->next);
					_root->next = next;
				}
			}
		}

		return result;
	}

	// Rolls the allocator back to a previously copied state, freeing every page allocated since then.
	void revert(const xpath_allocator& state)
	{
		// free all new pages
		xpath_memory_block* cur = _root;

		while (cur != state._root)
		{
			xpath_memory_block* next = cur->next;

			xml_memory::deallocate(cur);

			cur = next;
		}

		// restore state
		_root = state._root;
		_root_size = state._root_size;
	}

	// Frees every page except the last one in the chain (the one whose 'next' is 0).
	// _root is not updated here, so the allocator must not be used afterwards.
	void release()
	{
		xpath_memory_block* cur = _root;
		assert(cur);

		while (cur->next)
		{
			xpath_memory_block* next = cur->next;

			xml_memory::deallocate(cur);

			cur = next;
		}
	}
};
  6127. struct xpath_allocator_capture
  6128. {
  6129. xpath_allocator_capture(xpath_allocator* alloc): _target(alloc), _state(*alloc)
  6130. {
  6131. }
  6132. ~xpath_allocator_capture()
  6133. {
  6134. _target->revert(_state);
  6135. }
  6136. xpath_allocator* _target;
  6137. xpath_allocator _state;
  6138. };
// Pair of allocators handed around together; xpath_stack_data points these at its own
// 'result' and 'temp' allocators.
struct xpath_stack
{
	xpath_allocator* result; // NOTE(review): presumably for values returned to the caller - confirm against the evaluator
	xpath_allocator* temp; // NOTE(review): presumably for scratch data - confirm against the evaluator
};
  6144. struct xpath_stack_data
  6145. {
  6146. xpath_memory_block blocks[2];
  6147. xpath_allocator result;
  6148. xpath_allocator temp;
  6149. xpath_stack stack;
  6150. bool oom;
  6151. xpath_stack_data(): result(blocks + 0, &oom), temp(blocks + 1, &oom), oom(false)
  6152. {
  6153. blocks[0].next = blocks[1].next = 0;
  6154. blocks[0].capacity = blocks[1].capacity = sizeof(blocks[0].data);
  6155. stack.result = &result;
  6156. stack.temp = &temp;
  6157. }
  6158. ~xpath_stack_data()
  6159. {
  6160. result.release();
  6161. temp.release();
  6162. }
  6163. };
  6164. PUGI__NS_END
  6165. // String class
  6166. PUGI__NS_BEGIN
  6167. class xpath_string
  6168. {
  6169. const char_t* _buffer;
  6170. bool _uses_heap;
  6171. size_t _length_heap;
  6172. static char_t* duplicate_string(const char_t* string, size_t length, xpath_allocator* alloc)
  6173. {
  6174. char_t* result = static_cast<char_t*>(alloc->allocate((length + 1) * sizeof(char_t)));
  6175. if (!result) return 0;
  6176. memcpy(result, string, length * sizeof(char_t));
  6177. result[length] = 0;
  6178. return result;
  6179. }
  6180. xpath_string(const char_t* buffer, bool uses_heap_, size_t length_heap): _buffer(buffer), _uses_heap(uses_heap_), _length_heap(length_heap)
  6181. {
  6182. }
  6183. public:
  6184. static xpath_string from_const(const char_t* str)
  6185. {
  6186. return xpath_string(str, false, 0);
  6187. }
  6188. static xpath_string from_heap_preallocated(const char_t* begin, const char_t* end)
  6189. {
  6190. assert(begin <= end && *end == 0);
  6191. return xpath_string(begin, true, static_cast<size_t>(end - begin));
  6192. }
  6193. static xpath_string from_heap(const char_t* begin, const char_t* end, xpath_allocator* alloc)
  6194. {
  6195. assert(begin <= end);
  6196. if (begin == end)
  6197. return xpath_string();
  6198. size_t length = static_cast<size_t>(end - begin);
  6199. const char_t* data = duplicate_string(begin, length, alloc);
  6200. return data ? xpath_string(data, true, length) : xpath_string();
  6201. }
  6202. xpath_string(): _buffer(PUGIXML_TEXT("")), _uses_heap(false), _length_heap(0)
  6203. {
  6204. }
  6205. void append(const xpath_string& o, xpath_allocator* alloc)
  6206. {
  6207. // skip empty sources
  6208. if (!*o._buffer) return;
  6209. // fast append for constant empty target and constant source
  6210. if (!*_buffer && !_uses_heap && !o._uses_heap)
  6211. {
  6212. _buffer = o._buffer;
  6213. }
  6214. else
  6215. {
  6216. // need to make heap copy
  6217. size_t target_length = length();
  6218. size_t source_length = o.length();
  6219. size_t result_length = target_length + source_length;
  6220. // allocate new buffer
  6221. char_t* result = static_cast<char_t*>(alloc->reallocate(_uses_heap ? const_cast<char_t*>(_buffer) : 0, (target_length + 1) * sizeof(char_t), (result_length + 1) * sizeof(char_t)));
  6222. if (!result) return;
  6223. // append first string to the new buffer in case there was no reallocation
  6224. if (!_uses_heap) memcpy(result, _buffer, target_length * sizeof(char_t));
  6225. // append second string to the new buffer
  6226. memcpy(result + target_length, o._buffer, source_length * sizeof(char_t));
  6227. result[result_length] = 0;
  6228. // finalize
  6229. _buffer = result;
  6230. _uses_heap = true;
  6231. _length_heap = result_length;
  6232. }
  6233. }
  6234. const char_t* c_str() const
  6235. {
  6236. return _buffer;
  6237. }
  6238. size_t length() const
  6239. {
  6240. return _uses_heap ? _length_heap : strlength(_buffer);
  6241. }
  6242. char_t* data(xpath_allocator* alloc)
  6243. {
  6244. // make private heap copy
  6245. if (!_uses_heap)
  6246. {
  6247. size_t length_ = strlength(_buffer);
  6248. const char_t* data_ = duplicate_string(_buffer, length_, alloc);
  6249. if (!data_) return 0;
  6250. _buffer = data_;
  6251. _uses_heap = true;
  6252. _length_heap = length_;
  6253. }
  6254. return const_cast<char_t*>(_buffer);
  6255. }
  6256. bool empty() const
  6257. {
  6258. return *_buffer == 0;
  6259. }
  6260. bool operator==(const xpath_string& o) const
  6261. {
  6262. return strequal(_buffer, o._buffer);
  6263. }
  6264. bool operator!=(const xpath_string& o) const
  6265. {
  6266. return !strequal(_buffer, o._buffer);
  6267. }
  6268. bool uses_heap() const
  6269. {
  6270. return _uses_heap;
  6271. }
  6272. };
  6273. PUGI__NS_END
  6274. PUGI__NS_BEGIN
  6275. PUGI__FN bool starts_with(const char_t* string, const char_t* pattern)
  6276. {
  6277. while (*pattern && *string == *pattern)
  6278. {
  6279. string++;
  6280. pattern++;
  6281. }
  6282. return *pattern == 0;
  6283. }
// Returns a pointer to the first occurrence of c in the zero-terminated string s, or 0 if there is none.
// Dispatches to the wide or narrow C library routine depending on PUGIXML_WCHAR_MODE.
PUGI__FN const char_t* find_char(const char_t* s, char_t c)
{
#ifdef PUGIXML_WCHAR_MODE
	return wcschr(s, c);
#else
	return strchr(s, c);
#endif
}
  6292. PUGI__FN const char_t* find_substring(const char_t* s, const char_t* p)
  6293. {
  6294. #ifdef PUGIXML_WCHAR_MODE
  6295. // MSVC6 wcsstr bug workaround (if s is empty it always returns 0)
  6296. return (*p == 0) ? s : wcsstr(s, p);
  6297. #else
  6298. return strstr(s, p);
  6299. #endif
  6300. }
  6301. // Converts symbol to lower case, if it is an ASCII one
  6302. PUGI__FN char_t tolower_ascii(char_t ch)
  6303. {
  6304. return static_cast<unsigned int>(ch - 'A') < 26 ? static_cast<char_t>(ch | ' ') : ch;
  6305. }
  6306. PUGI__FN xpath_string string_value(const xpath_node& na, xpath_allocator* alloc)
  6307. {
  6308. if (na.attribute())
  6309. return xpath_string::from_const(na.attribute().value());
  6310. else
  6311. {
  6312. xml_node n = na.node();
  6313. switch (n.type())
  6314. {
  6315. case node_pcdata:
  6316. case node_cdata:
  6317. case node_comment:
  6318. case node_pi:
  6319. return xpath_string::from_const(n.value());
  6320. case node_document:
  6321. case node_element:
  6322. {
  6323. xpath_string result;
  6324. // element nodes can have value if parse_embed_pcdata was used
  6325. if (n.value()[0])
  6326. result.append(xpath_string::from_const(n.value()), alloc);
  6327. xml_node cur = n.first_child();
  6328. while (cur && cur != n)
  6329. {
  6330. if (cur.type() == node_pcdata || cur.type() == node_cdata)
  6331. result.append(xpath_string::from_const(cur.value()), alloc);
  6332. if (cur.first_child())
  6333. cur = cur.first_child();
  6334. else if (cur.next_sibling())
  6335. cur = cur.next_sibling();
  6336. else
  6337. {
  6338. while (!cur.next_sibling() && cur != n)
  6339. cur = cur.parent();
  6340. if (cur != n) cur = cur.next_sibling();
  6341. }
  6342. }
  6343. return result;
  6344. }
  6345. default:
  6346. return xpath_string();
  6347. }
  6348. }
  6349. }
  6350. PUGI__FN bool node_is_before_sibling(xml_node_struct* ln, xml_node_struct* rn)
  6351. {
  6352. assert(ln->parent == rn->parent);
  6353. // there is no common ancestor (the shared parent is null), nodes are from different documents
  6354. if (!ln->parent) return ln < rn;
  6355. // determine sibling order
  6356. xml_node_struct* ls = ln;
  6357. xml_node_struct* rs = rn;
  6358. while (ls && rs)
  6359. {
  6360. if (ls == rn) return true;
  6361. if (rs == ln) return false;
  6362. ls = ls->next_sibling;
  6363. rs = rs->next_sibling;
  6364. }
  6365. // if rn sibling chain ended ln must be before rn
  6366. return !rs;
  6367. }
  6368. PUGI__FN bool node_is_before(xml_node_struct* ln, xml_node_struct* rn)
  6369. {
  6370. // find common ancestor at the same depth, if any
  6371. xml_node_struct* lp = ln;
  6372. xml_node_struct* rp = rn;
  6373. while (lp && rp && lp->parent != rp->parent)
  6374. {
  6375. lp = lp->parent;
  6376. rp = rp->parent;
  6377. }
  6378. // parents are the same!
  6379. if (lp && rp) return node_is_before_sibling(lp, rp);
  6380. // nodes are at different depths, need to normalize heights
  6381. bool left_higher = !lp;
  6382. while (lp)
  6383. {
  6384. lp = lp->parent;
  6385. ln = ln->parent;
  6386. }
  6387. while (rp)
  6388. {
  6389. rp = rp->parent;
  6390. rn = rn->parent;
  6391. }
  6392. // one node is the ancestor of the other
  6393. if (ln == rn) return left_higher;
  6394. // find common ancestor... again
  6395. while (ln->parent != rn->parent)
  6396. {
  6397. ln = ln->parent;
  6398. rn = rn->parent;
  6399. }
  6400. return node_is_before_sibling(ln, rn);
  6401. }
  6402. PUGI__FN bool node_is_ancestor(xml_node_struct* parent, xml_node_struct* node)
  6403. {
  6404. while (node && node != parent) node = node->parent;
  6405. return parent && node == parent;
  6406. }
  6407. PUGI__FN const void* document_buffer_order(const xpath_node& xnode)
  6408. {
  6409. xml_node_struct* node = xnode.node().internal_object();
  6410. if (node)
  6411. {
  6412. if ((get_document(node).header & xml_memory_page_contents_shared_mask) == 0)
  6413. {
  6414. if (node->name && (node->header & impl::xml_memory_page_name_allocated_or_shared_mask) == 0) return node->name;
  6415. if (node->value && (node->header & impl::xml_memory_page_value_allocated_or_shared_mask) == 0) return node->value;
  6416. }
  6417. return 0;
  6418. }
  6419. xml_attribute_struct* attr = xnode.attribute().internal_object();
  6420. if (attr)
  6421. {
  6422. if ((get_document(attr).header & xml_memory_page_contents_shared_mask) == 0)
  6423. {
  6424. if ((attr->header & impl::xml_memory_page_name_allocated_or_shared_mask) == 0) return attr->name;
  6425. if ((attr->header & impl::xml_memory_page_value_allocated_or_shared_mask) == 0) return attr->value;
  6426. }
  6427. return 0;
  6428. }
  6429. return 0;
  6430. }
  6431. struct document_order_comparator
  6432. {
  6433. bool operator()(const xpath_node& lhs, const xpath_node& rhs) const
  6434. {
  6435. // optimized document order based check
  6436. const void* lo = document_buffer_order(lhs);
  6437. const void* ro = document_buffer_order(rhs);
  6438. if (lo && ro) return lo < ro;
  6439. // slow comparison
  6440. xml_node ln = lhs.node(), rn = rhs.node();
  6441. // compare attributes
  6442. if (lhs.attribute() && rhs.attribute())
  6443. {
  6444. // shared parent
  6445. if (lhs.parent() == rhs.parent())
  6446. {
  6447. // determine sibling order
  6448. for (xml_attribute a = lhs.attribute(); a; a = a.next_attribute())
  6449. if (a == rhs.attribute())
  6450. return true;
  6451. return false;
  6452. }
  6453. // compare attribute parents
  6454. ln = lhs.parent();
  6455. rn = rhs.parent();
  6456. }
  6457. else if (lhs.attribute())
  6458. {
  6459. // attributes go after the parent element
  6460. if (lhs.parent() == rhs.node()) return false;
  6461. ln = lhs.parent();
  6462. }
  6463. else if (rhs.attribute())
  6464. {
  6465. // attributes go after the parent element
  6466. if (rhs.parent() == lhs.node()) return true;
  6467. rn = rhs.parent();
  6468. }
  6469. if (ln == rn) return false;
  6470. if (!ln || !rn) return ln < rn;
  6471. return node_is_before(ln.internal_object(), rn.internal_object());
  6472. }
  6473. };
// Produces a NaN value without relying on library macros
PUGI__FN double gen_nan()
{
#if defined(__STDC_IEC_559__) || ((FLT_RADIX - 0 == 2) && (FLT_MAX_EXP - 0 == 128) && (FLT_MANT_DIG - 0 == 24))
	// float is an IEEE-754 style 32-bit binary type here: build the NaN from its bit pattern
	PUGI__STATIC_ASSERT(sizeof(float) == sizeof(uint32_t));
	typedef uint32_t UI; // BCC5 workaround
	union { float f; UI i; } u;
	u.i = 0x7fc00000; // all exponent bits set, top mantissa bit set
	return double(u.f);
#else
	// fallback: compute 0/0 at run time; volatile keeps the compiler from folding the division
	const volatile double zero = 0.0;
	return zero / zero;
#endif
}
  6488. PUGI__FN bool is_nan(double value)
  6489. {
  6490. #if defined(PUGI__MSVC_CRT_VERSION) || defined(__BORLANDC__)
  6491. return !!_isnan(value);
  6492. #elif defined(fpclassify) && defined(FP_NAN)
  6493. return fpclassify(value) == FP_NAN;
  6494. #else
  6495. // fallback
  6496. const volatile double v = value;
  6497. return v != v;
  6498. #endif
  6499. }
  6500. PUGI__FN const char_t* convert_number_to_string_special(double value)
  6501. {
  6502. #if defined(PUGI__MSVC_CRT_VERSION) || defined(__BORLANDC__)
  6503. if (_finite(value)) return (value == 0) ? PUGIXML_TEXT("0") : 0;
  6504. if (_isnan(value)) return PUGIXML_TEXT("NaN");
  6505. return value > 0 ? PUGIXML_TEXT("Infinity") : PUGIXML_TEXT("-Infinity");
  6506. #elif defined(fpclassify) && defined(FP_NAN) && defined(FP_INFINITE) && defined(FP_ZERO)
  6507. switch (fpclassify(value))
  6508. {
  6509. case FP_NAN:
  6510. return PUGIXML_TEXT("NaN");
  6511. case FP_INFINITE:
  6512. return value > 0 ? PUGIXML_TEXT("Infinity") : PUGIXML_TEXT("-Infinity");
  6513. case FP_ZERO:
  6514. return PUGIXML_TEXT("0");
  6515. default:
  6516. return 0;
  6517. }
  6518. #else
  6519. // fallback
  6520. const volatile double v = value;
  6521. if (v == 0) return PUGIXML_TEXT("0");
  6522. if (v != v) return PUGIXML_TEXT("NaN");
  6523. if (v * 2 == v) return value > 0 ? PUGIXML_TEXT("Infinity") : PUGIXML_TEXT("-Infinity");
  6524. return 0;
  6525. #endif
  6526. }
  6527. PUGI__FN bool convert_number_to_boolean(double value)
  6528. {
  6529. return (value != 0 && !is_nan(value));
  6530. }
  6531. PUGI__FN void truncate_zeros(char* begin, char* end)
  6532. {
  6533. while (begin != end && end[-1] == '0') end--;
  6534. *end = 0;
  6535. }
// gets mantissa digits in the form of 0.xxxxx with 0. implied and the exponent
// On return *out_mantissa points into 'buffer' at a zero-terminated digit string without trailing zeros,
// and *out_exponent is the decimal exponent such that the value's magnitude is 0.<mantissa> * 10^exponent.
#if defined(PUGI__MSVC_CRT_VERSION) && PUGI__MSVC_CRT_VERSION >= 1400 && !defined(_WIN32_WCE)
PUGI__FN void convert_number_to_mantissa_exponent(double value, char (&buffer)[32], char** out_mantissa, int* out_exponent)
{
	// get base values
	int sign, exponent;
	_ecvt_s(buffer, sizeof(buffer), value, DBL_DIG + 1, &exponent, &sign);

	// truncate redundant zeros
	truncate_zeros(buffer, buffer + strlen(buffer));

	// fill results (the sign reported by _ecvt_s is not used here)
	*out_mantissa = buffer;
	*out_exponent = exponent;
}
#else
PUGI__FN void convert_number_to_mantissa_exponent(double value, char (&buffer)[32], char** out_mantissa, int* out_exponent)
{
	// get a scientific notation value with IEEE DBL_DIG decimals
	PUGI__SNPRINTF(buffer, "%.*e", DBL_DIG, value);

	// get the exponent (possibly negative)
	char* exponent_string = strchr(buffer, 'e');
	assert(exponent_string);

	int exponent = atoi(exponent_string + 1);

	// extract mantissa string: skip sign
	char* mantissa = buffer[0] == '-' ? buffer + 1 : buffer;
	assert(mantissa[0] != '0' && mantissa[1] == '.');

	// divide mantissa by 10 to eliminate integer part:
	// the leading digit overwrites the decimal point, the string then starts one character later,
	// and the exponent is bumped to compensate
	mantissa[1] = mantissa[0];
	mantissa++;
	exponent++;

	// remove extra mantissa digits and zero-terminate mantissa (this overwrites the 'e' or a digit before it)
	truncate_zeros(mantissa, exponent_string);

	// fill results
	*out_mantissa = mantissa;
	*out_exponent = exponent;
}
#endif
  6572. PUGI__FN xpath_string convert_number_to_string(double value, xpath_allocator* alloc)
  6573. {
  6574. // try special number conversion
  6575. const char_t* special = convert_number_to_string_special(value);
  6576. if (special) return xpath_string::from_const(special);
  6577. // get mantissa + exponent form
  6578. char mantissa_buffer[32];
  6579. char* mantissa;
  6580. int exponent;
  6581. convert_number_to_mantissa_exponent(value, mantissa_buffer, &mantissa, &exponent);
  6582. // allocate a buffer of suitable length for the number
  6583. size_t result_size = strlen(mantissa_buffer) + (exponent > 0 ? exponent : -exponent) + 4;
  6584. char_t* result = static_cast<char_t*>(alloc->allocate(sizeof(char_t) * result_size));
  6585. if (!result) return xpath_string();
  6586. // make the number!
  6587. char_t* s = result;
  6588. // sign
  6589. if (value < 0) *s++ = '-';
  6590. // integer part
  6591. if (exponent <= 0)
  6592. {
  6593. *s++ = '0';
  6594. }
  6595. else
  6596. {
  6597. while (exponent > 0)
  6598. {
  6599. assert(*mantissa == 0 || static_cast<unsigned int>(*mantissa - '0') <= 9);
  6600. *s++ = *mantissa ? *mantissa++ : '0';
  6601. exponent--;
  6602. }
  6603. }
  6604. // fractional part
  6605. if (*mantissa)
  6606. {
  6607. // decimal point
  6608. *s++ = '.';
  6609. // extra zeroes from negative exponent
  6610. while (exponent < 0)
  6611. {
  6612. *s++ = '0';
  6613. exponent++;
  6614. }
  6615. // extra mantissa digits
  6616. while (*mantissa)
  6617. {
  6618. assert(static_cast<unsigned int>(*mantissa - '0') <= 9);
  6619. *s++ = *mantissa++;
  6620. }
  6621. }
  6622. // zero-terminate
  6623. assert(s < result + result_size);
  6624. *s = 0;
  6625. return xpath_string::from_heap_preallocated(result, s);
  6626. }
  6627. PUGI__FN bool check_string_to_number_format(const char_t* string)
  6628. {
  6629. // parse leading whitespace
  6630. while (PUGI__IS_CHARTYPE(*string, ct_space)) ++string;
  6631. // parse sign
  6632. if (*string == '-') ++string;
  6633. if (!*string) return false;
  6634. // if there is no integer part, there should be a decimal part with at least one digit
  6635. if (!PUGI__IS_CHARTYPEX(string[0], ctx_digit) && (string[0] != '.' || !PUGI__IS_CHARTYPEX(string[1], ctx_digit))) return false;
  6636. // parse integer part
  6637. while (PUGI__IS_CHARTYPEX(*string, ctx_digit)) ++string;
  6638. // parse decimal part
  6639. if (*string == '.')
  6640. {
  6641. ++string;
  6642. while (PUGI__IS_CHARTYPEX(*string, ctx_digit)) ++string;
  6643. }
  6644. // parse trailing whitespace
  6645. while (PUGI__IS_CHARTYPE(*string, ct_space)) ++string;
  6646. return *string == 0;
  6647. }
  6648. PUGI__FN double convert_string_to_number(const char_t* string)
  6649. {
  6650. // check string format
  6651. if (!check_string_to_number_format(string)) return gen_nan();
  6652. // parse string
  6653. #ifdef PUGIXML_WCHAR_MODE
  6654. return wcstod(string, 0);
  6655. #else
  6656. return strtod(string, 0);
  6657. #endif
  6658. }
  6659. PUGI__FN bool convert_string_to_number_scratch(char_t (&buffer)[32], const char_t* begin, const char_t* end, double* out_result)
  6660. {
  6661. size_t length = static_cast<size_t>(end - begin);
  6662. char_t* scratch = buffer;
  6663. if (length >= sizeof(buffer) / sizeof(buffer[0]))
  6664. {
  6665. // need to make dummy on-heap copy
  6666. scratch = static_cast<char_t*>(xml_memory::allocate((length + 1) * sizeof(char_t)));
  6667. if (!scratch) return false;
  6668. }
  6669. // copy string to zero-terminated buffer and perform conversion
  6670. memcpy(scratch, begin, length * sizeof(char_t));
  6671. scratch[length] = 0;
  6672. *out_result = convert_string_to_number(scratch);
  6673. // free dummy buffer
  6674. if (scratch != buffer) xml_memory::deallocate(scratch);
  6675. return true;
  6676. }
  6677. PUGI__FN double round_nearest(double value)
  6678. {
  6679. return floor(value + 0.5);
  6680. }
  6681. PUGI__FN double round_nearest_nzero(double value)
  6682. {
  6683. // same as round_nearest, but returns -0 for [-0.5, -0]
  6684. // ceil is used to differentiate between +0 and -0 (we return -0 for [-0.5, -0] and +0 for +0)
  6685. return (value >= -0.5 && value <= 0) ? ceil(value) : floor(value + 0.5);
  6686. }
  6687. PUGI__FN const char_t* qualified_name(const xpath_node& node)
  6688. {
  6689. return node.attribute() ? node.attribute().name() : node.node().name();
  6690. }
  6691. PUGI__FN const char_t* local_name(const xpath_node& node)
  6692. {
  6693. const char_t* name = qualified_name(node);
  6694. const char_t* p = find_char(name, ':');
  6695. return p ? p + 1 : name;
  6696. }
  6697. struct namespace_uri_predicate
  6698. {
  6699. const char_t* prefix;
  6700. size_t prefix_length;
  6701. namespace_uri_predicate(const char_t* name)
  6702. {
  6703. const char_t* pos = find_char(name, ':');
  6704. prefix = pos ? name : 0;
  6705. prefix_length = pos ? static_cast<size_t>(pos - name) : 0;
  6706. }
  6707. bool operator()(xml_attribute a) const
  6708. {
  6709. const char_t* name = a.name();
  6710. if (!starts_with(name, PUGIXML_TEXT("xmlns"))) return false;
  6711. return prefix ? name[5] == ':' && strequalrange(name + 6, prefix, prefix_length) : name[5] == 0;
  6712. }
  6713. };
  6714. PUGI__FN const char_t* namespace_uri(xml_node node)
  6715. {
  6716. namespace_uri_predicate pred = node.name();
  6717. xml_node p = node;
  6718. while (p)
  6719. {
  6720. xml_attribute a = p.find_attribute(pred);
  6721. if (a) return a.value();
  6722. p = p.parent();
  6723. }
  6724. return PUGIXML_TEXT("");
  6725. }
  6726. PUGI__FN const char_t* namespace_uri(xml_attribute attr, xml_node parent)
  6727. {
  6728. namespace_uri_predicate pred = attr.name();
  6729. // Default namespace does not apply to attributes
  6730. if (!pred.prefix) return PUGIXML_TEXT("");
  6731. xml_node p = parent;
  6732. while (p)
  6733. {
  6734. xml_attribute a = p.find_attribute(pred);
  6735. if (a) return a.value();
  6736. p = p.parent();
  6737. }
  6738. return PUGIXML_TEXT("");
  6739. }
  6740. PUGI__FN const char_t* namespace_uri(const xpath_node& node)
  6741. {
  6742. return node.attribute() ? namespace_uri(node.attribute(), node.parent()) : namespace_uri(node.node());
  6743. }
  6744. PUGI__FN char_t* normalize_space(char_t* buffer)
  6745. {
  6746. char_t* write = buffer;
  6747. for (char_t* it = buffer; *it; )
  6748. {
  6749. char_t ch = *it++;
  6750. if (PUGI__IS_CHARTYPE(ch, ct_space))
  6751. {
  6752. // replace whitespace sequence with single space
  6753. while (PUGI__IS_CHARTYPE(*it, ct_space)) it++;
  6754. // avoid leading spaces
  6755. if (write != buffer) *write++ = ' ';
  6756. }
  6757. else *write++ = ch;
  6758. }
  6759. // remove trailing space
  6760. if (write != buffer && PUGI__IS_CHARTYPE(write[-1], ct_space)) write--;
  6761. // zero-terminate
  6762. *write = 0;
  6763. return write;
  6764. }
  6765. PUGI__FN char_t* translate(char_t* buffer, const char_t* from, const char_t* to, size_t to_length)
  6766. {
  6767. char_t* write = buffer;
  6768. while (*buffer)
  6769. {
  6770. PUGI__DMC_VOLATILE char_t ch = *buffer++;
  6771. const char_t* pos = find_char(from, ch);
  6772. if (!pos)
  6773. *write++ = ch; // do not process
  6774. else if (static_cast<size_t>(pos - from) < to_length)
  6775. *write++ = to[pos - from]; // replace
  6776. }
  6777. // zero-terminate
  6778. *write = 0;
  6779. return write;
  6780. }
  6781. PUGI__FN unsigned char* translate_table_generate(xpath_allocator* alloc, const char_t* from, const char_t* to)
  6782. {
  6783. unsigned char table[128] = {0};
  6784. while (*from)
  6785. {
  6786. unsigned int fc = static_cast<unsigned int>(*from);
  6787. unsigned int tc = static_cast<unsigned int>(*to);
  6788. if (fc >= 128 || tc >= 128)
  6789. return 0;
  6790. // code=128 means "skip character"
  6791. if (!table[fc])
  6792. table[fc] = static_cast<unsigned char>(tc ? tc : 128);
  6793. from++;
  6794. if (tc) to++;
  6795. }
  6796. for (int i = 0; i < 128; ++i)
  6797. if (!table[i])
  6798. table[i] = static_cast<unsigned char>(i);
  6799. void* result = alloc->allocate(sizeof(table));
  6800. if (!result) return 0;
  6801. memcpy(result, table, sizeof(table));
  6802. return static_cast<unsigned char*>(result);
  6803. }
  6804. PUGI__FN char_t* translate_table(char_t* buffer, const unsigned char* table)
  6805. {
  6806. char_t* write = buffer;
  6807. while (*buffer)
  6808. {
  6809. char_t ch = *buffer++;
  6810. unsigned int index = static_cast<unsigned int>(ch);
  6811. if (index < 128)
  6812. {
  6813. unsigned char code = table[index];
  6814. // code=128 means "skip character" (table size is 128 so 128 can be a special value)
  6815. // this code skips these characters without extra branches
  6816. *write = static_cast<char_t>(code);
  6817. write += 1 - (code >> 7);
  6818. }
  6819. else
  6820. {
  6821. *write++ = ch;
  6822. }
  6823. }
  6824. // zero-terminate
  6825. *write = 0;
  6826. return write;
  6827. }
  6828. inline bool is_xpath_attribute(const char_t* name)
  6829. {
  6830. return !(starts_with(name, PUGIXML_TEXT("xmlns")) && (name[5] == 0 || name[5] == ':'));
  6831. }
  6832. struct xpath_variable_boolean: xpath_variable
  6833. {
  6834. xpath_variable_boolean(): xpath_variable(xpath_type_boolean), value(false)
  6835. {
  6836. }
  6837. bool value;
  6838. char_t name[1];
  6839. };
  6840. struct xpath_variable_number: xpath_variable
  6841. {
  6842. xpath_variable_number(): xpath_variable(xpath_type_number), value(0)
  6843. {
  6844. }
  6845. double value;
  6846. char_t name[1];
  6847. };
  6848. struct xpath_variable_string: xpath_variable
  6849. {
  6850. xpath_variable_string(): xpath_variable(xpath_type_string), value(0)
  6851. {
  6852. }
  6853. ~xpath_variable_string()
  6854. {
  6855. if (value) xml_memory::deallocate(value);
  6856. }
  6857. char_t* value;
  6858. char_t name[1];
  6859. };
  6860. struct xpath_variable_node_set: xpath_variable
  6861. {
  6862. xpath_variable_node_set(): xpath_variable(xpath_type_node_set)
  6863. {
  6864. }
  6865. xpath_node_set value;
  6866. char_t name[1];
  6867. };
// Shared default-constructed node set.
// NOTE(review): not referenced in this section; presumably returned where a node set is requested but none is available - confirm at the use sites
static const xpath_node_set dummy_node_set;
  6869. PUGI__FN PUGI__UNSIGNED_OVERFLOW unsigned int hash_string(const char_t* str)
  6870. {
  6871. // Jenkins one-at-a-time hash (http://en.wikipedia.org/wiki/Jenkins_hash_function#one-at-a-time)
  6872. unsigned int result = 0;
  6873. while (*str)
  6874. {
  6875. result += static_cast<unsigned int>(*str++);
  6876. result += result << 10;
  6877. result ^= result >> 6;
  6878. }
  6879. result += result << 3;
  6880. result ^= result >> 11;
  6881. result += result << 15;
  6882. return result;
  6883. }
  6884. template <typename T> PUGI__FN T* new_xpath_variable(const char_t* name)
  6885. {
  6886. size_t length = strlength(name);
  6887. if (length == 0) return 0; // empty variable names are invalid
  6888. // $$ we can't use offsetof(T, name) because T is non-POD, so we just allocate additional length characters
  6889. void* memory = xml_memory::allocate(sizeof(T) + length * sizeof(char_t));
  6890. if (!memory) return 0;
  6891. T* result = new (memory) T();
  6892. memcpy(result->name, name, (length + 1) * sizeof(char_t));
  6893. return result;
  6894. }
  6895. PUGI__FN xpath_variable* new_xpath_variable(xpath_value_type type, const char_t* name)
  6896. {
  6897. switch (type)
  6898. {
  6899. case xpath_type_node_set:
  6900. return new_xpath_variable<xpath_variable_node_set>(name);
  6901. case xpath_type_number:
  6902. return new_xpath_variable<xpath_variable_number>(name);
  6903. case xpath_type_string:
  6904. return new_xpath_variable<xpath_variable_string>(name);
  6905. case xpath_type_boolean:
  6906. return new_xpath_variable<xpath_variable_boolean>(name);
  6907. default:
  6908. return 0;
  6909. }
  6910. }
// Destroys a variable object and returns its memory to xml_memory.
// The destructor is invoked explicitly and the storage is released separately.
template <typename T> PUGI__FN void delete_xpath_variable(T* var)
{
	var->~T();
	xml_memory::deallocate(var);
}
  6916. PUGI__FN void delete_xpath_variable(xpath_value_type type, xpath_variable* var)
  6917. {
  6918. switch (type)
  6919. {
  6920. case xpath_type_node_set:
  6921. delete_xpath_variable(static_cast<xpath_variable_node_set*>(var));
  6922. break;
  6923. case xpath_type_number:
  6924. delete_xpath_variable(static_cast<xpath_variable_number*>(var));
  6925. break;
  6926. case xpath_type_string:
  6927. delete_xpath_variable(static_cast<xpath_variable_string*>(var));
  6928. break;
  6929. case xpath_type_boolean:
  6930. delete_xpath_variable(static_cast<xpath_variable_boolean*>(var));
  6931. break;
  6932. default:
  6933. assert(false && "Invalid variable type"); // unreachable
  6934. }
  6935. }
  6936. PUGI__FN bool copy_xpath_variable(xpath_variable* lhs, const xpath_variable* rhs)
  6937. {
  6938. switch (rhs->type())
  6939. {
  6940. case xpath_type_node_set:
  6941. return lhs->set(static_cast<const xpath_variable_node_set*>(rhs)->value);
  6942. case xpath_type_number:
  6943. return lhs->set(static_cast<const xpath_variable_number*>(rhs)->value);
  6944. case xpath_type_string:
  6945. return lhs->set(static_cast<const xpath_variable_string*>(rhs)->value);
  6946. case xpath_type_boolean:
  6947. return lhs->set(static_cast<const xpath_variable_boolean*>(rhs)->value);
  6948. default:
  6949. assert(false && "Invalid variable type"); // unreachable
  6950. return false;
  6951. }
  6952. }
  6953. PUGI__FN bool get_variable_scratch(char_t (&buffer)[32], xpath_variable_set* set, const char_t* begin, const char_t* end, xpath_variable** out_result)
  6954. {
  6955. size_t length = static_cast<size_t>(end - begin);
  6956. char_t* scratch = buffer;
  6957. if (length >= sizeof(buffer) / sizeof(buffer[0]))
  6958. {
  6959. // need to make dummy on-heap copy
  6960. scratch = static_cast<char_t*>(xml_memory::allocate((length + 1) * sizeof(char_t)));
  6961. if (!scratch) return false;
  6962. }
  6963. // copy string to zero-terminated buffer and perform lookup
  6964. memcpy(scratch, begin, length * sizeof(char_t));
  6965. scratch[length] = 0;
  6966. *out_result = set->get(scratch);
  6967. // free dummy buffer
  6968. if (scratch != buffer) xml_memory::deallocate(scratch);
  6969. return true;
  6970. }
  6971. PUGI__NS_END
  6972. // Internal node set class
  6973. PUGI__NS_BEGIN
  6974. PUGI__FN xpath_node_set::type_t xpath_get_order(const xpath_node* begin, const xpath_node* end)
  6975. {
  6976. if (end - begin < 2)
  6977. return xpath_node_set::type_sorted;
  6978. document_order_comparator cmp;
  6979. bool first = cmp(begin[0], begin[1]);
  6980. for (const xpath_node* it = begin + 1; it + 1 < end; ++it)
  6981. if (cmp(it[0], it[1]) != first)
  6982. return xpath_node_set::type_unsorted;
  6983. return first ? xpath_node_set::type_sorted : xpath_node_set::type_sorted_reverse;
  6984. }
  6985. PUGI__FN xpath_node_set::type_t xpath_sort(xpath_node* begin, xpath_node* end, xpath_node_set::type_t type, bool rev)
  6986. {
  6987. xpath_node_set::type_t order = rev ? xpath_node_set::type_sorted_reverse : xpath_node_set::type_sorted;
  6988. if (type == xpath_node_set::type_unsorted)
  6989. {
  6990. xpath_node_set::type_t sorted = xpath_get_order(begin, end);
  6991. if (sorted == xpath_node_set::type_unsorted)
  6992. {
  6993. sort(begin, end, document_order_comparator());
  6994. type = xpath_node_set::type_sorted;
  6995. }
  6996. else
  6997. type = sorted;
  6998. }
  6999. if (type != order) reverse(begin, end);
  7000. return order;
  7001. }
  7002. PUGI__FN xpath_node xpath_first(const xpath_node* begin, const xpath_node* end, xpath_node_set::type_t type)
  7003. {
  7004. if (begin == end) return xpath_node();
  7005. switch (type)
  7006. {
  7007. case xpath_node_set::type_sorted:
  7008. return *begin;
  7009. case xpath_node_set::type_sorted_reverse:
  7010. return *(end - 1);
  7011. case xpath_node_set::type_unsorted:
  7012. return *min_element(begin, end, document_order_comparator());
  7013. default:
  7014. assert(false && "Invalid node set type"); // unreachable
  7015. return xpath_node();
  7016. }
  7017. }
  7018. class xpath_node_set_raw
  7019. {
  7020. xpath_node_set::type_t _type;
  7021. xpath_node* _begin;
  7022. xpath_node* _end;
  7023. xpath_node* _eos;
  7024. public:
  7025. xpath_node_set_raw(): _type(xpath_node_set::type_unsorted), _begin(0), _end(0), _eos(0)
  7026. {
  7027. }
  7028. xpath_node* begin() const
  7029. {
  7030. return _begin;
  7031. }
  7032. xpath_node* end() const
  7033. {
  7034. return _end;
  7035. }
  7036. bool empty() const
  7037. {
  7038. return _begin == _end;
  7039. }
  7040. size_t size() const
  7041. {
  7042. return static_cast<size_t>(_end - _begin);
  7043. }
  7044. xpath_node first() const
  7045. {
  7046. return xpath_first(_begin, _end, _type);
  7047. }
  7048. void push_back_grow(const xpath_node& node, xpath_allocator* alloc);
  7049. void push_back(const xpath_node& node, xpath_allocator* alloc)
  7050. {
  7051. if (_end != _eos)
  7052. *_end++ = node;
  7053. else
  7054. push_back_grow(node, alloc);
  7055. }
  7056. void append(const xpath_node* begin_, const xpath_node* end_, xpath_allocator* alloc)
  7057. {
  7058. if (begin_ == end_) return;
  7059. size_t size_ = static_cast<size_t>(_end - _begin);
  7060. size_t capacity = static_cast<size_t>(_eos - _begin);
  7061. size_t count = static_cast<size_t>(end_ - begin_);
  7062. if (size_ + count > capacity)
  7063. {
  7064. // reallocate the old array or allocate a new one
  7065. xpath_node* data = static_cast<xpath_node*>(alloc->reallocate(_begin, capacity * sizeof(xpath_node), (size_ + count) * sizeof(xpath_node)));
  7066. if (!data) return;
  7067. // finalize
  7068. _begin = data;
  7069. _end = data + size_;
  7070. _eos = data + size_ + count;
  7071. }
  7072. memcpy(_end, begin_, count * sizeof(xpath_node));
  7073. _end += count;
  7074. }
  7075. void sort_do()
  7076. {
  7077. _type = xpath_sort(_begin, _end, _type, false);
  7078. }
  7079. void truncate(xpath_node* pos)
  7080. {
  7081. assert(_begin <= pos && pos <= _end);
  7082. _end = pos;
  7083. }
  7084. void remove_duplicates(xpath_allocator* alloc)
  7085. {
  7086. if (_type == xpath_node_set::type_unsorted && _end - _begin > 2)
  7087. {
  7088. xpath_allocator_capture cr(alloc);
  7089. size_t size_ = static_cast<size_t>(_end - _begin);
  7090. size_t hash_size = 1;
  7091. while (hash_size < size_ + size_ / 2) hash_size *= 2;
  7092. const void** hash_data = static_cast<const void**>(alloc->allocate(hash_size * sizeof(void**)));
  7093. if (!hash_data) return;
  7094. memset(hash_data, 0, hash_size * sizeof(const void**));
  7095. xpath_node* write = _begin;
  7096. for (xpath_node* it = _begin; it != _end; ++it)
  7097. {
  7098. const void* attr = it->attribute().internal_object();
  7099. const void* node = it->node().internal_object();
  7100. const void* key = attr ? attr : node;
  7101. if (key && hash_insert(hash_data, hash_size, key))
  7102. {
  7103. *write++ = *it;
  7104. }
  7105. }
  7106. _end = write;
  7107. }
  7108. else
  7109. {
  7110. _end = unique(_begin, _end);
  7111. }
  7112. }
  7113. xpath_node_set::type_t type() const
  7114. {
  7115. return _type;
  7116. }
  7117. void set_type(xpath_node_set::type_t value)
  7118. {
  7119. _type = value;
  7120. }
  7121. };
  7122. PUGI__FN_NO_INLINE void xpath_node_set_raw::push_back_grow(const xpath_node& node, xpath_allocator* alloc)
  7123. {
  7124. size_t capacity = static_cast<size_t>(_eos - _begin);
  7125. // get new capacity (1.5x rule)
  7126. size_t new_capacity = capacity + capacity / 2 + 1;
  7127. // reallocate the old array or allocate a new one
  7128. xpath_node* data = static_cast<xpath_node*>(alloc->reallocate(_begin, capacity * sizeof(xpath_node), new_capacity * sizeof(xpath_node)));
  7129. if (!data) return;
  7130. // finalize
  7131. _begin = data;
  7132. _end = data + capacity;
  7133. _eos = data + new_capacity;
  7134. // push
  7135. *_end++ = node;
  7136. }
  7137. PUGI__NS_END
  7138. PUGI__NS_BEGIN
  7139. struct xpath_context
  7140. {
  7141. xpath_node n;
  7142. size_t position, size;
  7143. xpath_context(const xpath_node& n_, size_t position_, size_t size_): n(n_), position(position_), size(size_)
  7144. {
  7145. }
  7146. };
  // Token kinds produced by xpath_lexer
  enum lexeme_t
  {
      lex_none = 0,            // unrecognized or malformed input
      lex_equal,               // =
      lex_not_equal,           // !=
      lex_less,                // <
      lex_greater,             // >
      lex_less_or_equal,       // <=
      lex_greater_or_equal,    // >=
      lex_plus,                // +
      lex_minus,               // -
      lex_multiply,            // *
      lex_union,               // |
      lex_var_ref,             // $name or $prefix:name
      lex_open_brace,          // (
      lex_close_brace,         // )
      lex_quoted_string,       // "..." or '...'
      lex_number,              // digits with an optional fractional part
      lex_slash,               // /
      lex_double_slash,        // //
      lex_open_square_brace,   // [
      lex_close_square_brace,  // ]
      lex_string,              // name, prefix:name or prefix:*
      lex_comma,               // ,
      lex_axis_attribute,      // @
      lex_dot,                 // .
      lex_double_dot,          // ..
      lex_double_colon,        // ::
      lex_eof                  // end of the query string
  };
  7177. struct xpath_lexer_string
  7178. {
  7179. const char_t* begin;
  7180. const char_t* end;
  7181. xpath_lexer_string(): begin(0), end(0)
  7182. {
  7183. }
  7184. bool operator==(const char_t* other) const
  7185. {
  7186. size_t length = static_cast<size_t>(end - begin);
  7187. return strequalrange(other, begin, length);
  7188. }
  7189. };
  7190. class xpath_lexer
  7191. {
  7192. const char_t* _cur;
  7193. const char_t* _cur_lexeme_pos;
  7194. xpath_lexer_string _cur_lexeme_contents;
  7195. lexeme_t _cur_lexeme;
  7196. public:
  7197. explicit xpath_lexer(const char_t* query): _cur(query)
  7198. {
  7199. next();
  7200. }
  7201. const char_t* state() const
  7202. {
  7203. return _cur;
  7204. }
  7205. void next()
  7206. {
  7207. const char_t* cur = _cur;
  7208. while (PUGI__IS_CHARTYPE(*cur, ct_space)) ++cur;
  7209. // save lexeme position for error reporting
  7210. _cur_lexeme_pos = cur;
  7211. switch (*cur)
  7212. {
  7213. case 0:
  7214. _cur_lexeme = lex_eof;
  7215. break;
  7216. case '>':
  7217. if (*(cur+1) == '=')
  7218. {
  7219. cur += 2;
  7220. _cur_lexeme = lex_greater_or_equal;
  7221. }
  7222. else
  7223. {
  7224. cur += 1;
  7225. _cur_lexeme = lex_greater;
  7226. }
  7227. break;
  7228. case '<':
  7229. if (*(cur+1) == '=')
  7230. {
  7231. cur += 2;
  7232. _cur_lexeme = lex_less_or_equal;
  7233. }
  7234. else
  7235. {
  7236. cur += 1;
  7237. _cur_lexeme = lex_less;
  7238. }
  7239. break;
  7240. case '!':
  7241. if (*(cur+1) == '=')
  7242. {
  7243. cur += 2;
  7244. _cur_lexeme = lex_not_equal;
  7245. }
  7246. else
  7247. {
  7248. _cur_lexeme = lex_none;
  7249. }
  7250. break;
  7251. case '=':
  7252. cur += 1;
  7253. _cur_lexeme = lex_equal;
  7254. break;
  7255. case '+':
  7256. cur += 1;
  7257. _cur_lexeme = lex_plus;
  7258. break;
  7259. case '-':
  7260. cur += 1;
  7261. _cur_lexeme = lex_minus;
  7262. break;
  7263. case '*':
  7264. cur += 1;
  7265. _cur_lexeme = lex_multiply;
  7266. break;
  7267. case '|':
  7268. cur += 1;
  7269. _cur_lexeme = lex_union;
  7270. break;
  7271. case '$':
  7272. cur += 1;
  7273. if (PUGI__IS_CHARTYPEX(*cur, ctx_start_symbol))
  7274. {
  7275. _cur_lexeme_contents.begin = cur;
  7276. while (PUGI__IS_CHARTYPEX(*cur, ctx_symbol)) cur++;
  7277. if (cur[0] == ':' && PUGI__IS_CHARTYPEX(cur[1], ctx_symbol)) // qname
  7278. {
  7279. cur++; // :
  7280. while (PUGI__IS_CHARTYPEX(*cur, ctx_symbol)) cur++;
  7281. }
  7282. _cur_lexeme_contents.end = cur;
  7283. _cur_lexeme = lex_var_ref;
  7284. }
  7285. else
  7286. {
  7287. _cur_lexeme = lex_none;
  7288. }
  7289. break;
  7290. case '(':
  7291. cur += 1;
  7292. _cur_lexeme = lex_open_brace;
  7293. break;
  7294. case ')':
  7295. cur += 1;
  7296. _cur_lexeme = lex_close_brace;
  7297. break;
  7298. case '[':
  7299. cur += 1;
  7300. _cur_lexeme = lex_open_square_brace;
  7301. break;
  7302. case ']':
  7303. cur += 1;
  7304. _cur_lexeme = lex_close_square_brace;
  7305. break;
  7306. case ',':
  7307. cur += 1;
  7308. _cur_lexeme = lex_comma;
  7309. break;
  7310. case '/':
  7311. if (*(cur+1) == '/')
  7312. {
  7313. cur += 2;
  7314. _cur_lexeme = lex_double_slash;
  7315. }
  7316. else
  7317. {
  7318. cur += 1;
  7319. _cur_lexeme = lex_slash;
  7320. }
  7321. break;
  7322. case '.':
  7323. if (*(cur+1) == '.')
  7324. {
  7325. cur += 2;
  7326. _cur_lexeme = lex_double_dot;
  7327. }
  7328. else if (PUGI__IS_CHARTYPEX(*(cur+1), ctx_digit))
  7329. {
  7330. _cur_lexeme_contents.begin = cur; // .
  7331. ++cur;
  7332. while (PUGI__IS_CHARTYPEX(*cur, ctx_digit)) cur++;
  7333. _cur_lexeme_contents.end = cur;
  7334. _cur_lexeme = lex_number;
  7335. }
  7336. else
  7337. {
  7338. cur += 1;
  7339. _cur_lexeme = lex_dot;
  7340. }
  7341. break;
  7342. case '@':
  7343. cur += 1;
  7344. _cur_lexeme = lex_axis_attribute;
  7345. break;
  7346. case '"':
  7347. case '\'':
  7348. {
  7349. char_t terminator = *cur;
  7350. ++cur;
  7351. _cur_lexeme_contents.begin = cur;
  7352. while (*cur && *cur != terminator) cur++;
  7353. _cur_lexeme_contents.end = cur;
  7354. if (!*cur)
  7355. _cur_lexeme = lex_none;
  7356. else
  7357. {
  7358. cur += 1;
  7359. _cur_lexeme = lex_quoted_string;
  7360. }
  7361. break;
  7362. }
  7363. case ':':
  7364. if (*(cur+1) == ':')
  7365. {
  7366. cur += 2;
  7367. _cur_lexeme = lex_double_colon;
  7368. }
  7369. else
  7370. {
  7371. _cur_lexeme = lex_none;
  7372. }
  7373. break;
  7374. default:
  7375. if (PUGI__IS_CHARTYPEX(*cur, ctx_digit))
  7376. {
  7377. _cur_lexeme_contents.begin = cur;
  7378. while (PUGI__IS_CHARTYPEX(*cur, ctx_digit)) cur++;
  7379. if (*cur == '.')
  7380. {
  7381. cur++;
  7382. while (PUGI__IS_CHARTYPEX(*cur, ctx_digit)) cur++;
  7383. }
  7384. _cur_lexeme_contents.end = cur;
  7385. _cur_lexeme = lex_number;
  7386. }
  7387. else if (PUGI__IS_CHARTYPEX(*cur, ctx_start_symbol))
  7388. {
  7389. _cur_lexeme_contents.begin = cur;
  7390. while (PUGI__IS_CHARTYPEX(*cur, ctx_symbol)) cur++;
  7391. if (cur[0] == ':')
  7392. {
  7393. if (cur[1] == '*') // namespace test ncname:*
  7394. {
  7395. cur += 2; // :*
  7396. }
  7397. else if (PUGI__IS_CHARTYPEX(cur[1], ctx_symbol)) // namespace test qname
  7398. {
  7399. cur++; // :
  7400. while (PUGI__IS_CHARTYPEX(*cur, ctx_symbol)) cur++;
  7401. }
  7402. }
  7403. _cur_lexeme_contents.end = cur;
  7404. _cur_lexeme = lex_string;
  7405. }
  7406. else
  7407. {
  7408. _cur_lexeme = lex_none;
  7409. }
  7410. }
  7411. _cur = cur;
  7412. }
  7413. lexeme_t current() const
  7414. {
  7415. return _cur_lexeme;
  7416. }
  7417. const char_t* current_pos() const
  7418. {
  7419. return _cur_lexeme_pos;
  7420. }
  7421. const xpath_lexer_string& contents() const
  7422. {
  7423. assert(_cur_lexeme == lex_var_ref || _cur_lexeme == lex_number || _cur_lexeme == lex_string || _cur_lexeme == lex_quoted_string);
  7424. return _cur_lexeme_contents;
  7425. }
  7426. };
  // Kinds of nodes in the XPath expression tree.
  // In the comments 'left', 'right' and 'third' name the operand subtrees.
  enum ast_type_t
  {
      ast_unknown,
      ast_op_or,                      // left or right
      ast_op_and,                     // left and right
      ast_op_equal,                   // left = right
      ast_op_not_equal,               // left != right
      ast_op_less,                    // left < right
      ast_op_greater,                 // left > right
      ast_op_less_or_equal,           // left <= right
      ast_op_greater_or_equal,        // left >= right
      ast_op_add,                     // left + right
      ast_op_subtract,                // left - right
      ast_op_multiply,                // left * right
      ast_op_divide,                  // left div right
      ast_op_mod,                     // left mod right
      ast_op_negate,                  // - left (unary minus)
      ast_op_union,                   // left | right
      ast_predicate,                  // apply predicate to set; next points to next predicate
      ast_filter,                     // select * from left where right
      ast_string_constant,            // string constant
      ast_number_constant,            // number constant
      ast_variable,                   // variable
      ast_func_last,                  // last()
      ast_func_position,              // position()
      ast_func_count,                 // count(left)
      ast_func_id,                    // id(left)
      ast_func_local_name_0,          // local-name()
      ast_func_local_name_1,          // local-name(left)
      ast_func_namespace_uri_0,       // namespace-uri()
      ast_func_namespace_uri_1,       // namespace-uri(left)
      ast_func_name_0,                // name()
      ast_func_name_1,                // name(left)
      ast_func_string_0,              // string()
      ast_func_string_1,              // string(left)
      ast_func_concat,                // concat(left, right, siblings)
      ast_func_starts_with,           // starts-with(left, right)
      ast_func_contains,              // contains(left, right)
      ast_func_substring_before,      // substring-before(left, right)
      ast_func_substring_after,       // substring-after(left, right)
      ast_func_substring_2,           // substring(left, right)
      ast_func_substring_3,           // substring(left, right, third)
      ast_func_string_length_0,       // string-length()
      ast_func_string_length_1,       // string-length(left)
      ast_func_normalize_space_0,     // normalize-space()
      ast_func_normalize_space_1,     // normalize-space(left)
      ast_func_translate,             // translate(left, right, third)
      ast_func_boolean,               // boolean(left)
      ast_func_not,                   // not(left)
      ast_func_true,                  // true()
      ast_func_false,                 // false()
      ast_func_lang,                  // lang(left)
      ast_func_number_0,              // number()
      ast_func_number_1,              // number(left)
      ast_func_sum,                   // sum(left)
      ast_func_floor,                 // floor(left)
      ast_func_ceiling,               // ceiling(left)
      ast_func_round,                 // round(left)
      ast_step,                       // process set left with step
      ast_step_root,                  // select root node

      ast_opt_translate_table,        // translate(left, right, third) where right/third are constants
      ast_opt_compare_attribute       // @name = 'string'
  };
  // Step axes; each enumerator is named after the XPath axis it stands for
  enum axis_t
  {
      axis_ancestor,            // ancestor::
      axis_ancestor_or_self,    // ancestor-or-self::
      axis_attribute,           // attribute::
      axis_child,               // child::
      axis_descendant,          // descendant::
      axis_descendant_or_self,  // descendant-or-self::
      axis_following,           // following::
      axis_following_sibling,   // following-sibling::
      axis_namespace,           // namespace::
      axis_parent,              // parent::
      axis_preceding,           // preceding::
      axis_preceding_sibling,   // preceding-sibling::
      axis_self                 // self::
  };
  // Node tests of a step; the descriptions follow what step_push accepts for each value
  enum nodetest_t
  {
      nodetest_none,              // no test (not handled by step_push)
      nodetest_name,              // name equals the stored test string
      nodetest_type_node,         // any node
      nodetest_type_comment,      // comment nodes
      nodetest_type_pi,           // processing instructions
      nodetest_type_text,         // pcdata and cdata nodes
      nodetest_pi,                // processing instructions whose name equals the stored test string
      nodetest_all,               // any element (or any attribute on the attribute overload)
      nodetest_all_in_namespace   // name starts with the stored test string
  };
  // Predicate classification stored on predicate/filter nodes.
  // apply_predicate sends both 'constant' kinds to the constant-position path;
  // how the other two kinds are chosen is not visible in this part of the file.
  enum predicate_t
  {
      predicate_default,
      predicate_posinv,
      predicate_constant,
      predicate_constant_one
  };
  // Tells node set evaluation how much of the result the caller needs
  // (consumed by eval_once to decide whether evaluation may stop early)
  enum nodeset_eval_t
  {
      nodeset_eval_all,
      nodeset_eval_any,
      nodeset_eval_first
  };
  7531. template <axis_t N> struct axis_to_type
  7532. {
  7533. static const axis_t axis;
  7534. };
  7535. template <axis_t N> const axis_t axis_to_type<N>::axis = N;
  7536. class xpath_ast_node
  7537. {
  7538. private:
  7539. // node type
  7540. char _type;
  7541. char _rettype;
  7542. // for ast_step
  7543. char _axis;
  7544. // for ast_step/ast_predicate/ast_filter
  7545. char _test;
  7546. // tree node structure
  7547. xpath_ast_node* _left;
  7548. xpath_ast_node* _right;
  7549. xpath_ast_node* _next;
  7550. union
  7551. {
  7552. // value for ast_string_constant
  7553. const char_t* string;
  7554. // value for ast_number_constant
  7555. double number;
  7556. // variable for ast_variable
  7557. xpath_variable* variable;
  7558. // node test for ast_step (node name/namespace/node type/pi target)
  7559. const char_t* nodetest;
  7560. // table for ast_opt_translate_table
  7561. const unsigned char* table;
  7562. } _data;
  7563. xpath_ast_node(const xpath_ast_node&);
  7564. xpath_ast_node& operator=(const xpath_ast_node&);
  7565. template <class Comp> static bool compare_eq(xpath_ast_node* lhs, xpath_ast_node* rhs, const xpath_context& c, const xpath_stack& stack, const Comp& comp)
  7566. {
  7567. xpath_value_type lt = lhs->rettype(), rt = rhs->rettype();
  7568. if (lt != xpath_type_node_set && rt != xpath_type_node_set)
  7569. {
  7570. if (lt == xpath_type_boolean || rt == xpath_type_boolean)
  7571. return comp(lhs->eval_boolean(c, stack), rhs->eval_boolean(c, stack));
  7572. else if (lt == xpath_type_number || rt == xpath_type_number)
  7573. return comp(lhs->eval_number(c, stack), rhs->eval_number(c, stack));
  7574. else if (lt == xpath_type_string || rt == xpath_type_string)
  7575. {
  7576. xpath_allocator_capture cr(stack.result);
  7577. xpath_string ls = lhs->eval_string(c, stack);
  7578. xpath_string rs = rhs->eval_string(c, stack);
  7579. return comp(ls, rs);
  7580. }
  7581. }
  7582. else if (lt == xpath_type_node_set && rt == xpath_type_node_set)
  7583. {
  7584. xpath_allocator_capture cr(stack.result);
  7585. xpath_node_set_raw ls = lhs->eval_node_set(c, stack, nodeset_eval_all);
  7586. xpath_node_set_raw rs = rhs->eval_node_set(c, stack, nodeset_eval_all);
  7587. for (const xpath_node* li = ls.begin(); li != ls.end(); ++li)
  7588. for (const xpath_node* ri = rs.begin(); ri != rs.end(); ++ri)
  7589. {
  7590. xpath_allocator_capture cri(stack.result);
  7591. if (comp(string_value(*li, stack.result), string_value(*ri, stack.result)))
  7592. return true;
  7593. }
  7594. return false;
  7595. }
  7596. else
  7597. {
  7598. if (lt == xpath_type_node_set)
  7599. {
  7600. swap(lhs, rhs);
  7601. swap(lt, rt);
  7602. }
  7603. if (lt == xpath_type_boolean)
  7604. return comp(lhs->eval_boolean(c, stack), rhs->eval_boolean(c, stack));
  7605. else if (lt == xpath_type_number)
  7606. {
  7607. xpath_allocator_capture cr(stack.result);
  7608. double l = lhs->eval_number(c, stack);
  7609. xpath_node_set_raw rs = rhs->eval_node_set(c, stack, nodeset_eval_all);
  7610. for (const xpath_node* ri = rs.begin(); ri != rs.end(); ++ri)
  7611. {
  7612. xpath_allocator_capture cri(stack.result);
  7613. if (comp(l, convert_string_to_number(string_value(*ri, stack.result).c_str())))
  7614. return true;
  7615. }
  7616. return false;
  7617. }
  7618. else if (lt == xpath_type_string)
  7619. {
  7620. xpath_allocator_capture cr(stack.result);
  7621. xpath_string l = lhs->eval_string(c, stack);
  7622. xpath_node_set_raw rs = rhs->eval_node_set(c, stack, nodeset_eval_all);
  7623. for (const xpath_node* ri = rs.begin(); ri != rs.end(); ++ri)
  7624. {
  7625. xpath_allocator_capture cri(stack.result);
  7626. if (comp(l, string_value(*ri, stack.result)))
  7627. return true;
  7628. }
  7629. return false;
  7630. }
  7631. }
  7632. assert(false && "Wrong types"); // unreachable
  7633. return false;
  7634. }
  7635. static bool eval_once(xpath_node_set::type_t type, nodeset_eval_t eval)
  7636. {
  7637. return type == xpath_node_set::type_sorted ? eval != nodeset_eval_all : eval == nodeset_eval_any;
  7638. }
  7639. template <class Comp> static bool compare_rel(xpath_ast_node* lhs, xpath_ast_node* rhs, const xpath_context& c, const xpath_stack& stack, const Comp& comp)
  7640. {
  7641. xpath_value_type lt = lhs->rettype(), rt = rhs->rettype();
  7642. if (lt != xpath_type_node_set && rt != xpath_type_node_set)
  7643. return comp(lhs->eval_number(c, stack), rhs->eval_number(c, stack));
  7644. else if (lt == xpath_type_node_set && rt == xpath_type_node_set)
  7645. {
  7646. xpath_allocator_capture cr(stack.result);
  7647. xpath_node_set_raw ls = lhs->eval_node_set(c, stack, nodeset_eval_all);
  7648. xpath_node_set_raw rs = rhs->eval_node_set(c, stack, nodeset_eval_all);
  7649. for (const xpath_node* li = ls.begin(); li != ls.end(); ++li)
  7650. {
  7651. xpath_allocator_capture cri(stack.result);
  7652. double l = convert_string_to_number(string_value(*li, stack.result).c_str());
  7653. for (const xpath_node* ri = rs.begin(); ri != rs.end(); ++ri)
  7654. {
  7655. xpath_allocator_capture crii(stack.result);
  7656. if (comp(l, convert_string_to_number(string_value(*ri, stack.result).c_str())))
  7657. return true;
  7658. }
  7659. }
  7660. return false;
  7661. }
  7662. else if (lt != xpath_type_node_set && rt == xpath_type_node_set)
  7663. {
  7664. xpath_allocator_capture cr(stack.result);
  7665. double l = lhs->eval_number(c, stack);
  7666. xpath_node_set_raw rs = rhs->eval_node_set(c, stack, nodeset_eval_all);
  7667. for (const xpath_node* ri = rs.begin(); ri != rs.end(); ++ri)
  7668. {
  7669. xpath_allocator_capture cri(stack.result);
  7670. if (comp(l, convert_string_to_number(string_value(*ri, stack.result).c_str())))
  7671. return true;
  7672. }
  7673. return false;
  7674. }
  7675. else if (lt == xpath_type_node_set && rt != xpath_type_node_set)
  7676. {
  7677. xpath_allocator_capture cr(stack.result);
  7678. xpath_node_set_raw ls = lhs->eval_node_set(c, stack, nodeset_eval_all);
  7679. double r = rhs->eval_number(c, stack);
  7680. for (const xpath_node* li = ls.begin(); li != ls.end(); ++li)
  7681. {
  7682. xpath_allocator_capture cri(stack.result);
  7683. if (comp(convert_string_to_number(string_value(*li, stack.result).c_str()), r))
  7684. return true;
  7685. }
  7686. return false;
  7687. }
  7688. else
  7689. {
  7690. assert(false && "Wrong types"); // unreachable
  7691. return false;
  7692. }
  7693. }
  7694. static void apply_predicate_boolean(xpath_node_set_raw& ns, size_t first, xpath_ast_node* expr, const xpath_stack& stack, bool once)
  7695. {
  7696. assert(ns.size() >= first);
  7697. assert(expr->rettype() != xpath_type_number);
  7698. size_t i = 1;
  7699. size_t size = ns.size() - first;
  7700. xpath_node* last = ns.begin() + first;
  7701. // remove_if... or well, sort of
  7702. for (xpath_node* it = last; it != ns.end(); ++it, ++i)
  7703. {
  7704. xpath_context c(*it, i, size);
  7705. if (expr->eval_boolean(c, stack))
  7706. {
  7707. *last++ = *it;
  7708. if (once) break;
  7709. }
  7710. }
  7711. ns.truncate(last);
  7712. }
  7713. static void apply_predicate_number(xpath_node_set_raw& ns, size_t first, xpath_ast_node* expr, const xpath_stack& stack, bool once)
  7714. {
  7715. assert(ns.size() >= first);
  7716. assert(expr->rettype() == xpath_type_number);
  7717. size_t i = 1;
  7718. size_t size = ns.size() - first;
  7719. xpath_node* last = ns.begin() + first;
  7720. // remove_if... or well, sort of
  7721. for (xpath_node* it = last; it != ns.end(); ++it, ++i)
  7722. {
  7723. xpath_context c(*it, i, size);
  7724. if (expr->eval_number(c, stack) == static_cast<double>(i))
  7725. {
  7726. *last++ = *it;
  7727. if (once) break;
  7728. }
  7729. }
  7730. ns.truncate(last);
  7731. }
  7732. static void apply_predicate_number_const(xpath_node_set_raw& ns, size_t first, xpath_ast_node* expr, const xpath_stack& stack)
  7733. {
  7734. assert(ns.size() >= first);
  7735. assert(expr->rettype() == xpath_type_number);
  7736. size_t size = ns.size() - first;
  7737. xpath_node* last = ns.begin() + first;
  7738. xpath_context c(xpath_node(), 1, size);
  7739. double er = expr->eval_number(c, stack);
  7740. if (er >= 1.0 && er <= static_cast<double>(size))
  7741. {
  7742. size_t eri = static_cast<size_t>(er);
  7743. if (er == static_cast<double>(eri))
  7744. {
  7745. xpath_node r = last[eri - 1];
  7746. *last++ = r;
  7747. }
  7748. }
  7749. ns.truncate(last);
  7750. }
  7751. void apply_predicate(xpath_node_set_raw& ns, size_t first, const xpath_stack& stack, bool once)
  7752. {
  7753. if (ns.size() == first) return;
  7754. assert(_type == ast_filter || _type == ast_predicate);
  7755. if (_test == predicate_constant || _test == predicate_constant_one)
  7756. apply_predicate_number_const(ns, first, _right, stack);
  7757. else if (_right->rettype() == xpath_type_number)
  7758. apply_predicate_number(ns, first, _right, stack, once);
  7759. else
  7760. apply_predicate_boolean(ns, first, _right, stack, once);
  7761. }
  7762. void apply_predicates(xpath_node_set_raw& ns, size_t first, const xpath_stack& stack, nodeset_eval_t eval)
  7763. {
  7764. if (ns.size() == first) return;
  7765. bool last_once = eval_once(ns.type(), eval);
  7766. for (xpath_ast_node* pred = _right; pred; pred = pred->_next)
  7767. pred->apply_predicate(ns, first, stack, !pred->_next && last_once);
  7768. }
  7769. bool step_push(xpath_node_set_raw& ns, xml_attribute_struct* a, xml_node_struct* parent, xpath_allocator* alloc)
  7770. {
  7771. assert(a);
  7772. const char_t* name = a->name ? a->name + 0 : PUGIXML_TEXT("");
  7773. switch (_test)
  7774. {
  7775. case nodetest_name:
  7776. if (strequal(name, _data.nodetest) && is_xpath_attribute(name))
  7777. {
  7778. ns.push_back(xpath_node(xml_attribute(a), xml_node(parent)), alloc);
  7779. return true;
  7780. }
  7781. break;
  7782. case nodetest_type_node:
  7783. case nodetest_all:
  7784. if (is_xpath_attribute(name))
  7785. {
  7786. ns.push_back(xpath_node(xml_attribute(a), xml_node(parent)), alloc);
  7787. return true;
  7788. }
  7789. break;
  7790. case nodetest_all_in_namespace:
  7791. if (starts_with(name, _data.nodetest) && is_xpath_attribute(name))
  7792. {
  7793. ns.push_back(xpath_node(xml_attribute(a), xml_node(parent)), alloc);
  7794. return true;
  7795. }
  7796. break;
  7797. default:
  7798. ;
  7799. }
  7800. return false;
  7801. }
  7802. bool step_push(xpath_node_set_raw& ns, xml_node_struct* n, xpath_allocator* alloc)
  7803. {
  7804. assert(n);
  7805. xml_node_type type = PUGI__NODETYPE(n);
  7806. switch (_test)
  7807. {
  7808. case nodetest_name:
  7809. if (type == node_element && n->name && strequal(n->name, _data.nodetest))
  7810. {
  7811. ns.push_back(xml_node(n), alloc);
  7812. return true;
  7813. }
  7814. break;
  7815. case nodetest_type_node:
  7816. ns.push_back(xml_node(n), alloc);
  7817. return true;
  7818. case nodetest_type_comment:
  7819. if (type == node_comment)
  7820. {
  7821. ns.push_back(xml_node(n), alloc);
  7822. return true;
  7823. }
  7824. break;
  7825. case nodetest_type_text:
  7826. if (type == node_pcdata || type == node_cdata)
  7827. {
  7828. ns.push_back(xml_node(n), alloc);
  7829. return true;
  7830. }
  7831. break;
  7832. case nodetest_type_pi:
  7833. if (type == node_pi)
  7834. {
  7835. ns.push_back(xml_node(n), alloc);
  7836. return true;
  7837. }
  7838. break;
  7839. case nodetest_pi:
  7840. if (type == node_pi && n->name && strequal(n->name, _data.nodetest))
  7841. {
  7842. ns.push_back(xml_node(n), alloc);
  7843. return true;
  7844. }
  7845. break;
  7846. case nodetest_all:
  7847. if (type == node_element)
  7848. {
  7849. ns.push_back(xml_node(n), alloc);
  7850. return true;
  7851. }
  7852. break;
  7853. case nodetest_all_in_namespace:
  7854. if (type == node_element && n->name && starts_with(n->name, _data.nodetest))
  7855. {
  7856. ns.push_back(xml_node(n), alloc);
  7857. return true;
  7858. }
  7859. break;
  7860. default:
  7861. assert(false && "Unknown axis"); // unreachable
  7862. }
  7863. return false;
  7864. }
  7865. template <class T> void step_fill(xpath_node_set_raw& ns, xml_node_struct* n, xpath_allocator* alloc, bool once, T)
  7866. {
  7867. const axis_t axis = T::axis;
  7868. switch (axis)
  7869. {
  7870. case axis_attribute:
  7871. {
  7872. for (xml_attribute_struct* a = n->first_attribute; a; a = a->next_attribute)
  7873. if (step_push(ns, a, n, alloc) & once)
  7874. return;
  7875. break;
  7876. }
  7877. case axis_child:
  7878. {
  7879. for (xml_node_struct* c = n->first_child; c; c = c->next_sibling)
  7880. if (step_push(ns, c, alloc) & once)
  7881. return;
  7882. break;
  7883. }
  7884. case axis_descendant:
  7885. case axis_descendant_or_self:
  7886. {
  7887. if (axis == axis_descendant_or_self)
  7888. if (step_push(ns, n, alloc) & once)
  7889. return;
  7890. xml_node_struct* cur = n->first_child;
  7891. while (cur)
  7892. {
  7893. if (step_push(ns, cur, alloc) & once)
  7894. return;
  7895. if (cur->first_child)
  7896. cur = cur->first_child;
  7897. else
  7898. {
  7899. while (!cur->next_sibling)
  7900. {
  7901. cur = cur->parent;
  7902. if (cur == n) return;
  7903. }
  7904. cur = cur->next_sibling;
  7905. }
  7906. }
  7907. break;
  7908. }
  7909. case axis_following_sibling:
  7910. {
  7911. for (xml_node_struct* c = n->next_sibling; c; c = c->next_sibling)
  7912. if (step_push(ns, c, alloc) & once)
  7913. return;
  7914. break;
  7915. }
  7916. case axis_preceding_sibling:
  7917. {
  7918. for (xml_node_struct* c = n->prev_sibling_c; c->next_sibling; c = c->prev_sibling_c)
  7919. if (step_push(ns, c, alloc) & once)
  7920. return;
  7921. break;
  7922. }
  7923. case axis_following:
  7924. {
  7925. xml_node_struct* cur = n;
  7926. // exit from this node so that we don't include descendants
  7927. while (!cur->next_sibling)
  7928. {
  7929. cur = cur->parent;
  7930. if (!cur) return;
  7931. }
  7932. cur = cur->next_sibling;
  7933. while (cur)
  7934. {
  7935. if (step_push(ns, cur, alloc) & once)
  7936. return;
  7937. if (cur->first_child)
  7938. cur = cur->first_child;
  7939. else
  7940. {
  7941. while (!cur->next_sibling)
  7942. {
  7943. cur = cur->parent;
  7944. if (!cur) return;
  7945. }
  7946. cur = cur->next_sibling;
  7947. }
  7948. }
  7949. break;
  7950. }
  7951. case axis_preceding:
  7952. {
  7953. xml_node_struct* cur = n;
  7954. // exit from this node so that we don't include descendants
  7955. while (!cur->prev_sibling_c->next_sibling)
  7956. {
  7957. cur = cur->parent;
  7958. if (!cur) return;
  7959. }
  7960. cur = cur->prev_sibling_c;
  7961. while (cur)
  7962. {
  7963. if (cur->first_child)
  7964. cur = cur->first_child->prev_sibling_c;
  7965. else
  7966. {
  7967. // leaf node, can't be ancestor
  7968. if (step_push(ns, cur, alloc) & once)
  7969. return;
  7970. while (!cur->prev_sibling_c->next_sibling)
  7971. {
  7972. cur = cur->parent;
  7973. if (!cur) return;
  7974. if (!node_is_ancestor(cur, n))
  7975. if (step_push(ns, cur, alloc) & once)
  7976. return;
  7977. }
  7978. cur = cur->prev_sibling_c;
  7979. }
  7980. }
  7981. break;
  7982. }
  7983. case axis_ancestor:
  7984. case axis_ancestor_or_self:
  7985. {
  7986. if (axis == axis_ancestor_or_self)
  7987. if (step_push(ns, n, alloc) & once)
  7988. return;
  7989. xml_node_struct* cur = n->parent;
  7990. while (cur)
  7991. {
  7992. if (step_push(ns, cur, alloc) & once)
  7993. return;
  7994. cur = cur->parent;
  7995. }
  7996. break;
  7997. }
  7998. case axis_self:
  7999. {
  8000. step_push(ns, n, alloc);
  8001. break;
  8002. }
  8003. case axis_parent:
  8004. {
  8005. if (n->parent)
  8006. step_push(ns, n->parent, alloc);
  8007. break;
  8008. }
  8009. default:
  8010. assert(false && "Unimplemented axis"); // unreachable
  8011. }
  8012. }
  8013. template <class T> void step_fill(xpath_node_set_raw& ns, xml_attribute_struct* a, xml_node_struct* p, xpath_allocator* alloc, bool once, T v)
  8014. {
  8015. const axis_t axis = T::axis;
  8016. switch (axis)
  8017. {
  8018. case axis_ancestor:
  8019. case axis_ancestor_or_self:
  8020. {
  8021. if (axis == axis_ancestor_or_self && _test == nodetest_type_node) // reject attributes based on principal node type test
  8022. if (step_push(ns, a, p, alloc) & once)
  8023. return;
  8024. xml_node_struct* cur = p;
  8025. while (cur)
  8026. {
  8027. if (step_push(ns, cur, alloc) & once)
  8028. return;
  8029. cur = cur->parent;
  8030. }
  8031. break;
  8032. }
  8033. case axis_descendant_or_self:
  8034. case axis_self:
  8035. {
  8036. if (_test == nodetest_type_node) // reject attributes based on principal node type test
  8037. step_push(ns, a, p, alloc);
  8038. break;
  8039. }
  8040. case axis_following:
  8041. {
  8042. xml_node_struct* cur = p;
  8043. while (cur)
  8044. {
  8045. if (cur->first_child)
  8046. cur = cur->first_child;
  8047. else
  8048. {
  8049. while (!cur->next_sibling)
  8050. {
  8051. cur = cur->parent;
  8052. if (!cur) return;
  8053. }
  8054. cur = cur->next_sibling;
  8055. }
  8056. if (step_push(ns, cur, alloc) & once)
  8057. return;
  8058. }
  8059. break;
  8060. }
  8061. case axis_parent:
  8062. {
  8063. step_push(ns, p, alloc);
  8064. break;
  8065. }
  8066. case axis_preceding:
  8067. {
  8068. // preceding:: axis does not include attribute nodes and attribute ancestors (they are the same as parent's ancestors), so we can reuse node preceding
  8069. step_fill(ns, p, alloc, once, v);
  8070. break;
  8071. }
  8072. default:
  8073. assert(false && "Unimplemented axis"); // unreachable
  8074. }
  8075. }
  8076. template <class T> void step_fill(xpath_node_set_raw& ns, const xpath_node& xn, xpath_allocator* alloc, bool once, T v)
  8077. {
  8078. const axis_t axis = T::axis;
  8079. const bool axis_has_attributes = (axis == axis_ancestor || axis == axis_ancestor_or_self || axis == axis_descendant_or_self || axis == axis_following || axis == axis_parent || axis == axis_preceding || axis == axis_self);
  8080. if (xn.node())
  8081. step_fill(ns, xn.node().internal_object(), alloc, once, v);
  8082. else if (axis_has_attributes && xn.attribute() && xn.parent())
  8083. step_fill(ns, xn.attribute().internal_object(), xn.parent().internal_object(), alloc, once, v);
  8084. }
  8085. template <class T> xpath_node_set_raw step_do(const xpath_context& c, const xpath_stack& stack, nodeset_eval_t eval, T v)
  8086. {
  8087. const axis_t axis = T::axis;
  8088. const bool axis_reverse = (axis == axis_ancestor || axis == axis_ancestor_or_self || axis == axis_preceding || axis == axis_preceding_sibling);
  8089. const xpath_node_set::type_t axis_type = axis_reverse ? xpath_node_set::type_sorted_reverse : xpath_node_set::type_sorted;
  8090. bool once =
  8091. (axis == axis_attribute && _test == nodetest_name) ||
  8092. (!_right && eval_once(axis_type, eval)) ||
  8093. // coverity[mixed_enums]
  8094. (_right && !_right->_next && _right->_test == predicate_constant_one);
  8095. xpath_node_set_raw ns;
  8096. ns.set_type(axis_type);
  8097. if (_left)
  8098. {
  8099. xpath_node_set_raw s = _left->eval_node_set(c, stack, nodeset_eval_all);
  8100. // self axis preserves the original order
  8101. if (axis == axis_self) ns.set_type(s.type());
  8102. for (const xpath_node* it = s.begin(); it != s.end(); ++it)
  8103. {
  8104. size_t size = ns.size();
  8105. // in general, all axes generate elements in a particular order, but there is no order guarantee if axis is applied to two nodes
  8106. if (axis != axis_self && size != 0) ns.set_type(xpath_node_set::type_unsorted);
  8107. step_fill(ns, *it, stack.result, once, v);
  8108. if (_right) apply_predicates(ns, size, stack, eval);
  8109. }
  8110. }
  8111. else
  8112. {
  8113. step_fill(ns, c.n, stack.result, once, v);
  8114. if (_right) apply_predicates(ns, 0, stack, eval);
  8115. }
  8116. // child, attribute and self axes always generate unique set of nodes
  8117. // for other axis, if the set stayed sorted, it stayed unique because the traversal algorithms do not visit the same node twice
  8118. if (axis != axis_child && axis != axis_attribute && axis != axis_self && ns.type() == xpath_node_set::type_unsorted)
  8119. ns.remove_duplicates(stack.temp);
  8120. return ns;
  8121. }
  8122. public:
  8123. xpath_ast_node(ast_type_t type, xpath_value_type rettype_, const char_t* value):
  8124. _type(static_cast<char>(type)), _rettype(static_cast<char>(rettype_)), _axis(0), _test(0), _left(0), _right(0), _next(0)
  8125. {
  8126. assert(type == ast_string_constant);
  8127. _data.string = value;
  8128. }
  8129. xpath_ast_node(ast_type_t type, xpath_value_type rettype_, double value):
  8130. _type(static_cast<char>(type)), _rettype(static_cast<char>(rettype_)), _axis(0), _test(0), _left(0), _right(0), _next(0)
  8131. {
  8132. assert(type == ast_number_constant);
  8133. _data.number = value;
  8134. }
  8135. xpath_ast_node(ast_type_t type, xpath_value_type rettype_, xpath_variable* value):
  8136. _type(static_cast<char>(type)), _rettype(static_cast<char>(rettype_)), _axis(0), _test(0), _left(0), _right(0), _next(0)
  8137. {
  8138. assert(type == ast_variable);
  8139. _data.variable = value;
  8140. }
  8141. xpath_ast_node(ast_type_t type, xpath_value_type rettype_, xpath_ast_node* left = 0, xpath_ast_node* right = 0):
  8142. _type(static_cast<char>(type)), _rettype(static_cast<char>(rettype_)), _axis(0), _test(0), _left(left), _right(right), _next(0)
  8143. {
  8144. }
  8145. xpath_ast_node(ast_type_t type, xpath_ast_node* left, axis_t axis, nodetest_t test, const char_t* contents):
  8146. _type(static_cast<char>(type)), _rettype(xpath_type_node_set), _axis(static_cast<char>(axis)), _test(static_cast<char>(test)), _left(left), _right(0), _next(0)
  8147. {
  8148. assert(type == ast_step);
  8149. _data.nodetest = contents;
  8150. }
  8151. xpath_ast_node(ast_type_t type, xpath_ast_node* left, xpath_ast_node* right, predicate_t test):
  8152. _type(static_cast<char>(type)), _rettype(xpath_type_node_set), _axis(0), _test(static_cast<char>(test)), _left(left), _right(right), _next(0)
  8153. {
  8154. assert(type == ast_filter || type == ast_predicate);
  8155. }
  8156. void set_next(xpath_ast_node* value)
  8157. {
  8158. _next = value;
  8159. }
  8160. void set_right(xpath_ast_node* value)
  8161. {
  8162. _right = value;
  8163. }
  8164. bool eval_boolean(const xpath_context& c, const xpath_stack& stack)
  8165. {
  8166. switch (_type)
  8167. {
  8168. case ast_op_or:
  8169. return _left->eval_boolean(c, stack) || _right->eval_boolean(c, stack);
  8170. case ast_op_and:
  8171. return _left->eval_boolean(c, stack) && _right->eval_boolean(c, stack);
  8172. case ast_op_equal:
  8173. return compare_eq(_left, _right, c, stack, equal_to());
  8174. case ast_op_not_equal:
  8175. return compare_eq(_left, _right, c, stack, not_equal_to());
  8176. case ast_op_less:
  8177. return compare_rel(_left, _right, c, stack, less());
  8178. case ast_op_greater:
  8179. return compare_rel(_right, _left, c, stack, less());
  8180. case ast_op_less_or_equal:
  8181. return compare_rel(_left, _right, c, stack, less_equal());
  8182. case ast_op_greater_or_equal:
  8183. return compare_rel(_right, _left, c, stack, less_equal());
  8184. case ast_func_starts_with:
  8185. {
  8186. xpath_allocator_capture cr(stack.result);
  8187. xpath_string lr = _left->eval_string(c, stack);
  8188. xpath_string rr = _right->eval_string(c, stack);
  8189. return starts_with(lr.c_str(), rr.c_str());
  8190. }
  8191. case ast_func_contains:
  8192. {
  8193. xpath_allocator_capture cr(stack.result);
  8194. xpath_string lr = _left->eval_string(c, stack);
  8195. xpath_string rr = _right->eval_string(c, stack);
  8196. return find_substring(lr.c_str(), rr.c_str()) != 0;
  8197. }
  8198. case ast_func_boolean:
  8199. return _left->eval_boolean(c, stack);
  8200. case ast_func_not:
  8201. return !_left->eval_boolean(c, stack);
  8202. case ast_func_true:
  8203. return true;
  8204. case ast_func_false:
  8205. return false;
  8206. case ast_func_lang:
  8207. {
  8208. if (c.n.attribute()) return false;
  8209. xpath_allocator_capture cr(stack.result);
  8210. xpath_string lang = _left->eval_string(c, stack);
  8211. for (xml_node n = c.n.node(); n; n = n.parent())
  8212. {
  8213. xml_attribute a = n.attribute(PUGIXML_TEXT("xml:lang"));
  8214. if (a)
  8215. {
  8216. const char_t* value = a.value();
  8217. // strnicmp / strncasecmp is not portable
  8218. for (const char_t* lit = lang.c_str(); *lit; ++lit)
  8219. {
  8220. if (tolower_ascii(*lit) != tolower_ascii(*value)) return false;
  8221. ++value;
  8222. }
  8223. return *value == 0 || *value == '-';
  8224. }
  8225. }
  8226. return false;
  8227. }
  8228. case ast_opt_compare_attribute:
  8229. {
  8230. const char_t* value = (_right->_type == ast_string_constant) ? _right->_data.string : _right->_data.variable->get_string();
  8231. xml_attribute attr = c.n.node().attribute(_left->_data.nodetest);
  8232. return attr && strequal(attr.value(), value) && is_xpath_attribute(attr.name());
  8233. }
  8234. case ast_variable:
  8235. {
  8236. assert(_rettype == _data.variable->type());
  8237. if (_rettype == xpath_type_boolean)
  8238. return _data.variable->get_boolean();
  8239. // variable needs to be converted to the correct type, this is handled by the fallthrough block below
  8240. break;
  8241. }
  8242. default:
  8243. ;
  8244. }
  8245. // none of the ast types that return the value directly matched, we need to perform type conversion
  8246. switch (_rettype)
  8247. {
  8248. case xpath_type_number:
  8249. return convert_number_to_boolean(eval_number(c, stack));
  8250. case xpath_type_string:
  8251. {
  8252. xpath_allocator_capture cr(stack.result);
  8253. return !eval_string(c, stack).empty();
  8254. }
  8255. case xpath_type_node_set:
  8256. {
  8257. xpath_allocator_capture cr(stack.result);
  8258. return !eval_node_set(c, stack, nodeset_eval_any).empty();
  8259. }
  8260. default:
  8261. assert(false && "Wrong expression for return type boolean"); // unreachable
  8262. return false;
  8263. }
  8264. }
  8265. double eval_number(const xpath_context& c, const xpath_stack& stack)
  8266. {
  8267. switch (_type)
  8268. {
  8269. case ast_op_add:
  8270. return _left->eval_number(c, stack) + _right->eval_number(c, stack);
  8271. case ast_op_subtract:
  8272. return _left->eval_number(c, stack) - _right->eval_number(c, stack);
  8273. case ast_op_multiply:
  8274. return _left->eval_number(c, stack) * _right->eval_number(c, stack);
  8275. case ast_op_divide:
  8276. return _left->eval_number(c, stack) / _right->eval_number(c, stack);
  8277. case ast_op_mod:
  8278. return fmod(_left->eval_number(c, stack), _right->eval_number(c, stack));
  8279. case ast_op_negate:
  8280. return -_left->eval_number(c, stack);
  8281. case ast_number_constant:
  8282. return _data.number;
  8283. case ast_func_last:
  8284. return static_cast<double>(c.size);
  8285. case ast_func_position:
  8286. return static_cast<double>(c.position);
  8287. case ast_func_count:
  8288. {
  8289. xpath_allocator_capture cr(stack.result);
  8290. return static_cast<double>(_left->eval_node_set(c, stack, nodeset_eval_all).size());
  8291. }
  8292. case ast_func_string_length_0:
  8293. {
  8294. xpath_allocator_capture cr(stack.result);
  8295. return static_cast<double>(string_value(c.n, stack.result).length());
  8296. }
  8297. case ast_func_string_length_1:
  8298. {
  8299. xpath_allocator_capture cr(stack.result);
  8300. return static_cast<double>(_left->eval_string(c, stack).length());
  8301. }
  8302. case ast_func_number_0:
  8303. {
  8304. xpath_allocator_capture cr(stack.result);
  8305. return convert_string_to_number(string_value(c.n, stack.result).c_str());
  8306. }
  8307. case ast_func_number_1:
  8308. return _left->eval_number(c, stack);
  8309. case ast_func_sum:
  8310. {
  8311. xpath_allocator_capture cr(stack.result);
  8312. double r = 0;
  8313. xpath_node_set_raw ns = _left->eval_node_set(c, stack, nodeset_eval_all);
  8314. for (const xpath_node* it = ns.begin(); it != ns.end(); ++it)
  8315. {
  8316. xpath_allocator_capture cri(stack.result);
  8317. r += convert_string_to_number(string_value(*it, stack.result).c_str());
  8318. }
  8319. return r;
  8320. }
  8321. case ast_func_floor:
  8322. {
  8323. double r = _left->eval_number(c, stack);
  8324. return r == r ? floor(r) : r;
  8325. }
  8326. case ast_func_ceiling:
  8327. {
  8328. double r = _left->eval_number(c, stack);
  8329. return r == r ? ceil(r) : r;
  8330. }
  8331. case ast_func_round:
  8332. return round_nearest_nzero(_left->eval_number(c, stack));
  8333. case ast_variable:
  8334. {
  8335. assert(_rettype == _data.variable->type());
  8336. if (_rettype == xpath_type_number)
  8337. return _data.variable->get_number();
  8338. // variable needs to be converted to the correct type, this is handled by the fallthrough block below
  8339. break;
  8340. }
  8341. default:
  8342. ;
  8343. }
  8344. // none of the ast types that return the value directly matched, we need to perform type conversion
  8345. switch (_rettype)
  8346. {
  8347. case xpath_type_boolean:
  8348. return eval_boolean(c, stack) ? 1 : 0;
  8349. case xpath_type_string:
  8350. {
  8351. xpath_allocator_capture cr(stack.result);
  8352. return convert_string_to_number(eval_string(c, stack).c_str());
  8353. }
  8354. case xpath_type_node_set:
  8355. {
  8356. xpath_allocator_capture cr(stack.result);
  8357. return convert_string_to_number(eval_string(c, stack).c_str());
  8358. }
  8359. default:
  8360. assert(false && "Wrong expression for return type number"); // unreachable
  8361. return 0;
  8362. }
  8363. }
  8364. xpath_string eval_string_concat(const xpath_context& c, const xpath_stack& stack)
  8365. {
  8366. assert(_type == ast_func_concat);
  8367. xpath_allocator_capture ct(stack.temp);
  8368. // count the string number
  8369. size_t count = 1;
  8370. for (xpath_ast_node* nc = _right; nc; nc = nc->_next) count++;
  8371. // allocate a buffer for temporary string objects
  8372. xpath_string* buffer = static_cast<xpath_string*>(stack.temp->allocate(count * sizeof(xpath_string)));
  8373. if (!buffer) return xpath_string();
  8374. // evaluate all strings to temporary stack
  8375. xpath_stack swapped_stack = {stack.temp, stack.result};
  8376. buffer[0] = _left->eval_string(c, swapped_stack);
  8377. size_t pos = 1;
  8378. for (xpath_ast_node* n = _right; n; n = n->_next, ++pos) buffer[pos] = n->eval_string(c, swapped_stack);
  8379. assert(pos == count);
  8380. // get total length
  8381. size_t length = 0;
  8382. for (size_t i = 0; i < count; ++i) length += buffer[i].length();
  8383. // create final string
  8384. char_t* result = static_cast<char_t*>(stack.result->allocate((length + 1) * sizeof(char_t)));
  8385. if (!result) return xpath_string();
  8386. char_t* ri = result;
  8387. for (size_t j = 0; j < count; ++j)
  8388. for (const char_t* bi = buffer[j].c_str(); *bi; ++bi)
  8389. *ri++ = *bi;
  8390. *ri = 0;
  8391. return xpath_string::from_heap_preallocated(result, ri);
  8392. }
  8393. xpath_string eval_string(const xpath_context& c, const xpath_stack& stack)
  8394. {
  8395. switch (_type)
  8396. {
  8397. case ast_string_constant:
  8398. return xpath_string::from_const(_data.string);
  8399. case ast_func_local_name_0:
  8400. {
  8401. xpath_node na = c.n;
  8402. return xpath_string::from_const(local_name(na));
  8403. }
  8404. case ast_func_local_name_1:
  8405. {
  8406. xpath_allocator_capture cr(stack.result);
  8407. xpath_node_set_raw ns = _left->eval_node_set(c, stack, nodeset_eval_first);
  8408. xpath_node na = ns.first();
  8409. return xpath_string::from_const(local_name(na));
  8410. }
  8411. case ast_func_name_0:
  8412. {
  8413. xpath_node na = c.n;
  8414. return xpath_string::from_const(qualified_name(na));
  8415. }
  8416. case ast_func_name_1:
  8417. {
  8418. xpath_allocator_capture cr(stack.result);
  8419. xpath_node_set_raw ns = _left->eval_node_set(c, stack, nodeset_eval_first);
  8420. xpath_node na = ns.first();
  8421. return xpath_string::from_const(qualified_name(na));
  8422. }
  8423. case ast_func_namespace_uri_0:
  8424. {
  8425. xpath_node na = c.n;
  8426. return xpath_string::from_const(namespace_uri(na));
  8427. }
  8428. case ast_func_namespace_uri_1:
  8429. {
  8430. xpath_allocator_capture cr(stack.result);
  8431. xpath_node_set_raw ns = _left->eval_node_set(c, stack, nodeset_eval_first);
  8432. xpath_node na = ns.first();
  8433. return xpath_string::from_const(namespace_uri(na));
  8434. }
  8435. case ast_func_string_0:
  8436. return string_value(c.n, stack.result);
  8437. case ast_func_string_1:
  8438. return _left->eval_string(c, stack);
  8439. case ast_func_concat:
  8440. return eval_string_concat(c, stack);
  8441. case ast_func_substring_before:
  8442. {
  8443. xpath_allocator_capture cr(stack.temp);
  8444. xpath_stack swapped_stack = {stack.temp, stack.result};
  8445. xpath_string s = _left->eval_string(c, swapped_stack);
  8446. xpath_string p = _right->eval_string(c, swapped_stack);
  8447. const char_t* pos = find_substring(s.c_str(), p.c_str());
  8448. return pos ? xpath_string::from_heap(s.c_str(), pos, stack.result) : xpath_string();
  8449. }
  8450. case ast_func_substring_after:
  8451. {
  8452. xpath_allocator_capture cr(stack.temp);
  8453. xpath_stack swapped_stack = {stack.temp, stack.result};
  8454. xpath_string s = _left->eval_string(c, swapped_stack);
  8455. xpath_string p = _right->eval_string(c, swapped_stack);
  8456. const char_t* pos = find_substring(s.c_str(), p.c_str());
  8457. if (!pos) return xpath_string();
  8458. const char_t* rbegin = pos + p.length();
  8459. const char_t* rend = s.c_str() + s.length();
  8460. return s.uses_heap() ? xpath_string::from_heap(rbegin, rend, stack.result) : xpath_string::from_const(rbegin);
  8461. }
  8462. case ast_func_substring_2:
  8463. {
  8464. xpath_allocator_capture cr(stack.temp);
  8465. xpath_stack swapped_stack = {stack.temp, stack.result};
  8466. xpath_string s = _left->eval_string(c, swapped_stack);
  8467. size_t s_length = s.length();
  8468. double first = round_nearest(_right->eval_number(c, stack));
  8469. if (is_nan(first)) return xpath_string(); // NaN
  8470. else if (first >= static_cast<double>(s_length + 1)) return xpath_string();
  8471. size_t pos = first < 1 ? 1 : static_cast<size_t>(first);
  8472. assert(1 <= pos && pos <= s_length + 1);
  8473. const char_t* rbegin = s.c_str() + (pos - 1);
  8474. const char_t* rend = s.c_str() + s.length();
  8475. return s.uses_heap() ? xpath_string::from_heap(rbegin, rend, stack.result) : xpath_string::from_const(rbegin);
  8476. }
  8477. case ast_func_substring_3:
  8478. {
  8479. xpath_allocator_capture cr(stack.temp);
  8480. xpath_stack swapped_stack = {stack.temp, stack.result};
  8481. xpath_string s = _left->eval_string(c, swapped_stack);
  8482. size_t s_length = s.length();
  8483. double first = round_nearest(_right->eval_number(c, stack));
  8484. double last = first + round_nearest(_right->_next->eval_number(c, stack));
  8485. if (is_nan(first) || is_nan(last)) return xpath_string();
  8486. else if (first >= static_cast<double>(s_length + 1)) return xpath_string();
  8487. else if (first >= last) return xpath_string();
  8488. else if (last < 1) return xpath_string();
  8489. size_t pos = first < 1 ? 1 : static_cast<size_t>(first);
  8490. size_t end = last >= static_cast<double>(s_length + 1) ? s_length + 1 : static_cast<size_t>(last);
  8491. assert(1 <= pos && pos <= end && end <= s_length + 1);
  8492. const char_t* rbegin = s.c_str() + (pos - 1);
  8493. const char_t* rend = s.c_str() + (end - 1);
  8494. return (end == s_length + 1 && !s.uses_heap()) ? xpath_string::from_const(rbegin) : xpath_string::from_heap(rbegin, rend, stack.result);
  8495. }
  8496. case ast_func_normalize_space_0:
  8497. {
  8498. xpath_string s = string_value(c.n, stack.result);
  8499. char_t* begin = s.data(stack.result);
  8500. if (!begin) return xpath_string();
  8501. char_t* end = normalize_space(begin);
  8502. return xpath_string::from_heap_preallocated(begin, end);
  8503. }
  8504. case ast_func_normalize_space_1:
  8505. {
  8506. xpath_string s = _left->eval_string(c, stack);
  8507. char_t* begin = s.data(stack.result);
  8508. if (!begin) return xpath_string();
  8509. char_t* end = normalize_space(begin);
  8510. return xpath_string::from_heap_preallocated(begin, end);
  8511. }
  8512. case ast_func_translate:
  8513. {
  8514. xpath_allocator_capture cr(stack.temp);
  8515. xpath_stack swapped_stack = {stack.temp, stack.result};
  8516. xpath_string s = _left->eval_string(c, stack);
  8517. xpath_string from = _right->eval_string(c, swapped_stack);
  8518. xpath_string to = _right->_next->eval_string(c, swapped_stack);
  8519. char_t* begin = s.data(stack.result);
  8520. if (!begin) return xpath_string();
  8521. char_t* end = translate(begin, from.c_str(), to.c_str(), to.length());
  8522. return xpath_string::from_heap_preallocated(begin, end);
  8523. }
  8524. case ast_opt_translate_table:
  8525. {
  8526. xpath_string s = _left->eval_string(c, stack);
  8527. char_t* begin = s.data(stack.result);
  8528. if (!begin) return xpath_string();
  8529. char_t* end = translate_table(begin, _data.table);
  8530. return xpath_string::from_heap_preallocated(begin, end);
  8531. }
  8532. case ast_variable:
  8533. {
  8534. assert(_rettype == _data.variable->type());
  8535. if (_rettype == xpath_type_string)
  8536. return xpath_string::from_const(_data.variable->get_string());
  8537. // variable needs to be converted to the correct type, this is handled by the fallthrough block below
  8538. break;
  8539. }
  8540. default:
  8541. ;
  8542. }
  8543. // none of the ast types that return the value directly matched, we need to perform type conversion
  8544. switch (_rettype)
  8545. {
  8546. case xpath_type_boolean:
  8547. return xpath_string::from_const(eval_boolean(c, stack) ? PUGIXML_TEXT("true") : PUGIXML_TEXT("false"));
  8548. case xpath_type_number:
  8549. return convert_number_to_string(eval_number(c, stack), stack.result);
  8550. case xpath_type_node_set:
  8551. {
  8552. xpath_allocator_capture cr(stack.temp);
  8553. xpath_stack swapped_stack = {stack.temp, stack.result};
  8554. xpath_node_set_raw ns = eval_node_set(c, swapped_stack, nodeset_eval_first);
  8555. return ns.empty() ? xpath_string() : string_value(ns.first(), stack.result);
  8556. }
  8557. default:
  8558. assert(false && "Wrong expression for return type string"); // unreachable
  8559. return xpath_string();
  8560. }
  8561. }
  8562. xpath_node_set_raw eval_node_set(const xpath_context& c, const xpath_stack& stack, nodeset_eval_t eval)
  8563. {
  8564. switch (_type)
  8565. {
  8566. case ast_op_union:
  8567. {
  8568. xpath_allocator_capture cr(stack.temp);
  8569. xpath_stack swapped_stack = {stack.temp, stack.result};
  8570. xpath_node_set_raw ls = _left->eval_node_set(c, stack, eval);
  8571. xpath_node_set_raw rs = _right->eval_node_set(c, swapped_stack, eval);
  8572. // we can optimize merging two sorted sets, but this is a very rare operation, so don't bother
  8573. ls.set_type(xpath_node_set::type_unsorted);
  8574. ls.append(rs.begin(), rs.end(), stack.result);
  8575. ls.remove_duplicates(stack.temp);
  8576. return ls;
  8577. }
  8578. case ast_filter:
  8579. {
  8580. xpath_node_set_raw set = _left->eval_node_set(c, stack, _test == predicate_constant_one ? nodeset_eval_first : nodeset_eval_all);
  8581. // either expression is a number or it contains position() call; sort by document order
  8582. if (_test != predicate_posinv) set.sort_do();
  8583. bool once = eval_once(set.type(), eval);
  8584. apply_predicate(set, 0, stack, once);
  8585. return set;
  8586. }
  8587. case ast_func_id:
  8588. return xpath_node_set_raw();
  8589. case ast_step:
  8590. {
  8591. switch (_axis)
  8592. {
  8593. case axis_ancestor:
  8594. return step_do(c, stack, eval, axis_to_type<axis_ancestor>());
  8595. case axis_ancestor_or_self:
  8596. return step_do(c, stack, eval, axis_to_type<axis_ancestor_or_self>());
  8597. case axis_attribute:
  8598. return step_do(c, stack, eval, axis_to_type<axis_attribute>());
  8599. case axis_child:
  8600. return step_do(c, stack, eval, axis_to_type<axis_child>());
  8601. case axis_descendant:
  8602. return step_do(c, stack, eval, axis_to_type<axis_descendant>());
  8603. case axis_descendant_or_self:
  8604. return step_do(c, stack, eval, axis_to_type<axis_descendant_or_self>());
  8605. case axis_following:
  8606. return step_do(c, stack, eval, axis_to_type<axis_following>());
  8607. case axis_following_sibling:
  8608. return step_do(c, stack, eval, axis_to_type<axis_following_sibling>());
  8609. case axis_namespace:
  8610. // namespaced axis is not supported
  8611. return xpath_node_set_raw();
  8612. case axis_parent:
  8613. return step_do(c, stack, eval, axis_to_type<axis_parent>());
  8614. case axis_preceding:
  8615. return step_do(c, stack, eval, axis_to_type<axis_preceding>());
  8616. case axis_preceding_sibling:
  8617. return step_do(c, stack, eval, axis_to_type<axis_preceding_sibling>());
  8618. case axis_self:
  8619. return step_do(c, stack, eval, axis_to_type<axis_self>());
  8620. default:
  8621. assert(false && "Unknown axis"); // unreachable
  8622. return xpath_node_set_raw();
  8623. }
  8624. }
  8625. case ast_step_root:
  8626. {
  8627. assert(!_right); // root step can't have any predicates
  8628. xpath_node_set_raw ns;
  8629. ns.set_type(xpath_node_set::type_sorted);
  8630. if (c.n.node()) ns.push_back(c.n.node().root(), stack.result);
  8631. else if (c.n.attribute()) ns.push_back(c.n.parent().root(), stack.result);
  8632. return ns;
  8633. }
  8634. case ast_variable:
  8635. {
  8636. assert(_rettype == _data.variable->type());
  8637. if (_rettype == xpath_type_node_set)
  8638. {
  8639. const xpath_node_set& s = _data.variable->get_node_set();
  8640. xpath_node_set_raw ns;
  8641. ns.set_type(s.type());
  8642. ns.append(s.begin(), s.end(), stack.result);
  8643. return ns;
  8644. }
  8645. // variable needs to be converted to the correct type, this is handled by the fallthrough block below
  8646. break;
  8647. }
  8648. default:
  8649. ;
  8650. }
  8651. // none of the ast types that return the value directly matched, but conversions to node set are invalid
  8652. assert(false && "Wrong expression for return type node set"); // unreachable
  8653. return xpath_node_set_raw();
  8654. }
  8655. void optimize(xpath_allocator* alloc)
  8656. {
  8657. if (_left)
  8658. _left->optimize(alloc);
  8659. if (_right)
  8660. _right->optimize(alloc);
  8661. if (_next)
  8662. _next->optimize(alloc);
  8663. // coverity[var_deref_model]
  8664. optimize_self(alloc);
  8665. }
  8666. void optimize_self(xpath_allocator* alloc)
  8667. {
  8668. // Rewrite [position()=expr] with [expr]
  8669. // Note that this step has to go before classification to recognize [position()=1]
  8670. if ((_type == ast_filter || _type == ast_predicate) &&
  8671. _right && // workaround for clang static analyzer (_right is never null for ast_filter/ast_predicate)
  8672. _right->_type == ast_op_equal && _right->_left->_type == ast_func_position && _right->_right->_rettype == xpath_type_number)
  8673. {
  8674. _right = _right->_right;
  8675. }
  8676. // Classify filter/predicate ops to perform various optimizations during evaluation
  8677. if ((_type == ast_filter || _type == ast_predicate) && _right) // workaround for clang static analyzer (_right is never null for ast_filter/ast_predicate)
  8678. {
  8679. assert(_test == predicate_default);
  8680. if (_right->_type == ast_number_constant && _right->_data.number == 1.0)
  8681. _test = predicate_constant_one;
  8682. else if (_right->_rettype == xpath_type_number && (_right->_type == ast_number_constant || _right->_type == ast_variable || _right->_type == ast_func_last))
  8683. _test = predicate_constant;
  8684. else if (_right->_rettype != xpath_type_number && _right->is_posinv_expr())
  8685. _test = predicate_posinv;
  8686. }
  8687. // Rewrite descendant-or-self::node()/child::foo with descendant::foo
  8688. // The former is a full form of //foo, the latter is much faster since it executes the node test immediately
  8689. // Do a similar kind of rewrite for self/descendant/descendant-or-self axes
  8690. // Note that we only rewrite positionally invariant steps (//foo[1] != /descendant::foo[1])
  8691. if (_type == ast_step && (_axis == axis_child || _axis == axis_self || _axis == axis_descendant || _axis == axis_descendant_or_self) &&
  8692. _left && _left->_type == ast_step && _left->_axis == axis_descendant_or_self && _left->_test == nodetest_type_node && !_left->_right &&
  8693. is_posinv_step())
  8694. {
  8695. if (_axis == axis_child || _axis == axis_descendant)
  8696. _axis = axis_descendant;
  8697. else
  8698. _axis = axis_descendant_or_self;
  8699. _left = _left->_left;
  8700. }
  8701. // Use optimized lookup table implementation for translate() with constant arguments
  8702. if (_type == ast_func_translate &&
  8703. _right && // workaround for clang static analyzer (_right is never null for ast_func_translate)
  8704. _right->_type == ast_string_constant && _right->_next->_type == ast_string_constant)
  8705. {
  8706. unsigned char* table = translate_table_generate(alloc, _right->_data.string, _right->_next->_data.string);
  8707. if (table)
  8708. {
  8709. _type = ast_opt_translate_table;
  8710. _data.table = table;
  8711. }
  8712. }
  8713. // Use optimized path for @attr = 'value' or @attr = $value
  8714. if (_type == ast_op_equal &&
  8715. _left && _right && // workaround for clang static analyzer and Coverity (_left and _right are never null for ast_op_equal)
  8716. // coverity[mixed_enums]
  8717. _left->_type == ast_step && _left->_axis == axis_attribute && _left->_test == nodetest_name && !_left->_left && !_left->_right &&
  8718. (_right->_type == ast_string_constant || (_right->_type == ast_variable && _right->_rettype == xpath_type_string)))
  8719. {
  8720. _type = ast_opt_compare_attribute;
  8721. }
  8722. }
  8723. bool is_posinv_expr() const
  8724. {
  8725. switch (_type)
  8726. {
  8727. case ast_func_position:
  8728. case ast_func_last:
  8729. return false;
  8730. case ast_string_constant:
  8731. case ast_number_constant:
  8732. case ast_variable:
  8733. return true;
  8734. case ast_step:
  8735. case ast_step_root:
  8736. return true;
  8737. case ast_predicate:
  8738. case ast_filter:
  8739. return true;
  8740. default:
  8741. if (_left && !_left->is_posinv_expr()) return false;
  8742. for (xpath_ast_node* n = _right; n; n = n->_next)
  8743. if (!n->is_posinv_expr()) return false;
  8744. return true;
  8745. }
  8746. }
  8747. bool is_posinv_step() const
  8748. {
  8749. assert(_type == ast_step);
  8750. for (xpath_ast_node* n = _right; n; n = n->_next)
  8751. {
  8752. assert(n->_type == ast_predicate);
  8753. if (n->_test != predicate_posinv)
  8754. return false;
  8755. }
  8756. return true;
  8757. }
  8758. xpath_value_type rettype() const
  8759. {
  8760. return static_cast<xpath_value_type>(_rettype);
  8761. }
  8762. };
  // Upper bound for the parser's depth counter; defaults to 1024 unless PUGIXML_XPATH_DEPTH_LIMIT is defined
  #ifdef PUGIXML_XPATH_DEPTH_LIMIT
  static const size_t xpath_ast_depth_limit = PUGIXML_XPATH_DEPTH_LIMIT;
  #else
  static const size_t xpath_ast_depth_limit = 1024;
  #endif
  8770. struct xpath_parser
  8771. {
  8772. xpath_allocator* _alloc;
  8773. xpath_lexer _lexer;
  8774. const char_t* _query;
  8775. xpath_variable_set* _variables;
  8776. xpath_parse_result* _result;
  8777. char_t _scratch[32];
  8778. size_t _depth;
  8779. xpath_ast_node* error(const char* message)
  8780. {
  8781. _result->error = message;
  8782. _result->offset = _lexer.current_pos() - _query;
  8783. return 0;
  8784. }
  8785. xpath_ast_node* error_oom()
  8786. {
  8787. assert(_alloc->_error);
  8788. *_alloc->_error = true;
  8789. return 0;
  8790. }
  8791. xpath_ast_node* error_rec()
  8792. {
  8793. return error("Exceeded maximum allowed query depth");
  8794. }
  8795. void* alloc_node()
  8796. {
  8797. return _alloc->allocate(sizeof(xpath_ast_node));
  8798. }
  8799. xpath_ast_node* alloc_node(ast_type_t type, xpath_value_type rettype, const char_t* value)
  8800. {
  8801. void* memory = alloc_node();
  8802. return memory ? new (memory) xpath_ast_node(type, rettype, value) : 0;
  8803. }
  8804. xpath_ast_node* alloc_node(ast_type_t type, xpath_value_type rettype, double value)
  8805. {
  8806. void* memory = alloc_node();
  8807. return memory ? new (memory) xpath_ast_node(type, rettype, value) : 0;
  8808. }
  8809. xpath_ast_node* alloc_node(ast_type_t type, xpath_value_type rettype, xpath_variable* value)
  8810. {
  8811. void* memory = alloc_node();
  8812. return memory ? new (memory) xpath_ast_node(type, rettype, value) : 0;
  8813. }
  8814. xpath_ast_node* alloc_node(ast_type_t type, xpath_value_type rettype, xpath_ast_node* left = 0, xpath_ast_node* right = 0)
  8815. {
  8816. void* memory = alloc_node();
  8817. return memory ? new (memory) xpath_ast_node(type, rettype, left, right) : 0;
  8818. }
  8819. xpath_ast_node* alloc_node(ast_type_t type, xpath_ast_node* left, axis_t axis, nodetest_t test, const char_t* contents)
  8820. {
  8821. void* memory = alloc_node();
  8822. return memory ? new (memory) xpath_ast_node(type, left, axis, test, contents) : 0;
  8823. }
  8824. xpath_ast_node* alloc_node(ast_type_t type, xpath_ast_node* left, xpath_ast_node* right, predicate_t test)
  8825. {
  8826. void* memory = alloc_node();
  8827. return memory ? new (memory) xpath_ast_node(type, left, right, test) : 0;
  8828. }
  8829. const char_t* alloc_string(const xpath_lexer_string& value)
  8830. {
  8831. if (!value.begin)
  8832. return PUGIXML_TEXT("");
  8833. size_t length = static_cast<size_t>(value.end - value.begin);
  8834. char_t* c = static_cast<char_t*>(_alloc->allocate((length + 1) * sizeof(char_t)));
  8835. if (!c) return 0;
  8836. memcpy(c, value.begin, length * sizeof(char_t));
  8837. c[length] = 0;
  8838. return c;
  8839. }
  8840. xpath_ast_node* parse_function(const xpath_lexer_string& name, size_t argc, xpath_ast_node* args[2])
  8841. {
  8842. switch (name.begin[0])
  8843. {
  8844. case 'b':
  8845. if (name == PUGIXML_TEXT("boolean") && argc == 1)
  8846. return alloc_node(ast_func_boolean, xpath_type_boolean, args[0]);
  8847. break;
  8848. case 'c':
  8849. if (name == PUGIXML_TEXT("count") && argc == 1)
  8850. {
  8851. if (args[0]->rettype() != xpath_type_node_set) return error("Function has to be applied to node set");
  8852. return alloc_node(ast_func_count, xpath_type_number, args[0]);
  8853. }
  8854. else if (name == PUGIXML_TEXT("contains") && argc == 2)
  8855. return alloc_node(ast_func_contains, xpath_type_boolean, args[0], args[1]);
  8856. else if (name == PUGIXML_TEXT("concat") && argc >= 2)
  8857. return alloc_node(ast_func_concat, xpath_type_string, args[0], args[1]);
  8858. else if (name == PUGIXML_TEXT("ceiling") && argc == 1)
  8859. return alloc_node(ast_func_ceiling, xpath_type_number, args[0]);
  8860. break;
  8861. case 'f':
  8862. if (name == PUGIXML_TEXT("false") && argc == 0)
  8863. return alloc_node(ast_func_false, xpath_type_boolean);
  8864. else if (name == PUGIXML_TEXT("floor") && argc == 1)
  8865. return alloc_node(ast_func_floor, xpath_type_number, args[0]);
  8866. break;
  8867. case 'i':
  8868. if (name == PUGIXML_TEXT("id") && argc == 1)
  8869. return alloc_node(ast_func_id, xpath_type_node_set, args[0]);
  8870. break;
  8871. case 'l':
  8872. if (name == PUGIXML_TEXT("last") && argc == 0)
  8873. return alloc_node(ast_func_last, xpath_type_number);
  8874. else if (name == PUGIXML_TEXT("lang") && argc == 1)
  8875. return alloc_node(ast_func_lang, xpath_type_boolean, args[0]);
  8876. else if (name == PUGIXML_TEXT("local-name") && argc <= 1)
  8877. {
  8878. if (argc == 1 && args[0]->rettype() != xpath_type_node_set) return error("Function has to be applied to node set");
  8879. return alloc_node(argc == 0 ? ast_func_local_name_0 : ast_func_local_name_1, xpath_type_string, args[0]);
  8880. }
  8881. break;
  8882. case 'n':
  8883. if (name == PUGIXML_TEXT("name") && argc <= 1)
  8884. {
  8885. if (argc == 1 && args[0]->rettype() != xpath_type_node_set) return error("Function has to be applied to node set");
  8886. return alloc_node(argc == 0 ? ast_func_name_0 : ast_func_name_1, xpath_type_string, args[0]);
  8887. }
  8888. else if (name == PUGIXML_TEXT("namespace-uri") && argc <= 1)
  8889. {
  8890. if (argc == 1 && args[0]->rettype() != xpath_type_node_set) return error("Function has to be applied to node set");
  8891. return alloc_node(argc == 0 ? ast_func_namespace_uri_0 : ast_func_namespace_uri_1, xpath_type_string, args[0]);
  8892. }
  8893. else if (name == PUGIXML_TEXT("normalize-space") && argc <= 1)
  8894. return alloc_node(argc == 0 ? ast_func_normalize_space_0 : ast_func_normalize_space_1, xpath_type_string, args[0], args[1]);
  8895. else if (name == PUGIXML_TEXT("not") && argc == 1)
  8896. return alloc_node(ast_func_not, xpath_type_boolean, args[0]);
  8897. else if (name == PUGIXML_TEXT("number") && argc <= 1)
  8898. return alloc_node(argc == 0 ? ast_func_number_0 : ast_func_number_1, xpath_type_number, args[0]);
  8899. break;
  8900. case 'p':
  8901. if (name == PUGIXML_TEXT("position") && argc == 0)
  8902. return alloc_node(ast_func_position, xpath_type_number);
  8903. break;
  8904. case 'r':
  8905. if (name == PUGIXML_TEXT("round") && argc == 1)
  8906. return alloc_node(ast_func_round, xpath_type_number, args[0]);
  8907. break;
  8908. case 's':
  8909. if (name == PUGIXML_TEXT("string") && argc <= 1)
  8910. return alloc_node(argc == 0 ? ast_func_string_0 : ast_func_string_1, xpath_type_string, args[0]);
  8911. else if (name == PUGIXML_TEXT("string-length") && argc <= 1)
  8912. return alloc_node(argc == 0 ? ast_func_string_length_0 : ast_func_string_length_1, xpath_type_number, args[0]);
  8913. else if (name == PUGIXML_TEXT("starts-with") && argc == 2)
  8914. return alloc_node(ast_func_starts_with, xpath_type_boolean, args[0], args[1]);
  8915. else if (name == PUGIXML_TEXT("substring-before") && argc == 2)
  8916. return alloc_node(ast_func_substring_before, xpath_type_string, args[0], args[1]);
  8917. else if (name == PUGIXML_TEXT("substring-after") && argc == 2)
  8918. return alloc_node(ast_func_substring_after, xpath_type_string, args[0], args[1]);
  8919. else if (name == PUGIXML_TEXT("substring") && (argc == 2 || argc == 3))
  8920. return alloc_node(argc == 2 ? ast_func_substring_2 : ast_func_substring_3, xpath_type_string, args[0], args[1]);
  8921. else if (name == PUGIXML_TEXT("sum") && argc == 1)
  8922. {
  8923. if (args[0]->rettype() != xpath_type_node_set) return error("Function has to be applied to node set");
  8924. return alloc_node(ast_func_sum, xpath_type_number, args[0]);
  8925. }
  8926. break;
  8927. case 't':
  8928. if (name == PUGIXML_TEXT("translate") && argc == 3)
  8929. return alloc_node(ast_func_translate, xpath_type_string, args[0], args[1]);
  8930. else if (name == PUGIXML_TEXT("true") && argc == 0)
  8931. return alloc_node(ast_func_true, xpath_type_boolean);
  8932. break;
  8933. default:
  8934. break;
  8935. }
  8936. return error("Unrecognized function or wrong parameter count");
  8937. }
  8938. axis_t parse_axis_name(const xpath_lexer_string& name, bool& specified)
  8939. {
  8940. specified = true;
  8941. switch (name.begin[0])
  8942. {
  8943. case 'a':
  8944. if (name == PUGIXML_TEXT("ancestor"))
  8945. return axis_ancestor;
  8946. else if (name == PUGIXML_TEXT("ancestor-or-self"))
  8947. return axis_ancestor_or_self;
  8948. else if (name == PUGIXML_TEXT("attribute"))
  8949. return axis_attribute;
  8950. break;
  8951. case 'c':
  8952. if (name == PUGIXML_TEXT("child"))
  8953. return axis_child;
  8954. break;
  8955. case 'd':
  8956. if (name == PUGIXML_TEXT("descendant"))
  8957. return axis_descendant;
  8958. else if (name == PUGIXML_TEXT("descendant-or-self"))
  8959. return axis_descendant_or_self;
  8960. break;
  8961. case 'f':
  8962. if (name == PUGIXML_TEXT("following"))
  8963. return axis_following;
  8964. else if (name == PUGIXML_TEXT("following-sibling"))
  8965. return axis_following_sibling;
  8966. break;
  8967. case 'n':
  8968. if (name == PUGIXML_TEXT("namespace"))
  8969. return axis_namespace;
  8970. break;
  8971. case 'p':
  8972. if (name == PUGIXML_TEXT("parent"))
  8973. return axis_parent;
  8974. else if (name == PUGIXML_TEXT("preceding"))
  8975. return axis_preceding;
  8976. else if (name == PUGIXML_TEXT("preceding-sibling"))
  8977. return axis_preceding_sibling;
  8978. break;
  8979. case 's':
  8980. if (name == PUGIXML_TEXT("self"))
  8981. return axis_self;
  8982. break;
  8983. default:
  8984. break;
  8985. }
  8986. specified = false;
  8987. return axis_child;
  8988. }
  8989. nodetest_t parse_node_test_type(const xpath_lexer_string& name)
  8990. {
  8991. switch (name.begin[0])
  8992. {
  8993. case 'c':
  8994. if (name == PUGIXML_TEXT("comment"))
  8995. return nodetest_type_comment;
  8996. break;
  8997. case 'n':
  8998. if (name == PUGIXML_TEXT("node"))
  8999. return nodetest_type_node;
  9000. break;
  9001. case 'p':
  9002. if (name == PUGIXML_TEXT("processing-instruction"))
  9003. return nodetest_type_pi;
  9004. break;
  9005. case 't':
  9006. if (name == PUGIXML_TEXT("text"))
  9007. return nodetest_type_text;
  9008. break;
  9009. default:
  9010. break;
  9011. }
  9012. return nodetest_none;
  9013. }
  9014. // PrimaryExpr ::= VariableReference | '(' Expr ')' | Literal | Number | FunctionCall
  9015. xpath_ast_node* parse_primary_expression()
  9016. {
  9017. switch (_lexer.current())
  9018. {
  9019. case lex_var_ref:
  9020. {
  9021. xpath_lexer_string name = _lexer.contents();
  9022. if (!_variables)
  9023. return error("Unknown variable: variable set is not provided");
  9024. xpath_variable* var = 0;
  9025. if (!get_variable_scratch(_scratch, _variables, name.begin, name.end, &var))
  9026. return error_oom();
  9027. if (!var)
  9028. return error("Unknown variable: variable set does not contain the given name");
  9029. _lexer.next();
  9030. return alloc_node(ast_variable, var->type(), var);
  9031. }
  9032. case lex_open_brace:
  9033. {
  9034. _lexer.next();
  9035. xpath_ast_node* n = parse_expression();
  9036. if (!n) return 0;
  9037. if (_lexer.current() != lex_close_brace)
  9038. return error("Expected ')' to match an opening '('");
  9039. _lexer.next();
  9040. return n;
  9041. }
  9042. case lex_quoted_string:
  9043. {
  9044. const char_t* value = alloc_string(_lexer.contents());
  9045. if (!value) return 0;
  9046. _lexer.next();
  9047. return alloc_node(ast_string_constant, xpath_type_string, value);
  9048. }
  9049. case lex_number:
  9050. {
  9051. double value = 0;
  9052. if (!convert_string_to_number_scratch(_scratch, _lexer.contents().begin, _lexer.contents().end, &value))
  9053. return error_oom();
  9054. _lexer.next();
  9055. return alloc_node(ast_number_constant, xpath_type_number, value);
  9056. }
  9057. case lex_string:
  9058. {
  9059. xpath_ast_node* args[2] = {0};
  9060. size_t argc = 0;
  9061. xpath_lexer_string function = _lexer.contents();
  9062. _lexer.next();
  9063. xpath_ast_node* last_arg = 0;
  9064. if (_lexer.current() != lex_open_brace)
  9065. return error("Unrecognized function call");
  9066. _lexer.next();
  9067. size_t old_depth = _depth;
  9068. while (_lexer.current() != lex_close_brace)
  9069. {
  9070. if (argc > 0)
  9071. {
  9072. if (_lexer.current() != lex_comma)
  9073. return error("No comma between function arguments");
  9074. _lexer.next();
  9075. }
  9076. if (++_depth > xpath_ast_depth_limit)
  9077. return error_rec();
  9078. xpath_ast_node* n = parse_expression();
  9079. if (!n) return 0;
  9080. if (argc < 2) args[argc] = n;
  9081. else last_arg->set_next(n);
  9082. argc++;
  9083. last_arg = n;
  9084. }
  9085. _lexer.next();
  9086. _depth = old_depth;
  9087. return parse_function(function, argc, args);
  9088. }
  9089. default:
  9090. return error("Unrecognizable primary expression");
  9091. }
  9092. }
  9093. // FilterExpr ::= PrimaryExpr | FilterExpr Predicate
  9094. // Predicate ::= '[' PredicateExpr ']'
  9095. // PredicateExpr ::= Expr
  9096. xpath_ast_node* parse_filter_expression()
  9097. {
  9098. xpath_ast_node* n = parse_primary_expression();
  9099. if (!n) return 0;
  9100. size_t old_depth = _depth;
  9101. while (_lexer.current() == lex_open_square_brace)
  9102. {
  9103. _lexer.next();
  9104. if (++_depth > xpath_ast_depth_limit)
  9105. return error_rec();
  9106. if (n->rettype() != xpath_type_node_set)
  9107. return error("Predicate has to be applied to node set");
  9108. xpath_ast_node* expr = parse_expression();
  9109. if (!expr) return 0;
  9110. n = alloc_node(ast_filter, n, expr, predicate_default);
  9111. if (!n) return 0;
  9112. if (_lexer.current() != lex_close_square_brace)
  9113. return error("Expected ']' to match an opening '['");
  9114. _lexer.next();
  9115. }
  9116. _depth = old_depth;
  9117. return n;
  9118. }
  9119. // Step ::= AxisSpecifier NodeTest Predicate* | AbbreviatedStep
  9120. // AxisSpecifier ::= AxisName '::' | '@'?
  9121. // NodeTest ::= NameTest | NodeType '(' ')' | 'processing-instruction' '(' Literal ')'
  9122. // NameTest ::= '*' | NCName ':' '*' | QName
  9123. // AbbreviatedStep ::= '.' | '..'
  9124. xpath_ast_node* parse_step(xpath_ast_node* set)
  9125. {
  9126. if (set && set->rettype() != xpath_type_node_set)
  9127. return error("Step has to be applied to node set");
  9128. bool axis_specified = false;
  9129. axis_t axis = axis_child; // implied child axis
  9130. if (_lexer.current() == lex_axis_attribute)
  9131. {
  9132. axis = axis_attribute;
  9133. axis_specified = true;
  9134. _lexer.next();
  9135. }
  9136. else if (_lexer.current() == lex_dot)
  9137. {
  9138. _lexer.next();
  9139. if (_lexer.current() == lex_open_square_brace)
  9140. return error("Predicates are not allowed after an abbreviated step");
  9141. return alloc_node(ast_step, set, axis_self, nodetest_type_node, 0);
  9142. }
  9143. else if (_lexer.current() == lex_double_dot)
  9144. {
  9145. _lexer.next();
  9146. if (_lexer.current() == lex_open_square_brace)
  9147. return error("Predicates are not allowed after an abbreviated step");
  9148. return alloc_node(ast_step, set, axis_parent, nodetest_type_node, 0);
  9149. }
  9150. nodetest_t nt_type = nodetest_none;
  9151. xpath_lexer_string nt_name;
  9152. if (_lexer.current() == lex_string)
  9153. {
  9154. // node name test
  9155. nt_name = _lexer.contents();
  9156. _lexer.next();
  9157. // was it an axis name?
  9158. if (_lexer.current() == lex_double_colon)
  9159. {
  9160. // parse axis name
  9161. if (axis_specified)
  9162. return error("Two axis specifiers in one step");
  9163. axis = parse_axis_name(nt_name, axis_specified);
  9164. if (!axis_specified)
  9165. return error("Unknown axis");
  9166. // read actual node test
  9167. _lexer.next();
  9168. if (_lexer.current() == lex_multiply)
  9169. {
  9170. nt_type = nodetest_all;
  9171. nt_name = xpath_lexer_string();
  9172. _lexer.next();
  9173. }
  9174. else if (_lexer.current() == lex_string)
  9175. {
  9176. nt_name = _lexer.contents();
  9177. _lexer.next();
  9178. }
  9179. else
  9180. {
  9181. return error("Unrecognized node test");
  9182. }
  9183. }
  9184. if (nt_type == nodetest_none)
  9185. {
  9186. // node type test or processing-instruction
  9187. if (_lexer.current() == lex_open_brace)
  9188. {
  9189. _lexer.next();
  9190. if (_lexer.current() == lex_close_brace)
  9191. {
  9192. _lexer.next();
  9193. nt_type = parse_node_test_type(nt_name);
  9194. if (nt_type == nodetest_none)
  9195. return error("Unrecognized node type");
  9196. nt_name = xpath_lexer_string();
  9197. }
  9198. else if (nt_name == PUGIXML_TEXT("processing-instruction"))
  9199. {
  9200. if (_lexer.current() != lex_quoted_string)
  9201. return error("Only literals are allowed as arguments to processing-instruction()");
  9202. nt_type = nodetest_pi;
  9203. nt_name = _lexer.contents();
  9204. _lexer.next();
  9205. if (_lexer.current() != lex_close_brace)
  9206. return error("Unmatched brace near processing-instruction()");
  9207. _lexer.next();
  9208. }
  9209. else
  9210. {
  9211. return error("Unmatched brace near node type test");
  9212. }
  9213. }
  9214. // QName or NCName:*
  9215. else
  9216. {
  9217. if (nt_name.end - nt_name.begin > 2 && nt_name.end[-2] == ':' && nt_name.end[-1] == '*') // NCName:*
  9218. {
  9219. nt_name.end--; // erase *
  9220. nt_type = nodetest_all_in_namespace;
  9221. }
  9222. else
  9223. {
  9224. nt_type = nodetest_name;
  9225. }
  9226. }
  9227. }
  9228. }
  9229. else if (_lexer.current() == lex_multiply)
  9230. {
  9231. nt_type = nodetest_all;
  9232. _lexer.next();
  9233. }
  9234. else
  9235. {
  9236. return error("Unrecognized node test");
  9237. }
  9238. const char_t* nt_name_copy = alloc_string(nt_name);
  9239. if (!nt_name_copy) return 0;
  9240. xpath_ast_node* n = alloc_node(ast_step, set, axis, nt_type, nt_name_copy);
  9241. if (!n) return 0;
  9242. size_t old_depth = _depth;
  9243. xpath_ast_node* last = 0;
  9244. while (_lexer.current() == lex_open_square_brace)
  9245. {
  9246. _lexer.next();
  9247. if (++_depth > xpath_ast_depth_limit)
  9248. return error_rec();
  9249. xpath_ast_node* expr = parse_expression();
  9250. if (!expr) return 0;
  9251. xpath_ast_node* pred = alloc_node(ast_predicate, 0, expr, predicate_default);
  9252. if (!pred) return 0;
  9253. if (_lexer.current() != lex_close_square_brace)
  9254. return error("Expected ']' to match an opening '['");
  9255. _lexer.next();
  9256. if (last) last->set_next(pred);
  9257. else n->set_right(pred);
  9258. last = pred;
  9259. }
  9260. _depth = old_depth;
  9261. return n;
  9262. }
  9263. // RelativeLocationPath ::= Step | RelativeLocationPath '/' Step | RelativeLocationPath '//' Step
  9264. xpath_ast_node* parse_relative_location_path(xpath_ast_node* set)
  9265. {
  9266. xpath_ast_node* n = parse_step(set);
  9267. if (!n) return 0;
  9268. size_t old_depth = _depth;
  9269. while (_lexer.current() == lex_slash || _lexer.current() == lex_double_slash)
  9270. {
  9271. lexeme_t l = _lexer.current();
  9272. _lexer.next();
  9273. if (++_depth > xpath_ast_depth_limit)
  9274. return error_rec();
  9275. if (l == lex_double_slash)
  9276. {
  9277. n = alloc_node(ast_step, n, axis_descendant_or_self, nodetest_type_node, 0);
  9278. if (!n) return 0;
  9279. }
  9280. n = parse_step(n);
  9281. if (!n) return 0;
  9282. }
  9283. _depth = old_depth;
  9284. return n;
  9285. }
  9286. // LocationPath ::= RelativeLocationPath | AbsoluteLocationPath
  9287. // AbsoluteLocationPath ::= '/' RelativeLocationPath? | '//' RelativeLocationPath
  9288. xpath_ast_node* parse_location_path()
  9289. {
  9290. if (_lexer.current() == lex_slash)
  9291. {
  9292. _lexer.next();
  9293. xpath_ast_node* n = alloc_node(ast_step_root, xpath_type_node_set);
  9294. if (!n) return 0;
  9295. // relative location path can start from axis_attribute, dot, double_dot, multiply and string lexemes; any other lexeme means standalone root path
  9296. lexeme_t l = _lexer.current();
  9297. if (l == lex_string || l == lex_axis_attribute || l == lex_dot || l == lex_double_dot || l == lex_multiply)
  9298. return parse_relative_location_path(n);
  9299. else
  9300. return n;
  9301. }
  9302. else if (_lexer.current() == lex_double_slash)
  9303. {
  9304. _lexer.next();
  9305. xpath_ast_node* n = alloc_node(ast_step_root, xpath_type_node_set);
  9306. if (!n) return 0;
  9307. n = alloc_node(ast_step, n, axis_descendant_or_self, nodetest_type_node, 0);
  9308. if (!n) return 0;
  9309. return parse_relative_location_path(n);
  9310. }
  9311. // else clause moved outside of if because of bogus warning 'control may reach end of non-void function being inlined' in gcc 4.0.1
  9312. return parse_relative_location_path(0);
  9313. }
  9314. // PathExpr ::= LocationPath
  9315. // | FilterExpr
  9316. // | FilterExpr '/' RelativeLocationPath
  9317. // | FilterExpr '//' RelativeLocationPath
  9318. // UnionExpr ::= PathExpr | UnionExpr '|' PathExpr
  9319. // UnaryExpr ::= UnionExpr | '-' UnaryExpr
  9320. xpath_ast_node* parse_path_or_unary_expression()
  9321. {
  9322. // Clarification.
  9323. // PathExpr begins with either LocationPath or FilterExpr.
  9324. // FilterExpr begins with PrimaryExpr
  9325. // PrimaryExpr begins with '$' in case of it being a variable reference,
  9326. // '(' in case of it being an expression, string literal, number constant or
  9327. // function call.
  9328. if (_lexer.current() == lex_var_ref || _lexer.current() == lex_open_brace ||
  9329. _lexer.current() == lex_quoted_string || _lexer.current() == lex_number ||
  9330. _lexer.current() == lex_string)
  9331. {
  9332. if (_lexer.current() == lex_string)
  9333. {
  9334. // This is either a function call, or not - if not, we shall proceed with location path
  9335. const char_t* state = _lexer.state();
  9336. while (PUGI__IS_CHARTYPE(*state, ct_space)) ++state;
  9337. if (*state != '(')
  9338. return parse_location_path();
  9339. // This looks like a function call; however this still can be a node-test. Check it.
  9340. if (parse_node_test_type(_lexer.contents()) != nodetest_none)
  9341. return parse_location_path();
  9342. }
  9343. xpath_ast_node* n = parse_filter_expression();
  9344. if (!n) return 0;
  9345. if (_lexer.current() == lex_slash || _lexer.current() == lex_double_slash)
  9346. {
  9347. lexeme_t l = _lexer.current();
  9348. _lexer.next();
  9349. if (l == lex_double_slash)
  9350. {
  9351. if (n->rettype() != xpath_type_node_set)
  9352. return error("Step has to be applied to node set");
  9353. n = alloc_node(ast_step, n, axis_descendant_or_self, nodetest_type_node, 0);
  9354. if (!n) return 0;
  9355. }
  9356. // select from location path
  9357. return parse_relative_location_path(n);
  9358. }
  9359. return n;
  9360. }
  9361. else if (_lexer.current() == lex_minus)
  9362. {
  9363. _lexer.next();
  9364. // precedence 7+ - only parses union expressions
  9365. xpath_ast_node* n = parse_expression(7);
  9366. if (!n) return 0;
  9367. return alloc_node(ast_op_negate, xpath_type_number, n);
  9368. }
  9369. else
  9370. {
  9371. return parse_location_path();
  9372. }
  9373. }
  9374. struct binary_op_t
  9375. {
  9376. ast_type_t asttype;
  9377. xpath_value_type rettype;
  9378. int precedence;
  9379. binary_op_t(): asttype(ast_unknown), rettype(xpath_type_none), precedence(0)
  9380. {
  9381. }
  9382. binary_op_t(ast_type_t asttype_, xpath_value_type rettype_, int precedence_): asttype(asttype_), rettype(rettype_), precedence(precedence_)
  9383. {
  9384. }
  9385. static binary_op_t parse(xpath_lexer& lexer)
  9386. {
  9387. switch (lexer.current())
  9388. {
  9389. case lex_string:
  9390. if (lexer.contents() == PUGIXML_TEXT("or"))
  9391. return binary_op_t(ast_op_or, xpath_type_boolean, 1);
  9392. else if (lexer.contents() == PUGIXML_TEXT("and"))
  9393. return binary_op_t(ast_op_and, xpath_type_boolean, 2);
  9394. else if (lexer.contents() == PUGIXML_TEXT("div"))
  9395. return binary_op_t(ast_op_divide, xpath_type_number, 6);
  9396. else if (lexer.contents() == PUGIXML_TEXT("mod"))
  9397. return binary_op_t(ast_op_mod, xpath_type_number, 6);
  9398. else
  9399. return binary_op_t();
  9400. case lex_equal:
  9401. return binary_op_t(ast_op_equal, xpath_type_boolean, 3);
  9402. case lex_not_equal:
  9403. return binary_op_t(ast_op_not_equal, xpath_type_boolean, 3);
  9404. case lex_less:
  9405. return binary_op_t(ast_op_less, xpath_type_boolean, 4);
  9406. case lex_greater:
  9407. return binary_op_t(ast_op_greater, xpath_type_boolean, 4);
  9408. case lex_less_or_equal:
  9409. return binary_op_t(ast_op_less_or_equal, xpath_type_boolean, 4);
  9410. case lex_greater_or_equal:
  9411. return binary_op_t(ast_op_greater_or_equal, xpath_type_boolean, 4);
  9412. case lex_plus:
  9413. return binary_op_t(ast_op_add, xpath_type_number, 5);
  9414. case lex_minus:
  9415. return binary_op_t(ast_op_subtract, xpath_type_number, 5);
  9416. case lex_multiply:
  9417. return binary_op_t(ast_op_multiply, xpath_type_number, 6);
  9418. case lex_union:
  9419. return binary_op_t(ast_op_union, xpath_type_node_set, 7);
  9420. default:
  9421. return binary_op_t();
  9422. }
  9423. }
  9424. };
  9425. xpath_ast_node* parse_expression_rec(xpath_ast_node* lhs, int limit)
  9426. {
  9427. binary_op_t op = binary_op_t::parse(_lexer);
  9428. while (op.asttype != ast_unknown && op.precedence >= limit)
  9429. {
  9430. _lexer.next();
  9431. if (++_depth > xpath_ast_depth_limit)
  9432. return error_rec();
  9433. xpath_ast_node* rhs = parse_path_or_unary_expression();
  9434. if (!rhs) return 0;
  9435. binary_op_t nextop = binary_op_t::parse(_lexer);
  9436. while (nextop.asttype != ast_unknown && nextop.precedence > op.precedence)
  9437. {
  9438. rhs = parse_expression_rec(rhs, nextop.precedence);
  9439. if (!rhs) return 0;
  9440. nextop = binary_op_t::parse(_lexer);
  9441. }
  9442. if (op.asttype == ast_op_union && (lhs->rettype() != xpath_type_node_set || rhs->rettype() != xpath_type_node_set))
  9443. return error("Union operator has to be applied to node sets");
  9444. lhs = alloc_node(op.asttype, op.rettype, lhs, rhs);
  9445. if (!lhs) return 0;
  9446. op = binary_op_t::parse(_lexer);
  9447. }
  9448. return lhs;
  9449. }
  9450. // Expr ::= OrExpr
  9451. // OrExpr ::= AndExpr | OrExpr 'or' AndExpr
  9452. // AndExpr ::= EqualityExpr | AndExpr 'and' EqualityExpr
  9453. // EqualityExpr ::= RelationalExpr
  9454. // | EqualityExpr '=' RelationalExpr
  9455. // | EqualityExpr '!=' RelationalExpr
  9456. // RelationalExpr ::= AdditiveExpr
  9457. // | RelationalExpr '<' AdditiveExpr
  9458. // | RelationalExpr '>' AdditiveExpr
  9459. // | RelationalExpr '<=' AdditiveExpr
  9460. // | RelationalExpr '>=' AdditiveExpr
  9461. // AdditiveExpr ::= MultiplicativeExpr
  9462. // | AdditiveExpr '+' MultiplicativeExpr
  9463. // | AdditiveExpr '-' MultiplicativeExpr
  9464. // MultiplicativeExpr ::= UnaryExpr
  9465. // | MultiplicativeExpr '*' UnaryExpr
  9466. // | MultiplicativeExpr 'div' UnaryExpr
  9467. // | MultiplicativeExpr 'mod' UnaryExpr
  9468. xpath_ast_node* parse_expression(int limit = 0)
  9469. {
  9470. size_t old_depth = _depth;
  9471. if (++_depth > xpath_ast_depth_limit)
  9472. return error_rec();
  9473. xpath_ast_node* n = parse_path_or_unary_expression();
  9474. if (!n) return 0;
  9475. n = parse_expression_rec(n, limit);
  9476. _depth = old_depth;
  9477. return n;
  9478. }
  9479. xpath_parser(const char_t* query, xpath_variable_set* variables, xpath_allocator* alloc, xpath_parse_result* result): _alloc(alloc), _lexer(query), _query(query), _variables(variables), _result(result), _depth(0)
  9480. {
  9481. }
  9482. xpath_ast_node* parse()
  9483. {
  9484. xpath_ast_node* n = parse_expression();
  9485. if (!n) return 0;
  9486. assert(_depth == 0);
  9487. // check if there are unparsed tokens left
  9488. if (_lexer.current() != lex_eof)
  9489. return error("Incorrect query");
  9490. return n;
  9491. }
  9492. static xpath_ast_node* parse(const char_t* query, xpath_variable_set* variables, xpath_allocator* alloc, xpath_parse_result* result)
  9493. {
  9494. xpath_parser parser(query, variables, alloc, result);
  9495. return parser.parse();
  9496. }
  9497. };
  9498. struct xpath_query_impl
  9499. {
  9500. static xpath_query_impl* create()
  9501. {
  9502. void* memory = xml_memory::allocate(sizeof(xpath_query_impl));
  9503. if (!memory) return 0;
  9504. return new (memory) xpath_query_impl();
  9505. }
  9506. static void destroy(xpath_query_impl* impl)
  9507. {
  9508. // free all allocated pages
  9509. impl->alloc.release();
  9510. // free allocator memory (with the first page)
  9511. xml_memory::deallocate(impl);
  9512. }
  9513. xpath_query_impl(): root(0), alloc(&block, &oom), oom(false)
  9514. {
  9515. block.next = 0;
  9516. block.capacity = sizeof(block.data);
  9517. }
  9518. xpath_ast_node* root;
  9519. xpath_allocator alloc;
  9520. xpath_memory_block block;
  9521. bool oom;
  9522. };
  9523. PUGI__FN impl::xpath_ast_node* evaluate_node_set_prepare(xpath_query_impl* impl)
  9524. {
  9525. if (!impl) return 0;
  9526. if (impl->root->rettype() != xpath_type_node_set)
  9527. {
  9528. #ifdef PUGIXML_NO_EXCEPTIONS
  9529. return 0;
  9530. #else
  9531. xpath_parse_result res;
  9532. res.error = "Expression does not evaluate to node set";
  9533. throw xpath_exception(res);
  9534. #endif
  9535. }
  9536. return impl->root;
  9537. }
  9538. PUGI__NS_END
  9539. namespace pugi
  9540. {
  9541. #ifndef PUGIXML_NO_EXCEPTIONS
  9542. PUGI__FN xpath_exception::xpath_exception(const xpath_parse_result& result_): _result(result_)
  9543. {
  9544. assert(_result.error);
  9545. }
  9546. PUGI__FN const char* xpath_exception::what() const throw()
  9547. {
  9548. return _result.error;
  9549. }
  9550. PUGI__FN const xpath_parse_result& xpath_exception::result() const
  9551. {
  9552. return _result;
  9553. }
  9554. #endif
  9555. PUGI__FN xpath_node::xpath_node()
  9556. {
  9557. }
  9558. PUGI__FN xpath_node::xpath_node(const xml_node& node_): _node(node_)
  9559. {
  9560. }
  9561. PUGI__FN xpath_node::xpath_node(const xml_attribute& attribute_, const xml_node& parent_): _node(attribute_ ? parent_ : xml_node()), _attribute(attribute_)
  9562. {
  9563. }
  9564. PUGI__FN xml_node xpath_node::node() const
  9565. {
  9566. return _attribute ? xml_node() : _node;
  9567. }
  9568. PUGI__FN xml_attribute xpath_node::attribute() const
  9569. {
  9570. return _attribute;
  9571. }
  9572. PUGI__FN xml_node xpath_node::parent() const
  9573. {
  9574. return _attribute ? _node : _node.parent();
  9575. }
  9576. PUGI__FN static void unspecified_bool_xpath_node(xpath_node***)
  9577. {
  9578. }
  9579. PUGI__FN xpath_node::operator xpath_node::unspecified_bool_type() const
  9580. {
  9581. return (_node || _attribute) ? unspecified_bool_xpath_node : 0;
  9582. }
  9583. PUGI__FN bool xpath_node::operator!() const
  9584. {
  9585. return !(_node || _attribute);
  9586. }
  9587. PUGI__FN bool xpath_node::operator==(const xpath_node& n) const
  9588. {
  9589. return _node == n._node && _attribute == n._attribute;
  9590. }
  9591. PUGI__FN bool xpath_node::operator!=(const xpath_node& n) const
  9592. {
  9593. return _node != n._node || _attribute != n._attribute;
  9594. }
  #ifdef __BORLANDC__
  // Borland-only overload: logical AND of an XPath node (converted to bool) and a bool.
  PUGI__FN bool operator&&(const xpath_node& lhs, bool rhs)
  {
      const bool lhs_value = (bool)lhs;

      return lhs_value && rhs;
  }

  // Borland-only overload: logical OR of an XPath node (converted to bool) and a bool.
  PUGI__FN bool operator||(const xpath_node& lhs, bool rhs)
  {
      const bool lhs_value = (bool)lhs;

      return lhs_value || rhs;
  }
  #endif
  9605. PUGI__FN void xpath_node_set::_assign(const_iterator begin_, const_iterator end_, type_t type_)
  9606. {
  9607. assert(begin_ <= end_);
  9608. size_t size_ = static_cast<size_t>(end_ - begin_);
  9609. // use internal buffer for 0 or 1 elements, heap buffer otherwise
  9610. xpath_node* storage = (size_ <= 1) ? _storage : static_cast<xpath_node*>(impl::xml_memory::allocate(size_ * sizeof(xpath_node)));
  9611. if (!storage)
  9612. {
  9613. #ifdef PUGIXML_NO_EXCEPTIONS
  9614. return;
  9615. #else
  9616. throw std::bad_alloc();
  9617. #endif
  9618. }
  9619. // deallocate old buffer
  9620. if (_begin != _storage)
  9621. impl::xml_memory::deallocate(_begin);
  9622. // size check is necessary because for begin_ = end_ = nullptr, memcpy is UB
  9623. if (size_)
  9624. memcpy(storage, begin_, size_ * sizeof(xpath_node));
  9625. _begin = storage;
  9626. _end = storage + size_;
  9627. _type = type_;
  9628. }
  #ifdef PUGIXML_HAS_MOVE
  // Takes over the contents of rhs and leaves rhs as an empty unsorted set.
  // Does not release any buffer this set currently owns.
  PUGI__FN void xpath_node_set::_move(xpath_node_set& rhs) PUGIXML_NOEXCEPT
  {
      const bool rhs_uses_internal_buffer = (rhs._begin == rhs._storage);

      _type = rhs._type;

      // the internal slot is copied unconditionally
      _storage[0] = rhs._storage[0];

      // an internal buffer cannot be stolen, so point at our own copy instead
      _begin = rhs_uses_internal_buffer ? _storage : rhs._begin;
      _end = _begin + (rhs._end - rhs._begin);

      rhs._type = type_unsorted;
      rhs._begin = rhs._storage;
      rhs._end = rhs._storage;
  }
  #endif
  9641. PUGI__FN xpath_node_set::xpath_node_set(): _type(type_unsorted), _begin(_storage), _end(_storage)
  9642. {
  9643. }
  9644. PUGI__FN xpath_node_set::xpath_node_set(const_iterator begin_, const_iterator end_, type_t type_): _type(type_unsorted), _begin(_storage), _end(_storage)
  9645. {
  9646. _assign(begin_, end_, type_);
  9647. }
  9648. PUGI__FN xpath_node_set::~xpath_node_set()
  9649. {
  9650. if (_begin != _storage)
  9651. impl::xml_memory::deallocate(_begin);
  9652. }
  9653. PUGI__FN xpath_node_set::xpath_node_set(const xpath_node_set& ns): _type(type_unsorted), _begin(_storage), _end(_storage)
  9654. {
  9655. _assign(ns._begin, ns._end, ns._type);
  9656. }
  9657. PUGI__FN xpath_node_set& xpath_node_set::operator=(const xpath_node_set& ns)
  9658. {
  9659. if (this == &ns) return *this;
  9660. _assign(ns._begin, ns._end, ns._type);
  9661. return *this;
  9662. }
  #ifdef PUGIXML_HAS_MOVE
  // Move constructor: starts empty, then takes over the contents of rhs.
  PUGI__FN xpath_node_set::xpath_node_set(xpath_node_set&& rhs) PUGIXML_NOEXCEPT:
      _type(type_unsorted),
      _begin(_storage),
      _end(_storage)
  {
      _move(rhs);
  }

  // Move assignment: releases the current heap buffer (if any) and takes over
  // the contents of rhs. Moving a set onto itself is a no-op.
  PUGI__FN xpath_node_set& xpath_node_set::operator=(xpath_node_set&& rhs) PUGIXML_NOEXCEPT
  {
      if (this != &rhs)
      {
          if (_begin != _storage)
              impl::xml_memory::deallocate(_begin);

          _move(rhs);
      }

      return *this;
  }
  #endif
  9677. PUGI__FN xpath_node_set::type_t xpath_node_set::type() const
  9678. {
  9679. return _type;
  9680. }
  9681. PUGI__FN size_t xpath_node_set::size() const
  9682. {
  9683. return _end - _begin;
  9684. }
  9685. PUGI__FN bool xpath_node_set::empty() const
  9686. {
  9687. return _begin == _end;
  9688. }
  9689. PUGI__FN const xpath_node& xpath_node_set::operator[](size_t index) const
  9690. {
  9691. assert(index < size());
  9692. return _begin[index];
  9693. }
  9694. PUGI__FN xpath_node_set::const_iterator xpath_node_set::begin() const
  9695. {
  9696. return _begin;
  9697. }
  9698. PUGI__FN xpath_node_set::const_iterator xpath_node_set::end() const
  9699. {
  9700. return _end;
  9701. }
  9702. PUGI__FN void xpath_node_set::sort(bool reverse)
  9703. {
  9704. _type = impl::xpath_sort(_begin, _end, _type, reverse);
  9705. }
  9706. PUGI__FN xpath_node xpath_node_set::first() const
  9707. {
  9708. return impl::xpath_first(_begin, _end, _type);
  9709. }
  9710. PUGI__FN xpath_parse_result::xpath_parse_result(): error("Internal error"), offset(0)
  9711. {
  9712. }
  9713. PUGI__FN xpath_parse_result::operator bool() const
  9714. {
  9715. return error == 0;
  9716. }
  9717. PUGI__FN const char* xpath_parse_result::description() const
  9718. {
  9719. return error ? error : "No error";
  9720. }
  9721. PUGI__FN xpath_variable::xpath_variable(xpath_value_type type_): _type(type_), _next(0)
  9722. {
  9723. }
  9724. PUGI__FN const char_t* xpath_variable::name() const
  9725. {
  9726. switch (_type)
  9727. {
  9728. case xpath_type_node_set:
  9729. return static_cast<const impl::xpath_variable_node_set*>(this)->name;
  9730. case xpath_type_number:
  9731. return static_cast<const impl::xpath_variable_number*>(this)->name;
  9732. case xpath_type_string:
  9733. return static_cast<const impl::xpath_variable_string*>(this)->name;
  9734. case xpath_type_boolean:
  9735. return static_cast<const impl::xpath_variable_boolean*>(this)->name;
  9736. default:
  9737. assert(false && "Invalid variable type"); // unreachable
  9738. return 0;
  9739. }
  9740. }
  9741. PUGI__FN xpath_value_type xpath_variable::type() const
  9742. {
  9743. return _type;
  9744. }
  9745. PUGI__FN bool xpath_variable::get_boolean() const
  9746. {
  9747. return (_type == xpath_type_boolean) ? static_cast<const impl::xpath_variable_boolean*>(this)->value : false;
  9748. }
  9749. PUGI__FN double xpath_variable::get_number() const
  9750. {
  9751. return (_type == xpath_type_number) ? static_cast<const impl::xpath_variable_number*>(this)->value : impl::gen_nan();
  9752. }
  9753. PUGI__FN const char_t* xpath_variable::get_string() const
  9754. {
  9755. const char_t* value = (_type == xpath_type_string) ? static_cast<const impl::xpath_variable_string*>(this)->value : 0;
  9756. return value ? value : PUGIXML_TEXT("");
  9757. }
  9758. PUGI__FN const xpath_node_set& xpath_variable::get_node_set() const
  9759. {
  9760. return (_type == xpath_type_node_set) ? static_cast<const impl::xpath_variable_node_set*>(this)->value : impl::dummy_node_set;
  9761. }
  9762. PUGI__FN bool xpath_variable::set(bool value)
  9763. {
  9764. if (_type != xpath_type_boolean) return false;
  9765. static_cast<impl::xpath_variable_boolean*>(this)->value = value;
  9766. return true;
  9767. }
  9768. PUGI__FN bool xpath_variable::set(double value)
  9769. {
  9770. if (_type != xpath_type_number) return false;
  9771. static_cast<impl::xpath_variable_number*>(this)->value = value;
  9772. return true;
  9773. }
  9774. PUGI__FN bool xpath_variable::set(const char_t* value)
  9775. {
  9776. if (_type != xpath_type_string) return false;
  9777. impl::xpath_variable_string* var = static_cast<impl::xpath_variable_string*>(this);
  9778. // duplicate string
  9779. size_t size = (impl::strlength(value) + 1) * sizeof(char_t);
  9780. char_t* copy = static_cast<char_t*>(impl::xml_memory::allocate(size));
  9781. if (!copy) return false;
  9782. memcpy(copy, value, size);
  9783. // replace old string
  9784. if (var->value) impl::xml_memory::deallocate(var->value);
  9785. var->value = copy;
  9786. return true;
  9787. }
  9788. PUGI__FN bool xpath_variable::set(const xpath_node_set& value)
  9789. {
  9790. if (_type != xpath_type_node_set) return false;
  9791. static_cast<impl::xpath_variable_node_set*>(this)->value = value;
  9792. return true;
  9793. }
  9794. PUGI__FN xpath_variable_set::xpath_variable_set()
  9795. {
  9796. for (size_t i = 0; i < sizeof(_data) / sizeof(_data[0]); ++i)
  9797. _data[i] = 0;
  9798. }
  9799. PUGI__FN xpath_variable_set::~xpath_variable_set()
  9800. {
  9801. for (size_t i = 0; i < sizeof(_data) / sizeof(_data[0]); ++i)
  9802. _destroy(_data[i]);
  9803. }
  9804. PUGI__FN xpath_variable_set::xpath_variable_set(const xpath_variable_set& rhs)
  9805. {
  9806. for (size_t i = 0; i < sizeof(_data) / sizeof(_data[0]); ++i)
  9807. _data[i] = 0;
  9808. _assign(rhs);
  9809. }
  9810. PUGI__FN xpath_variable_set& xpath_variable_set::operator=(const xpath_variable_set& rhs)
  9811. {
  9812. if (this == &rhs) return *this;
  9813. _assign(rhs);
  9814. return *this;
  9815. }
  9816. #ifdef PUGIXML_HAS_MOVE
  9817. PUGI__FN xpath_variable_set::xpath_variable_set(xpath_variable_set&& rhs) PUGIXML_NOEXCEPT
  9818. {
  9819. for (size_t i = 0; i < sizeof(_data) / sizeof(_data[0]); ++i)
  9820. {
  9821. _data[i] = rhs._data[i];
  9822. rhs._data[i] = 0;
  9823. }
  9824. }
  9825. PUGI__FN xpath_variable_set& xpath_variable_set::operator=(xpath_variable_set&& rhs) PUGIXML_NOEXCEPT
  9826. {
  9827. for (size_t i = 0; i < sizeof(_data) / sizeof(_data[0]); ++i)
  9828. {
  9829. _destroy(_data[i]);
  9830. _data[i] = rhs._data[i];
  9831. rhs._data[i] = 0;
  9832. }
  9833. return *this;
  9834. }
  9835. #endif
  9836. PUGI__FN void xpath_variable_set::_assign(const xpath_variable_set& rhs)
  9837. {
  9838. xpath_variable_set temp;
  9839. for (size_t i = 0; i < sizeof(_data) / sizeof(_data[0]); ++i)
  9840. if (rhs._data[i] && !_clone(rhs._data[i], &temp._data[i]))
  9841. return;
  9842. _swap(temp);
  9843. }
  9844. PUGI__FN void xpath_variable_set::_swap(xpath_variable_set& rhs)
  9845. {
  9846. for (size_t i = 0; i < sizeof(_data) / sizeof(_data[0]); ++i)
  9847. {
  9848. xpath_variable* chain = _data[i];
  9849. _data[i] = rhs._data[i];
  9850. rhs._data[i] = chain;
  9851. }
  9852. }
  9853. PUGI__FN xpath_variable* xpath_variable_set::_find(const char_t* name) const
  9854. {
  9855. const size_t hash_size = sizeof(_data) / sizeof(_data[0]);
  9856. size_t hash = impl::hash_string(name) % hash_size;
  9857. // look for existing variable
  9858. for (xpath_variable* var = _data[hash]; var; var = var->_next)
  9859. if (impl::strequal(var->name(), name))
  9860. return var;
  9861. return 0;
  9862. }
  9863. PUGI__FN bool xpath_variable_set::_clone(xpath_variable* var, xpath_variable** out_result)
  9864. {
  9865. xpath_variable* last = 0;
  9866. while (var)
  9867. {
  9868. // allocate storage for new variable
  9869. xpath_variable* nvar = impl::new_xpath_variable(var->_type, var->name());
  9870. if (!nvar) return false;
  9871. // link the variable to the result immediately to handle failures gracefully
  9872. if (last)
  9873. last->_next = nvar;
  9874. else
  9875. *out_result = nvar;
  9876. last = nvar;
  9877. // copy the value; this can fail due to out-of-memory conditions
  9878. if (!impl::copy_xpath_variable(nvar, var)) return false;
  9879. var = var->_next;
  9880. }
  9881. return true;
  9882. }
  9883. PUGI__FN void xpath_variable_set::_destroy(xpath_variable* var)
  9884. {
  9885. while (var)
  9886. {
  9887. xpath_variable* next = var->_next;
  9888. impl::delete_xpath_variable(var->_type, var);
  9889. var = next;
  9890. }
  9891. }
  9892. PUGI__FN xpath_variable* xpath_variable_set::add(const char_t* name, xpath_value_type type)
  9893. {
  9894. const size_t hash_size = sizeof(_data) / sizeof(_data[0]);
  9895. size_t hash = impl::hash_string(name) % hash_size;
  9896. // look for existing variable
  9897. for (xpath_variable* var = _data[hash]; var; var = var->_next)
  9898. if (impl::strequal(var->name(), name))
  9899. return var->type() == type ? var : 0;
  9900. // add new variable
  9901. xpath_variable* result = impl::new_xpath_variable(type, name);
  9902. if (result)
  9903. {
  9904. result->_next = _data[hash];
  9905. _data[hash] = result;
  9906. }
  9907. return result;
  9908. }
  9909. PUGI__FN bool xpath_variable_set::set(const char_t* name, bool value)
  9910. {
  9911. xpath_variable* var = add(name, xpath_type_boolean);
  9912. return var ? var->set(value) : false;
  9913. }
  9914. PUGI__FN bool xpath_variable_set::set(const char_t* name, double value)
  9915. {
  9916. xpath_variable* var = add(name, xpath_type_number);
  9917. return var ? var->set(value) : false;
  9918. }
  9919. PUGI__FN bool xpath_variable_set::set(const char_t* name, const char_t* value)
  9920. {
  9921. xpath_variable* var = add(name, xpath_type_string);
  9922. return var ? var->set(value) : false;
  9923. }
  9924. PUGI__FN bool xpath_variable_set::set(const char_t* name, const xpath_node_set& value)
  9925. {
  9926. xpath_variable* var = add(name, xpath_type_node_set);
  9927. return var ? var->set(value) : false;
  9928. }
  9929. PUGI__FN xpath_variable* xpath_variable_set::get(const char_t* name)
  9930. {
  9931. return _find(name);
  9932. }
  9933. PUGI__FN const xpath_variable* xpath_variable_set::get(const char_t* name) const
  9934. {
  9935. return _find(name);
  9936. }
  9937. PUGI__FN xpath_query::xpath_query(const char_t* query, xpath_variable_set* variables): _impl(0)
  9938. {
  9939. impl::xpath_query_impl* qimpl = impl::xpath_query_impl::create();
  9940. if (!qimpl)
  9941. {
  9942. #ifdef PUGIXML_NO_EXCEPTIONS
  9943. _result.error = "Out of memory";
  9944. #else
  9945. throw std::bad_alloc();
  9946. #endif
  9947. }
  9948. else
  9949. {
  9950. using impl::auto_deleter; // MSVC7 workaround
  9951. auto_deleter<impl::xpath_query_impl> impl(qimpl, impl::xpath_query_impl::destroy);
  9952. qimpl->root = impl::xpath_parser::parse(query, variables, &qimpl->alloc, &_result);
  9953. if (qimpl->root)
  9954. {
  9955. qimpl->root->optimize(&qimpl->alloc);
  9956. _impl = impl.release();
  9957. _result.error = 0;
  9958. }
  9959. else
  9960. {
  9961. #ifdef PUGIXML_NO_EXCEPTIONS
  9962. if (qimpl->oom) _result.error = "Out of memory";
  9963. #else
  9964. if (qimpl->oom) throw std::bad_alloc();
  9965. throw xpath_exception(_result);
  9966. #endif
  9967. }
  9968. }
  9969. }
  9970. PUGI__FN xpath_query::xpath_query(): _impl(0)
  9971. {
  9972. }
  9973. PUGI__FN xpath_query::~xpath_query()
  9974. {
  9975. if (_impl)
  9976. impl::xpath_query_impl::destroy(static_cast<impl::xpath_query_impl*>(_impl));
  9977. }
  #ifdef PUGIXML_HAS_MOVE
  // Move constructor: takes the implementation and parse result of rhs and
  // resets rhs to a default parse result with no implementation.
  PUGI__FN xpath_query::xpath_query(xpath_query&& rhs) PUGIXML_NOEXCEPT
  {
      _impl = rhs._impl;
      rhs._impl = 0;

      _result = rhs._result;
      rhs._result = xpath_parse_result();
  }

  // Move assignment: destroys the current implementation (if any), then takes
  // over rhs as the move constructor does. Moving a query onto itself is a no-op.
  PUGI__FN xpath_query& xpath_query::operator=(xpath_query&& rhs) PUGIXML_NOEXCEPT
  {
      if (this != &rhs)
      {
          if (_impl)
              impl::xpath_query_impl::destroy(static_cast<impl::xpath_query_impl*>(_impl));

          _impl = rhs._impl;
          rhs._impl = 0;

          _result = rhs._result;
          rhs._result = xpath_parse_result();
      }

      return *this;
  }
  #endif
  9998. PUGI__FN xpath_value_type xpath_query::return_type() const
  9999. {
  10000. if (!_impl) return xpath_type_none;
  10001. return static_cast<impl::xpath_query_impl*>(_impl)->root->rettype();
  10002. }
  10003. PUGI__FN bool xpath_query::evaluate_boolean(const xpath_node& n) const
  10004. {
  10005. if (!_impl) return false;
  10006. impl::xpath_context c(n, 1, 1);
  10007. impl::xpath_stack_data sd;
  10008. bool r = static_cast<impl::xpath_query_impl*>(_impl)->root->eval_boolean(c, sd.stack);
  10009. if (sd.oom)
  10010. {
  10011. #ifdef PUGIXML_NO_EXCEPTIONS
  10012. return false;
  10013. #else
  10014. throw std::bad_alloc();
  10015. #endif
  10016. }
  10017. return r;
  10018. }
  10019. PUGI__FN double xpath_query::evaluate_number(const xpath_node& n) const
  10020. {
  10021. if (!_impl) return impl::gen_nan();
  10022. impl::xpath_context c(n, 1, 1);
  10023. impl::xpath_stack_data sd;
  10024. double r = static_cast<impl::xpath_query_impl*>(_impl)->root->eval_number(c, sd.stack);
  10025. if (sd.oom)
  10026. {
  10027. #ifdef PUGIXML_NO_EXCEPTIONS
  10028. return impl::gen_nan();
  10029. #else
  10030. throw std::bad_alloc();
  10031. #endif
  10032. }
  10033. return r;
  10034. }
  10035. #ifndef PUGIXML_NO_STL
  10036. PUGI__FN string_t xpath_query::evaluate_string(const xpath_node& n) const
  10037. {
  10038. if (!_impl) return string_t();
  10039. impl::xpath_context c(n, 1, 1);
  10040. impl::xpath_stack_data sd;
  10041. impl::xpath_string r = static_cast<impl::xpath_query_impl*>(_impl)->root->eval_string(c, sd.stack);
  10042. if (sd.oom)
  10043. {
  10044. #ifdef PUGIXML_NO_EXCEPTIONS
  10045. return string_t();
  10046. #else
  10047. throw std::bad_alloc();
  10048. #endif
  10049. }
  10050. return string_t(r.c_str(), r.length());
  10051. }
  10052. #endif
  10053. PUGI__FN size_t xpath_query::evaluate_string(char_t* buffer, size_t capacity, const xpath_node& n) const
  10054. {
  10055. impl::xpath_context c(n, 1, 1);
  10056. impl::xpath_stack_data sd;
  10057. impl::xpath_string r = _impl ? static_cast<impl::xpath_query_impl*>(_impl)->root->eval_string(c, sd.stack) : impl::xpath_string();
  10058. if (sd.oom)
  10059. {
  10060. #ifdef PUGIXML_NO_EXCEPTIONS
  10061. r = impl::xpath_string();
  10062. #else
  10063. throw std::bad_alloc();
  10064. #endif
  10065. }
  10066. size_t full_size = r.length() + 1;
  10067. if (capacity > 0)
  10068. {
  10069. size_t size = (full_size < capacity) ? full_size : capacity;
  10070. assert(size > 0);
  10071. memcpy(buffer, r.c_str(), (size - 1) * sizeof(char_t));
  10072. buffer[size - 1] = 0;
  10073. }
  10074. return full_size;
  10075. }
  10076. PUGI__FN xpath_node_set xpath_query::evaluate_node_set(const xpath_node& n) const
  10077. {
  10078. impl::xpath_ast_node* root = impl::evaluate_node_set_prepare(static_cast<impl::xpath_query_impl*>(_impl));
  10079. if (!root) return xpath_node_set();
  10080. impl::xpath_context c(n, 1, 1);
  10081. impl::xpath_stack_data sd;
  10082. impl::xpath_node_set_raw r = root->eval_node_set(c, sd.stack, impl::nodeset_eval_all);
  10083. if (sd.oom)
  10084. {
  10085. #ifdef PUGIXML_NO_EXCEPTIONS
  10086. return xpath_node_set();
  10087. #else
  10088. throw std::bad_alloc();
  10089. #endif
  10090. }
  10091. return xpath_node_set(r.begin(), r.end(), r.type());
  10092. }
  10093. PUGI__FN xpath_node xpath_query::evaluate_node(const xpath_node& n) const
  10094. {
  10095. impl::xpath_ast_node* root = impl::evaluate_node_set_prepare(static_cast<impl::xpath_query_impl*>(_impl));
  10096. if (!root) return xpath_node();
  10097. impl::xpath_context c(n, 1, 1);
  10098. impl::xpath_stack_data sd;
  10099. impl::xpath_node_set_raw r = root->eval_node_set(c, sd.stack, impl::nodeset_eval_first);
  10100. if (sd.oom)
  10101. {
  10102. #ifdef PUGIXML_NO_EXCEPTIONS
  10103. return xpath_node();
  10104. #else
  10105. throw std::bad_alloc();
  10106. #endif
  10107. }
  10108. return r.first();
  10109. }
  10110. PUGI__FN const xpath_parse_result& xpath_query::result() const
  10111. {
  10112. return _result;
  10113. }
  10114. PUGI__FN static void unspecified_bool_xpath_query(xpath_query***)
  10115. {
  10116. }
  10117. PUGI__FN xpath_query::operator xpath_query::unspecified_bool_type() const
  10118. {
  10119. return _impl ? unspecified_bool_xpath_query : 0;
  10120. }
  10121. PUGI__FN bool xpath_query::operator!() const
  10122. {
  10123. return !_impl;
  10124. }
  10125. PUGI__FN xpath_node xml_node::select_node(const char_t* query, xpath_variable_set* variables) const
  10126. {
  10127. xpath_query q(query, variables);
  10128. return q.evaluate_node(*this);
  10129. }
  10130. PUGI__FN xpath_node xml_node::select_node(const xpath_query& query) const
  10131. {
  10132. return query.evaluate_node(*this);
  10133. }
  10134. PUGI__FN xpath_node_set xml_node::select_nodes(const char_t* query, xpath_variable_set* variables) const
  10135. {
  10136. xpath_query q(query, variables);
  10137. return q.evaluate_node_set(*this);
  10138. }
  10139. PUGI__FN xpath_node_set xml_node::select_nodes(const xpath_query& query) const
  10140. {
  10141. return query.evaluate_node_set(*this);
  10142. }
  10143. PUGI__FN xpath_node xml_node::select_single_node(const char_t* query, xpath_variable_set* variables) const
  10144. {
  10145. xpath_query q(query, variables);
  10146. return q.evaluate_node(*this);
  10147. }
  10148. PUGI__FN xpath_node xml_node::select_single_node(const xpath_query& query) const
  10149. {
  10150. return query.evaluate_node(*this);
  10151. }
  10152. }
  10153. #endif
  10154. #ifdef __BORLANDC__
  10155. # pragma option pop
  10156. #endif
  10157. // Intel C++ does not properly keep warning state for function templates,
  10158. // so popping warning state at the end of translation unit leads to warnings in the middle.
  10159. #if defined(_MSC_VER) && !defined(__INTEL_COMPILER)
  10160. # pragma warning(pop)
  10161. #endif
  10162. #if defined(_MSC_VER) && defined(__c2__)
  10163. # pragma clang diagnostic pop
  10164. #endif
  10165. // Undefine all local macros (makes sure we're not leaking macros in header-only mode)
  10166. #undef PUGI__NO_INLINE
  10167. #undef PUGI__UNLIKELY
  10168. #undef PUGI__STATIC_ASSERT
  10169. #undef PUGI__DMC_VOLATILE
  10170. #undef PUGI__UNSIGNED_OVERFLOW
  10171. #undef PUGI__MSVC_CRT_VERSION
  10172. #undef PUGI__SNPRINTF
  10173. #undef PUGI__NS_BEGIN
  10174. #undef PUGI__NS_END
  10175. #undef PUGI__FN
  10176. #undef PUGI__FN_NO_INLINE
  10177. #undef PUGI__GETHEADER_IMPL
  10178. #undef PUGI__GETPAGE_IMPL
  10179. #undef PUGI__GETPAGE
  10180. #undef PUGI__NODETYPE
  10181. #undef PUGI__IS_CHARTYPE_IMPL
  10182. #undef PUGI__IS_CHARTYPE
  10183. #undef PUGI__IS_CHARTYPEX
  10184. #undef PUGI__ENDSWITH
  10185. #undef PUGI__SKIPWS
  10186. #undef PUGI__OPTSET
  10187. #undef PUGI__PUSHNODE
  10188. #undef PUGI__POPNODE
  10189. #undef PUGI__SCANFOR
  10190. #undef PUGI__SCANWHILE
  10191. #undef PUGI__SCANWHILE_UNROLL
  10192. #undef PUGI__ENDSEG
  10193. #undef PUGI__THROW_ERROR
  10194. #undef PUGI__CHECK_ERROR
  10195. #endif
  10196. /**
  10197. * Copyright (c) 2006-2020 Arseny Kapoulkine
  10198. *
  10199. * Permission is hereby granted, free of charge, to any person
  10200. * obtaining a copy of this software and associated documentation
  10201. * files (the "Software"), to deal in the Software without
  10202. * restriction, including without limitation the rights to use,
  10203. * copy, modify, merge, publish, distribute, sublicense, and/or sell
  10204. * copies of the Software, and to permit persons to whom the
  10205. * Software is furnished to do so, subject to the following
  10206. * conditions:
  10207. *
  10208. * The above copyright notice and this permission notice shall be
  10209. * included in all copies or substantial portions of the Software.
  10210. *
  10211. * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
  10212. * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
  10213. * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
  10214. * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
  10215. * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
  10216. * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
  10217. * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
  10218. * OTHER DEALINGS IN THE SOFTWARE.
  10219. */