PNG  IHDRQgAMA a cHRMz&u0`:pQ<bKGDgmIDATxwUﹻ& ^CX(J I@ "% (** BX +*i"]j(IH{~R)[~>h{}gy)I$Ij .I$I$ʊy@}x.: $I$Ii}VZPC)I$IF ^0ʐJ$I$Q^}{"r=OzI$gRZeC.IOvH eKX $IMpxsk.쒷/&r[޳<v| .I~)@$updYRa$I |M.e JaֶpSYR6j>h%IRز if&uJ)M$I vLi=H;7UJ,],X$I1AҒJ$ XY XzI@GNҥRT)E@;]K*Mw;#5_wOn~\ DC&$(A5 RRFkvIR}l!RytRl;~^ǷJj اy뷦BZJr&ӥ8Pjw~vnv X^(I;4R=P[3]J,]ȏ~:3?[ a&e)`e*P[4]T=Cq6R[ ~ޤrXR Հg(t_HZ-Hg M$ãmL5R uk*`%C-E6/%[t X.{8P9Z.vkXŐKjgKZHg(aK9ڦmKjѺm_ \#$5,)-  61eJ,5m| r'= &ڡd%-]J on Xm|{ RҞe $eڧY XYrԮ-a7RK6h>n$5AVڴi*ֆK)mѦtmr1p| q:흺,)Oi*ֺK)ܬ֦K-5r3>0ԔHjJئEZj,%re~/z%jVMڸmrt)3]J,T K֦OvԒgii*bKiNO~%PW0=dii2tJ9Jݕ{7"I P9JKTbu,%r"6RKU}Ij2HKZXJ,妝 XYrP ެ24c%i^IK|.H,%rb:XRl1X4Pe/`x&P8Pj28Mzsx2r\zRPz4J}yP[g=L) .Q[6RjWgp FIH*-`IMRaK9TXcq*I y[jE>cw%gLRԕiFCj-ďa`#e~I j,%r,)?[gp FI˨mnWX#>mʔ XA DZf9,nKҲzIZXJ,L#kiPz4JZF,I,`61%2s $,VOϚ2/UFJfy7K> X+6 STXIeJILzMfKm LRaK9%|4p9LwJI!`NsiazĔ)%- XMq>pk$-$Q2x#N ؎-QR}ᶦHZډ)J,l#i@yn3LN`;nڔ XuX5pF)m|^0(>BHF9(cզEerJI rg7 4I@z0\JIi䵙RR0s;$s6eJ,`n 䂦0a)S)A 1eJ,堌#635RIgpNHuTH_SԕqVe ` &S)>p;S$魁eKIuX`I4춒o}`m$1":PI<[v9^\pTJjriRŭ P{#{R2,`)e-`mgj~1ϣLKam7&U\j/3mJ,`F;M'䱀 .KR#)yhTq;pcK9(q!w?uRR,n.yw*UXj#\]ɱ(qv2=RqfB#iJmmL<]Y͙#$5 uTU7ӦXR+q,`I}qL'`6Kͷ6r,]0S$- [RKR3oiRE|nӦXR.(i:LDLTJjY%o:)6rxzҒqTJjh㞦I.$YR.ʼnGZ\ֿf:%55 I˼!6dKxm4E"mG_ s? 
.e*?LRfK9%q#uh$)i3ULRfK9yxm܌bj84$i1U^@Wbm4uJ,ҪA>_Ij?1v32[gLRD96oTaR׿N7%L2 NT,`)7&ƝL*꽙yp_$M2#AS,`)7$rkTA29_Iye"|/0t)$n XT2`YJ;6Jx".e<`$) PI$5V4]29SRI>~=@j]lp2`K9Jaai^" Ԋ29ORI%:XV5]JmN9]H;1UC39NI%Xe78t)a;Oi Ҙ>Xt"~G>_mn:%|~ޅ_+]$o)@ǀ{hgN;IK6G&rp)T2i୦KJuv*T=TOSV>(~D>dm,I*Ɛ:R#ۙNI%D>G.n$o;+#RR!.eU˽TRI28t)1LWϚ>IJa3oFbu&:tJ*(F7y0ZR ^p'Ii L24x| XRI%ۄ>S1]Jy[zL$adB7.eh4%%누>WETf+3IR:I3Xה)3אOۦSRO'ٺ)S}"qOr[B7ϙ.edG)^ETR"RtRݜh0}LFVӦDB^k_JDj\=LS(Iv─aTeZ%eUAM-0;~˃@i|l @S4y72>sX-vA}ϛBI!ݎߨWl*)3{'Y|iSlEڻ(5KtSI$Uv02,~ԩ~x;P4ցCrO%tyn425:KMlD ^4JRxSهF_}شJTS6uj+ﷸk$eZO%G*^V2u3EMj3k%)okI]dT)URKDS 7~m@TJR~荪fT"֛L \sM -0T KfJz+nإKr L&j()[E&I ߴ>e FW_kJR|!O:5/2跌3T-'|zX ryp0JS ~^F>-2< `*%ZFP)bSn"L :)+pʷf(pO3TMW$~>@~ū:TAIsV1}S2<%ޟM?@iT ,Eūoz%i~g|`wS(]oȤ8)$ ntu`өe`6yPl IzMI{ʣzʨ )IZ2= ld:5+請M$-ї;U>_gsY$ÁN5WzWfIZ)-yuXIfp~S*IZdt;t>KūKR|$#LcԀ+2\;kJ`]YǔM1B)UbG"IRߊ<xܾӔJ0Z='Y嵤 Leveg)$znV-º^3Ւof#0Tfk^Zs[*I꯳3{)ˬW4Ւ4 OdpbZRS|*I 55#"&-IvT&/윚Ye:i$ 9{LkuRe[I~_\ؠ%>GL$iY8 9ܕ"S`kS.IlC;Ҏ4x&>u_0JLr<J2(^$5L s=MgV ~,Iju> 7r2)^=G$1:3G< `J3~&IR% 6Tx/rIj3O< ʔ&#f_yXJiގNSz; Tx(i8%#4 ~AS+IjerIUrIj362v885+IjAhK__5X%nV%Iͳ-y|7XV2v4fzo_68"S/I-qbf; LkF)KSM$ Ms>K WNV}^`-큧32ŒVؙGdu,^^m%6~Nn&͓3ŒVZMsRpfEW%IwdǀLm[7W&bIRL@Q|)* i ImsIMmKmyV`i$G+R 0tV'!V)֏28vU7͒vHꦼtxꗞT ;S}7Mf+fIRHNZUkUx5SAJㄌ9MqμAIRi|j5)o*^'<$TwI1hEU^c_j?Е$%d`z cyf,XO IJnTgA UXRD }{H}^S,P5V2\Xx`pZ|Yk:$e ~ @nWL.j+ϝYb퇪bZ BVu)u/IJ_ 1[p.p60bC >|X91P:N\!5qUB}5a5ja `ubcVxYt1N0Zzl4]7­gKj]?4ϻ *[bg$)+À*x쳀ogO$~,5 زUS9 lq3+5mgw@np1sso Ӻ=|N6 /g(Wv7U;zωM=wk,0uTg_`_P`uz?2yI!b`kĸSo+Qx%!\οe|އԁKS-s6pu_(ֿ$i++T8=eY; צP+phxWQv*|p1. ά. 
XRkIQYP,drZ | B%wP|S5`~́@i޾ E;Չaw{o'Q?%iL{u D?N1BD!owPHReFZ* k_-~{E9b-~P`fE{AܶBJAFO wx6Rox5 K5=WwehS8 (JClJ~ p+Fi;ŗo+:bD#g(C"wA^ r.F8L;dzdIHUX݆ϞXg )IFqem%I4dj&ppT{'{HOx( Rk6^C٫O.)3:s(۳(Z?~ٻ89zmT"PLtw䥈5&b<8GZ-Y&K?e8,`I6e(֍xb83 `rzXj)F=l($Ij 2*(F?h(/9ik:I`m#p3MgLaKjc/U#n5S# m(^)=y=đx8ŬI[U]~SцA4p$-F i(R,7Cx;X=cI>{Km\ o(Tv2vx2qiiDJN,Ҏ!1f 5quBj1!8 rDFd(!WQl,gSkL1Bxg''՞^ǘ;pQ P(c_ IRujg(Wz bs#P­rz> k c&nB=q+ؔXn#r5)co*Ũ+G?7< |PQӣ'G`uOd>%Mctz# Ԫڞ&7CaQ~N'-P.W`Oedp03C!IZcIAMPUۀ5J<\u~+{9(FbbyAeBhOSܳ1 bÈT#ŠyDžs,`5}DC-`̞%r&ڙa87QWWp6e7 Rϫ/oY ꇅ Nܶըtc!LA T7V4Jsū I-0Pxz7QNF_iZgúWkG83 0eWr9 X]㾮݁#Jˢ C}0=3ݱtBi]_ &{{[/o[~ \q鯜00٩|cD3=4B_b RYb$óBRsf&lLX#M*C_L܄:gx)WΘsGSbuL rF$9';\4Ɍq'n[%p.Q`u hNb`eCQyQ|l_C>Lb꟟3hSb #xNxSs^ 88|Mz)}:](vbۢamŖ࿥ 0)Q7@0=?^k(*J}3ibkFn HjB׻NO z x}7p 0tfDX.lwgȔhԾŲ }6g E |LkLZteu+=q\Iv0쮑)QٵpH8/2?Σo>Jvppho~f>%bMM}\//":PTc(v9v!gոQ )UfVG+! 35{=x\2+ki,y$~A1iC6#)vC5^>+gǵ@1Hy٪7u;p psϰu/S <aʸGu'tD1ԝI<pg|6j'p:tպhX{o(7v],*}6a_ wXRk,O]Lܳ~Vo45rp"N5k;m{rZbΦ${#)`(Ŵg,;j%6j.pyYT?}-kBDc3qA`NWQū20/^AZW%NQ MI.X#P#,^Ebc&?XR tAV|Y.1!؅⨉ccww>ivl(JT~ u`ٵDm q)+Ri x/x8cyFO!/*!/&,7<.N,YDŽ&ܑQF1Bz)FPʛ?5d 6`kQձ λc؎%582Y&nD_$Je4>a?! 
ͨ|ȎWZSsv8 j(I&yj Jb5m?HWp=g}G3#|I,5v珿] H~R3@B[☉9Ox~oMy=J;xUVoj bUsl_35t-(ՃɼRB7U!qc+x4H_Qo֮$[GO<4`&č\GOc[.[*Af%mG/ ňM/r W/Nw~B1U3J?P&Y )`ѓZ1p]^l“W#)lWZilUQu`-m|xĐ,_ƪ|9i:_{*(3Gѧ}UoD+>m_?VPۅ15&}2|/pIOʵ> GZ9cmíتmnz)yߐbD >e}:) r|@R5qVSA10C%E_'^8cR7O;6[eKePGϦX7jb}OTGO^jn*媓7nGMC t,k31Rb (vyܴʭ!iTh8~ZYZp(qsRL ?b}cŨʊGO^!rPJO15MJ[c&~Z`"ѓޔH1C&^|Ш|rʼ,AwĴ?b5)tLU)F| &g٣O]oqSUjy(x<Ϳ3 .FSkoYg2 \_#wj{u'rQ>o;%n|F*O_L"e9umDds?.fuuQbIWz |4\0 sb;OvxOSs; G%T4gFRurj(֍ڑb uԖKDu1MK{1^ q; C=6\8FR艇!%\YÔU| 88m)֓NcLve C6z;o&X x59:q61Z(T7>C?gcļxѐ Z oo-08jہ x,`' ҔOcRlf~`jj".Nv+sM_]Zk g( UOPyεx%pUh2(@il0ݽQXxppx-NS( WO+轾 nFߢ3M<;z)FBZjciu/QoF 7R¥ ZFLF~#ȣߨ^<쩡ݛкvџ))ME>ώx4m#!-m!L;vv#~Y[đKmx9.[,UFS CVkZ +ߟrY٧IZd/ioi$%͝ب_ֶX3ܫhNU ZZgk=]=bbJS[wjU()*I =ώ:}-蹞lUj:1}MWm=̛ _ ¾,8{__m{_PVK^n3esw5ӫh#$-q=A̟> ,^I}P^J$qY~Q[ Xq9{#&T.^GVj__RKpn,b=`żY@^՝;z{paVKkQXj/)y TIc&F;FBG7wg ZZDG!x r_tƢ!}i/V=M/#nB8 XxЫ ^@CR<{䤭YCN)eKOSƟa $&g[i3.C6xrOc8TI;o hH6P&L{@q6[ Gzp^71j(l`J}]e6X☉#͕ ׈$AB1Vjh㭦IRsqFBjwQ_7Xk>y"N=MB0 ,C #o6MRc0|$)ف"1!ixY<B9mx `,tA>)5ػQ?jQ?cn>YZe Tisvh# GMމȇp:ԴVuږ8ɼH]C.5C!UV;F`mbBk LTMvPʍϤj?ԯ/Qr1NB`9s"s TYsz &9S%U԰> {<ؿSMxB|H\3@!U| k']$U+> |HHMLޢ?V9iD!-@x TIî%6Z*9X@HMW#?nN ,oe6?tQwڱ.]-y':mW0#!J82qFjH -`ѓ&M0u Uγmxϵ^-_\])@0Rt.8/?ٰCY]x}=sD3ojަЫNuS%U}ԤwHH>ڗjܷ_3gN q7[q2la*ArǓԖ+p8/RGM ]jacd(JhWko6ڎbj]i5Bj3+3!\j1UZLsLTv8HHmup<>gKMJj0@H%,W΃7R) ">c, xixј^ aܖ>H[i.UIHc U1=yW\=S*GR~)AF=`&2h`DzT󑓶J+?W+}C%P:|0H܆}-<;OC[~o.$~i}~HQ TvXΈr=b}$vizL4:ȰT|4~*!oXQR6Lk+#t/g lԁߖ[Jڶ_N$k*". xsxX7jRVbAAʯKҎU3)zSNN _'s?f)6X!%ssAkʱ>qƷb hg %n ~p1REGMHH=BJiy[<5 ǁJҖgKR*倳e~HUy)Ag,K)`Vw6bRR:qL#\rclK/$sh*$ 6덤 KԖc 3Z9=Ɣ=o>X Ώ"1 )a`SJJ6k(<c e{%kϊP+SL'TcMJWRm ŏ"w)qc ef꒵i?b7b('"2r%~HUS1\<(`1Wx9=8HY9m:X18bgD1u ~|H;K-Uep,, C1 RV.MR5άh,tWO8WC$ XRVsQS]3GJ|12 [vM :k#~tH30Rf-HYݺ-`I9%lIDTm\ S{]9gOڒMNCV\G*2JRŨ;Rҏ^ڽ̱mq1Eu?To3I)y^#jJw^Ńj^vvlB_⋌P4x>0$c>K†Aļ9s_VjTt0l#m>E-,,x,-W)سo&96RE XR.6bXw+)GAEvL)͞K4$p=Ũi_ѱOjb HY/+@θH9޼]Nԥ%n{ &zjT? 
Ty) s^ULlb,PiTf^<À] 62R^V7)S!nllS6~͝V}-=%* ʻ>G DnK<y&>LPy7'r=Hj 9V`[c"*^8HpcO8bnU`4JȪAƋ#1_\ XϘHPRgik(~G~0DAA_2p|J묭a2\NCr]M_0 ^T%e#vD^%xy-n}-E\3aS%yN!r_{ )sAw ڼp1pEAk~v<:`'ӭ^5 ArXOI驻T (dk)_\ PuA*BY]yB"l\ey hH*tbK)3 IKZ򹞋XjN n *n>k]X_d!ryBH ]*R 0(#'7 %es9??ښFC,ՁQPjARJ\Ρw K#jahgw;2$l*) %Xq5!U᢯6Re] |0[__64ch&_}iL8KEgҎ7 M/\`|.p,~`a=BR?xܐrQ8K XR2M8f ?`sgWS%" Ԉ 7R%$ N}?QL1|-эټwIZ%pvL3Hk>,ImgW7{E xPHx73RA @RS CC !\ȟ5IXR^ZxHл$Q[ŝ40 (>+ _C >BRt<,TrT {O/H+˟Pl6 I B)/VC<6a2~(XwV4gnXR ϱ5ǀHٻ?tw똤Eyxp{#WK qG%5],(0ӈH HZ])ג=K1j&G(FbM@)%I` XRg ʔ KZG(vP,<`[ Kn^ SJRsAʠ5xՅF`0&RbV tx:EaUE/{fi2;.IAwW8/tTxAGOoN?G}l L(n`Zv?pB8K_gI+ܗ #i?ޙ.) p$utc ~DžfՈEo3l/)I-U?aԅ^jxArA ΧX}DmZ@QLےbTXGd.^|xKHR{|ΕW_h] IJ`[G9{).y) 0X YA1]qp?p_k+J*Y@HI>^?gt.06Rn ,` ?);p pSF9ZXLBJPWjgQ|&)7! HjQt<| ؅W5 x W HIzYoVMGP Hjn`+\(dNW)F+IrS[|/a`K|ͻ0Hj{R,Q=\ (F}\WR)AgSG`IsnAR=|8$}G(vC$)s FBJ?]_u XRvύ6z ŨG[36-T9HzpW̞ú Xg큽=7CufzI$)ki^qk-) 0H*N` QZkk]/tnnsI^Gu't=7$ Z;{8^jB% IItRQS7[ϭ3 $_OQJ`7!]W"W,)Iy W AJA;KWG`IY{8k$I$^%9.^(`N|LJ%@$I}ֽp=FB*xN=gI?Q{٥4B)mw $Igc~dZ@G9K X?7)aK%݅K$IZ-`IpC U6$I\0>!9k} Xa IIS0H$I H ?1R.Чj:4~Rw@p$IrA*u}WjWFPJ$I➓/6#! LӾ+ X36x8J |+L;v$Io4301R20M I$-E}@,pS^ޟR[/s¹'0H$IKyfŸfVOπFT*a$I>He~VY/3R/)>d$I>28`Cjw,n@FU*9ttf$I~<;=/4RD~@ X-ѕzἱI$: ԍR a@b X{+Qxuq$IЛzo /~3\8ڒ4BN7$IҀj V]n18H$IYFBj3̵̚ja pp $Is/3R Ӻ-Yj+L;.0ŔI$Av? #!5"aʄj}UKmɽH$IjCYs?h$IDl843.v}m7UiI=&=0Lg0$I4: embe` eQbm0u? 
$IT!Sƍ'-sv)s#C0:XB2a w I$zbww{."pPzO =Ɔ\[ o($Iaw]`E).Kvi:L*#gР7[$IyGPI=@R 4yR~̮´cg I$I/<tPͽ hDgo 94Z^k盇΄8I56^W$I^0̜N?4*H`237}g+hxoq)SJ@p|` $I%>-hO0eO>\ԣNߌZD6R=K ~n($I$y3D>o4b#px2$yڪtzW~a $I~?x'BwwpH$IZݑnC㧄Pc_9sO gwJ=l1:mKB>Ab<4Lp$Ib o1ZQ@85b̍ S'F,Fe,^I$IjEdù{l4 8Ys_s Z8.x m"+{~?q,Z D!I$ϻ'|XhB)=…']M>5 rgotԎ 獽PH$IjIPhh)n#cÔqA'ug5qwU&rF|1E%I$%]!'3AFD/;Ck_`9 v!ٴtPV;x`'*bQa w I$Ix5 FC3D_~A_#O݆DvV?<qw+I$I{=Z8".#RIYyjǪ=fDl9%M,a8$I$Ywi[7ݍFe$s1ՋBVA?`]#!oz4zjLJo8$I$%@3jAa4(o ;p,,dya=F9ً[LSPH$IJYЉ+3> 5"39aZ<ñh!{TpBGkj}Sp $IlvF.F$I z< '\K*qq.f<2Y!S"-\I$IYwčjF$ w9 \ߪB.1v!Ʊ?+r:^!I$BϹB H"B;L'G[ 4U#5>੐)|#o0aڱ$I>}k&1`U#V?YsV x>{t1[I~D&(I$I/{H0fw"q"y%4 IXyE~M3 8XψL}qE$I[> nD?~sf ]o΁ cT6"?'_Ἣ $I>~.f|'!N?⟩0G KkXZE]ޡ;/&?k OۘH$IRۀwXӨ<7@PnS04aӶp.:@\IWQJ6sS%I$e5ڑv`3:x';wq_vpgHyXZ 3gЂ7{{EuԹn±}$I$8t;b|591nءQ"P6O5i }iR̈́%Q̄p!I䮢]O{H$IRϻ9s֧ a=`- aB\X0"+5"C1Hb?߮3x3&gşggl_hZ^,`5?ߎvĸ%̀M!OZC2#0x LJ0 Gw$I$I}<{Eb+y;iI,`ܚF:5ܛA8-O-|8K7s|#Z8a&><a&/VtbtLʌI$I$I$I$I$I$IRjDD%tEXtdate:create2022-05-31T04:40:26+00:00!Î%tEXtdate:modify2022-05-31T04:40:26+00:00|{2IENDB`Mini Shell

HOME


Mini Shell 1.0
DIR:/proc/self/root/usr/local/share/perl5/HTML/
Upload File :
Current File : //proc/self/root/usr/local/share/perl5/HTML/TagParser.pm
=head1 NAME

HTML::TagParser - Yet another HTML document parser with DOM-like methods

=head1 SYNOPSIS

Parse a HTML file and find its <title> element's value.

    my $html = HTML::TagParser->new( "index-j.html" );
    my $elem = $html->getElementsByTagName( "title" );
    print "<title>", $elem->innerText(), "</title>\n" if ref $elem;

Parse a HTML source and find its first <form action=""> attribute's value
and find all input elements belonging to this form.

    my $src  = '<html><form action="hoge.cgi">...</form></html>';
    my $html = HTML::TagParser->new( $src );
    my $elem = $html->getElementsByTagName( "form" );
    print "<form action=\"", $elem->getAttribute("action"), "\">\n" if ref $elem;
    my @first_inputs = $elem->subTree()->getElementsByTagName( "input" );
    my $form = $first_inputs[0]->parentNode();

Fetch a HTML file via HTTP, and display its all <a> elements and attributes.

    my $url  = 'http://www.kawa.net/xp/index-e.html';
    my $html = HTML::TagParser->new( $url );
    my @list = $html->getElementsByTagName( "a" );
    foreach my $elem ( @list ) {
        my $tagname = $elem->tagName;
        my $attr = $elem->attributes;
        my $text = $elem->innerText;
        print "<$tagname";
        foreach my $key ( sort keys %$attr ) {
            print " $key=\"$attr->{$key}\"";
        }
        if ( $text eq "" ) {
            print " />\n";
        } else {
            print ">$text</$tagname>\n";
        }
    }

=head1 DESCRIPTION

HTML::TagParser is a pure Perl module which parses HTML/XHTML files.
This module provides some methods like DOM interface.
This module is not strict about XHTML format
because many of HTML pages are not strict.
You know, many pages use <br> elements instead of <br/>
and have <p> elements which are not closed.

=head1 METHODS

=head2 $html = HTML::TagParser->new();

This method constructs an empty instance of the C<HTML::TagParser> class.

=head2 $html = HTML::TagParser->new( $url );

If new() is called with a URL,
this method fetches a HTML file from remote web server and parses it
and returns its instance.
L<URI::Fetch> module is required to fetch a file.

=head2 $html = HTML::TagParser->new( $file );

If new() is called with a filename,
this method parses a local HTML file and returns its instance.

=head2 $html = HTML::TagParser->new( "<html>...snip...</html>" );

If new() is called with a string of HTML source code,
this method parses it and returns its instance.

=head2 $html->fetch( $url, %param );

This method fetches a HTML file from remote web server and parse it.
The second argument is optional parameters for L<URI::Fetch> module.

=head2 $html->open( $file );

This method parses a local HTML file.

=head2 $html->parse( $source );

This method parses a string of HTML source code.

=head2 $elem = $html->getElementById( $id );

This method returns the element whose id attribute is $id.

=head2 @elem = $html->getElementsByName( $name );

This method returns an array of elements whose name attribute is $name.
In scalar context, only the first element is returned.

=head2 @elem = $html->getElementsByTagName( $tagname );

This method returns an array of elements whose tagName is $tagname.
In scalar context, only the first element is returned.

=head2 @elem = $html->getElementsByClassName( $class );

This method returns an array of elements whose class attribute is $class.
In scalar context, only the first element is returned.

=head2 @elem = $html->getElementsByAttribute( $attrname, $value );

This method returns an array of elements whose $attrname attribute's value is $value.
In scalar context, only the first element is returned.

=head1 HTML::TagParser::Element SUBCLASS

=head2 $tagname = $elem->tagName();

This method returns $elem's tagName.

=head2 $text = $elem->id();

This method returns $elem's id attribute.

=head2 $text = $elem->innerText();

This method returns $elem's innerText without tags.

=head2 $subhtml = $elem->subTree();

This method returns a new object of class HTML::TagParser,
with all the elements that are in the DOM hierarchy under $elem.

=head2 $elem = $elem->nextSibling();

This method returns the next sibling within the same parent.
It returns undef when called on a closing tag or on the lastChild node
of a parentNode.

=head2 $elem = $elem->previousSibling();

This method returns the previous sibling within the same parent.
It returns undef when called on the firstChild node of a parentNode.

=head2 $child_elem = $elem->firstChild();

This method returns the first child node of $elem.
It returns undef when called on a closing tag element or on a
non-container or empty container element.

=head2 $child_elems = $elem->childNodes();

This method creates an array of all child nodes of $elem and returns the array by reference.
It returns an empty array-ref [] whenever firstChild() would return undef.

=head2 $child_elem = $elem->lastChild();

This method returns the last child node of $elem.
It returns undef whenever firstChild() would return undef.

=head2 $parent = $elem->parentNode();

This method returns the parent node of $elem.
It returns undef when called on root nodes.

=head2 $attr = $elem->attributes();

This method returns a hash of $elem's all attributes.

=head2 $value = $elem->getAttribute( $key );

This method returns the value of $elem's attributes which name is $key.

=head1 BUGS

The HTML-Parser is simple. Methods innerText and subTree may be
fooled by nested tags or embedded javascript code.

The methods with 'Sibling', 'child' or 'Child' in their names do not cache their results.
The most expensive ones are lastChild() and previousSibling().
parentNode() is also expensive, but only once. It does caching.

The DOM tree is read-only, as this is just a parser.

=head1 INTERNATIONALIZATION

This module natively understands the character encoding used in document
by parsing its meta element.

    <meta http-equiv="Content-Type" content="text/html; charset=Shift_JIS">

The parsed document's encoding is converted
as this class's fixed internal encoding "UTF-8".

=head1 AUTHORS AND CONTRIBUTORS

    drry [drry]
    Juergen Weigert [jnw]
    Yusuke Kawasaki [kawasaki] [kawanet]
    Tim Wilde [twilde]

=head1 COPYRIGHT AND LICENSE

The following copyright notice applies to all the files provided in this
distribution, including binary files, unless explicitly noted otherwise.

Copyright 2006-2012 Yusuke Kawasaki

This program is free software; you can redistribute it and/or
modify it under the same terms as Perl itself.

=cut
# ----------------------------------------------------------------

package HTML::TagParser;
use 5.008_001;
use strict;
use Symbol ();
use Carp ();
use Encode ();

our $VERSION = "0.20";

my $SEC_OF_DAY = 60 * 60 * 24;

#  [000]        '/' if closing tag.
#  [001]        tagName
#  [002]        attributes string (with trailing /, if self-closing tag).
#  [003]        content until next (nested) tag.
#  [004]        attributes hash cache.
#  [005]        innerText combined strings cache.
#  [006]        index of matching closing tag (or opening tag, if [000]=='/')
#  [007]        index of parent (aka container) tag.
#
# Constructor. With no source argument an empty instance is returned.
# Otherwise the source is dispatched by shape:
#   - starts with http:// or https://            -> fetch() (extra args passed on)
#   - contains none of < > | and names a file     -> open()
#   - contains something that looks like a tag    -> parse()
# Anything else yields an empty instance as well.
sub new {
    my ( $package, $src, @fetch_param ) = @_;
    my $self = bless {}, $package;

    if ( defined $src ) {
        if ( $src =~ m#^https?://\w# ) {
            $self->fetch( $src, @fetch_param );
        }
        elsif ( $src !~ m#[<>|]# && -f $src ) {
            $self->open($src);
        }
        elsif ( $src =~ /<.*>/ ) {
            $self->parse($src);
        }
    }

    return $self;
}

# Fetches $url through URI::Fetch (loaded on demand) and parses the body.
# Remaining arguments are handed to URI::Fetch->fetch() unchanged.
# Croaks when URI::Fetch cannot be loaded or returns a non-reference;
# returns nothing when the response reports an error.
# On success records the response's last_modified value and returns
# whatever parse() returns.
sub fetch {
    my ( $self, $url, @param ) = @_;

    unless ( defined $URI::Fetch::VERSION ) {
        local $@;
        eval { require URI::Fetch; };
        Carp::croak "URI::Fetch is required: $url" if $@;
    }

    my $res = URI::Fetch->fetch( $url, @param );
    ref $res or Carp::croak "URI::Fetch failed: $url";
    return if $res->is_error();

    $self->{modified} = $res->last_modified();
    my $text = $res->content();
    return $self->parse( \$text );
}

# Reads a local HTML file and parses it.
#
# Returns nothing when the file content is undefined, otherwise returns
# whatever parse() returns. read_text_file() croaks if the file cannot
# be opened.
#
# The file's modification time (epoch seconds, truncated to the minute)
# is stored in $self->{modified}. It is taken from stat() rather than
# derived from -M: -M is measured relative to the script start time ($^T),
# so combining it with time() skews the result by the process uptime.
# If stat() fails (e.g. the file vanished after being read), the
# 'modified' slot is left untouched.
sub open {
    my ( $self, $file ) = @_;

    my $text = HTML::TagParser::Util::read_text_file($file);
    return unless defined $text;

    my $mtime = ( stat $file )[9];
    if ( defined $mtime ) {
        $self->{modified} = $mtime - $mtime % 60;
    }

    return $self->parse( \$text );
}

# Parses HTML source given either as a string or as a reference to one.
#
# The charset announced by a Content-Type meta element (if any) is kept in
# $self->{charset} unless a true value is already stored there; when that
# charset is known to Encode, the text is converted to "utf-8" in place
# through the reference.
# Croaks with "Null HTML document." when no tag is found.
# Returns the number of parsed tags.
sub parse {
    my ( $self, $text ) = @_;
    my $txtref = ref $text ? $text : \$text;

    my $charset = HTML::TagParser::Util::find_meta_charset($txtref);
    $self->{charset} = $charset unless $self->{charset};
    if ( $charset and Encode::find_encoding($charset) ) {
        HTML::TagParser::Util::encode_from_to( $txtref, $charset, "utf-8" );
    }

    my $flat = HTML::TagParser::Util::html_to_flat($txtref);
    @$flat or Carp::croak "Null HTML document.";
    $self->{flat} = $flat;
    return scalar @$flat;
}

# Finds opening tags whose (lower-cased) name equals the requested one.
# List context: all matching elements, in document order.
# Scalar context: the first match, or undef when there is none.
sub getElementsByTagName {
    my ( $self, $wanted ) = @_;
    my $tagname = lc $wanted;

    my $flat = $self->{flat};
    my @found;
    foreach my $i ( 0 .. $#$flat ) {
        my $row = $flat->[$i];
        next if $row->[1] ne $tagname;
        next if $row->[0];                  # closing tag
        my $elem = HTML::TagParser::Element->new( $flat, $i );
        return $elem unless wantarray;      # scalar context: first hit wins
        push @found, $elem;
    }
    return unless wantarray;
    return @found;
}

# Finds opening tags carrying attribute $key (matched case-insensitively,
# the name is lower-cased) whose value is string-equal to $val.
# List context: all matching elements, in document order.
# Scalar context: the first match, or undef when there is none.
sub getElementsByAttribute {
    my ( $self, $attrname, $val ) = @_;
    my $key = lc $attrname;

    my $flat = $self->{flat};
    my @found;
    foreach my $i ( 0 .. $#$flat ) {
        next if $flat->[$i][0];             # closing tag
        my $elem = HTML::TagParser::Element->new( $flat, $i );
        my $attr = $elem->attributes();
        next unless exists $attr->{$key};
        next unless $attr->{$key} eq $val;
        return $elem unless wantarray;      # scalar context: first hit wins
        push @found, $elem;
    }
    return unless wantarray;
    return @found;
}

# Shortcut for getElementsByAttribute( "class", $class ).
# The class attribute is compared as a whole string; the caller's context
# is passed through to getElementsByAttribute().
sub getElementsByClassName {
    my ( $self, $class ) = @_;
    return $self->getElementsByAttribute( class => $class );
}

# Shortcut for getElementsByAttribute( "name", $name ); the caller's
# context is passed through to getElementsByAttribute().
sub getElementsByName {
    my ( $self, $name ) = @_;
    return $self->getElementsByAttribute( name => $name );
}

# Returns the first element whose id attribute equals $id (undef if none).
# getElementsByAttribute() is always invoked in scalar context, so exactly
# one value is returned regardless of the caller's context.
sub getElementById {
    my ( $self, $id ) = @_;
    my $found = $self->getElementsByAttribute( id => $id );
    return $found;
}

# Accessor for the 'modified' slot, which fetch() and open() fill in.
sub modified {
    my ($self) = @_;
    return $self->{modified};
}

# ----------------------------------------------------------------

package HTML::TagParser::Element;
use strict;

# Constructor: stores the given arguments in a blessed array reference.
# Callers in this file pass ( $flat, $index ): the shared array of parsed
# tags and this element's position in it.
sub new {
    my ( $package, @fields ) = @_;
    return bless [@fields], $package;
}

# Returns the tag name stored in slot [1] of this element's row.
sub tagName {
    my ($self) = @_;
    my $flat   = $self->[0];
    my $index  = $self->[1];
    return $flat->[$index][1];
}

# Returns this element's id attribute, via getAttribute("id").
sub id {
    my ($self) = @_;
    return $self->getAttribute("id");
}

# Returns the value of the attribute called $name (looked up lower-cased).
# When the attribute is absent: undef in scalar context, empty list in
# list context.
sub getAttribute {
    my ( $self, $name ) = @_;
    my $key  = lc $name;
    my $attr = $self->attributes();
    if ( exists $attr->{$key} ) {
        return $attr->{$key};
    }
    return;
}

# Returns the text content of this element with the tags removed.
#
# The content slots [3] of every row from this element up to (but not
# including) the index reported by find_closing() are concatenated,
# surrounding whitespace is trimmed and the result is run through
# xml_unescape(). The result is cached in slot [5].
# Returns nothing for closing tags and for self-closing tags (attribute
# string ending in "/"), unless a cached value already exists.
sub innerText {
    my ($self) = @_;
    my ( $flat, $start ) = @$self;
    my $elem = $flat->[$start];

    return $elem->[5] if defined $elem->[5];                 # cache
    return if $elem->[0];                                    # </xxx>
    return if defined $elem->[2] && $elem->[2] =~ m#/$#;     # <xxx/>

    my $closing = HTML::TagParser::Util::find_closing( $flat, $start );
    my @chunks  = map { $flat->[$_][3] } $start .. $closing - 1;
    my $text    = join( "", grep { $_ ne "" } @chunks );
    $text =~ s/^\s+|\s+$//sg;
    return $elem->[5] = HTML::TagParser::Util::xml_unescape($text);
}

# Returns a new HTML::TagParser object holding every row strictly between
# this element and the index reported by find_closing(), so that the
# getElement...() methods can be used on that part of the document only.
# Returns nothing when called on a closing tag.
#
# Each row is copied up to slot [004] only. Slots [006] (matching tag
# index) and [007] (parent index) cache absolute positions in the parent
# document's array; the subtree array is re-indexed from 0, so sharing
# those caches would make find_closing() and parentNode() return wrong
# positions. Slot [005] (innerText cache) is dropped as well because it is
# derived from those positions. The attribute hash in [004] does not depend
# on position and is shared.
sub subTree
{
    my $self = shift;
    my ( $flat, $cur ) = @$self;
    my $elem = $flat->[$cur];
    return if $elem->[000];                         # </xxx>
    my $closing = HTML::TagParser::Util::find_closing($flat, $cur);
    my @list;
    for my $idx ( $cur + 1 .. $closing - 1 )
      {
        my @row = @{ $flat->[$idx] };
        $#row = 4 if $#row > 4;                     # drop position-dependent caches
        push @list, \@row;
      }

    # allow the getElement...() methods on the returned object.
    return bless { flat => \@list }, 'HTML::TagParser';
}


# Returns the element that starts right after this element's closing
# position (as reported by find_closing()).
# Returns undef when called on a closing tag, when there is no row at that
# position, or when that row is itself a closing tag (the parent's </xxx>).
sub nextSibling
{
    my ($self) = @_;
    my ( $flat, $cur ) = @$self;

    return undef if $flat->[$cur][0];                     # </xxx>

    my $next_idx = HTML::TagParser::Util::find_closing( $flat, $cur ) + 1;
    my $next     = $flat->[$next_idx];
    if ( !$next || $next->[0] ) {
        return undef;                                     # nothing there, or parent's </xxx>
    }
    return HTML::TagParser::Element->new( $flat, $next_idx );
}

# Returns the element directly following this opening tag, provided the
# closing position reported by find_closing() lies beyond it.
# Returns undef for closing tags and for elements without children.
sub firstChild
{
    my ($self) = @_;
    my ( $flat, $cur ) = @$self;

    return undef if $flat->[$cur][0];                     # </xxx>

    my $child_idx = $cur + 1;
    my $closing   = HTML::TagParser::Util::find_closing( $flat, $cur );
    return undef unless $closing > $child_idx;            # no children here.
    return HTML::TagParser::Element->new( $flat, $child_idx );
}

# Collects all child elements: firstChild() followed by repeated
# nextSibling() calls until undef is returned.
# Always returns an array reference; it is empty when there is no first
# child (easier for callers than undef).
sub childNodes
{
    my ($self) = @_;

    my $node = firstChild($self);
    return [] unless $node;

    my @children;
    while ( defined $node )
      {
        push @children, $node;
        $node = nextSibling($node);
      }
    return \@children;
}

# Returns the last entry of childNodes(), or undef when there are no
# children.
sub lastChild
{
    my $children = childNodes(@_);
    return $children->[0] ? $children->[-1] : undef;
}

# Returns the sibling that ends immediately before this element, or undef
# when no such element is found.
#
# This is expensive: find_closing() only walks forward, so candidates are
# tried one by one going backwards from $cur-1. A candidate is the previous
# sibling when its closing position is $cur-1 (a container ending right
# before us) or $cur (as reported for elements without a closing tag of
# their own). Closing tags that already cache the index of their opening
# tag in slot [6] let the scan jump straight to that opening tag.
sub previousSibling
{
    my ($self) = @_;
    my ( $flat, $cur ) = @$self;

    my $idx = $cur - 1;
  SCAN:
    while ( $idx >= 0 )
      {
        my $row = $flat->[$idx];
        if ( $row->[0] && defined( $row->[6] ) )
          {
            $idx = $row->[6];           # use cache for backwards skipping
            next SCAN;
          }

        my $closing = HTML::TagParser::Util::find_closing( $flat, $idx );
        if ( defined $closing and ( $closing == $cur || $closing == $cur - 1 ) )
          {
            return HTML::TagParser::Element->new( $flat, $idx );
          }
        $idx--;
      }
    return undef;
}

# Returns the parent (container) element, or undef for root nodes.
# Dies with "parent too short" when the presumed parent's closing position
# does not extend beyond this element.
#
# The parent's index is cached in slot [007] of this element's row. The
# cache is tested with defined() because index 0 (the first tag of the
# document) is a valid parent; a plain truth test would miss it and repeat
# the expensive walk on every call.
sub parentNode
{
    my $self = shift;
    my ( $flat, $cur ) = @$self;

    my $cached = $flat->[$cur][007];
    return HTML::TagParser::Element->new( $flat, $cached ) if defined $cached;     # cache

    ##
    ## This one is very expensive.
    ## We use previousSibling() to walk backwards, and
    ## previousSibling() is expensive.
    ##
    my $ps = $self;
    my $first = $self;

    # Walk back to the first sibling; the row just before it is the parent.
    while (defined($ps = previousSibling($ps))) { $first = $ps; }

    my $parent = $first->[1] - 1;
    return undef if $parent < 0;
    die "parent too short" if HTML::TagParser::Util::find_closing($flat, $parent) <= $cur;

    $flat->[$cur][007] = $parent;       # cache
    return HTML::TagParser::Element->new( $flat, $parent )
}

##
## feature:
## self-closing tags have an additional attribute '/' => '/'.
##
# Returns a hash reference mapping lower-cased attribute names to values.
#
# The raw attribute string (slot [002]) is parsed on first use and the
# result is cached in slot [004]. Returns nothing when the row has no
# attribute string.
#   - name="value" / name='value' / name=value : value run through xml_unescape()
#   - name alone (no "=")                       : value is the name as written
#     (which is how a trailing "/" ends up as '/' => '/')
#
# Quoted and bare values are told apart with defined(), so that the
# legitimate values "0" and "" are kept instead of being turned into undef.
sub attributes {
    my $self = shift;
    my ( $flat, $cur ) = @$self;
    my $elem = $flat->[$cur];
    return $elem->[004] if ref $elem->[004];    # cache
    return unless defined $elem->[002];
    my $attr = {};
    while ( $elem->[002] =~ m{
        ([^\s="']+)(\s*=\s*(?:["']((?(?<=")(?:\\"|[^"])*?|(?:\\'|[^'])*?))["']|([^'"\s=]+)['"]*))?
    }sgx ) {
        # $1 name, $2 the whole "= value" part, $3 quoted value, $4 bare value
        my ( $key, $has_value, $quoted, $bare ) = ( $1, $2, $3, $4 );
        my $lckey = lc($key);
        if ($has_value) {
            my $val = defined $quoted ? $quoted : $bare;
            $attr->{$lckey} = HTML::TagParser::Util::xml_unescape( $val );
        }
        else {
            $attr->{$lckey} = $key;
        }
    }
    $elem->[004] = $attr;    # cache
    $attr;
}

# ----------------------------------------------------------------

package HTML::TagParser::Util;
use strict;

# Replaces the four entities &quot; &lt; &gt; &amp; by the characters they
# stand for. Text produced by a replacement is not scanned again, so
# "&amp;lt;" becomes "&lt;".
# Returns nothing (undef / empty list) for an undefined argument.
sub xml_unescape {
    my ($str) = @_;
    return unless defined $str;
    my %char_for = ( quot => '"', lt => '<', gt => '>', amp => '&' );
    $str =~ s/&(quot|lt|gt|amp);/$char_for{$1}/g;
    return $str;
}

# Reads the whole file $file and returns its content as one string.
# Croaks with "<system error> - <file>" when the file cannot be opened.
#
# The file is opened with three-argument open() in read mode, so the name
# is always taken literally: names such as "cmd |" or ">file" cannot be
# interpreted as a pipe or an output redirection.
sub read_text_file {
    my $file = shift;
    open( my $fh, '<', $file ) or Carp::croak "$! - $file\n";
    local $/ = undef;    # slurp mode
    my $text = <$fh>;
    close($fh);
    return $text;
}

# Splits the HTML text behind $txtref into a flat list of tag rows and
# returns it as an array reference. Each row is
#   [ close, tagName, attributes, content ]
# where close is '/' for closing tags (undef otherwise), tagName has
# A-Z lower-cased, attributes is the raw text between the tag name and '>'
# (undef if there is none) and content is the text up to the next '<'.
# Comments and other "<!...>" constructs are skipped entirely, together
# with the text that follows them up to the next '<'.
sub html_to_flat {
    my ($txtref) = @_;    # reference
    my @flat;
    pos($$txtref) = undef;  # reset matching position
    while ( $$txtref =~ m{
        (?:[^<]*) < (?:
            ( / )? ( [^/!<>\s"'=]+ )
            ( (?:"[^"]*"|'[^']*'|[^"'<>])+ )?
        |
            (!-- .*? -- | ![^\-] .*? )
        ) > ([^<]*)
    }sxg ) {
        my ( $close, $tag, $attrs, $comment, $content ) = ( $1, $2, $3, $4, $5 );
        next if defined $comment;
        $tag =~ tr/A-Z/a-z/;
        push @flat, [ $close, $tag, $attrs, $content ];
    }
    return \@flat;
}

## find_closing( $flat, $cur )
##
## Returns the index in @$flat at which the element starting at $cur ends.
##  - a cached result in slot [006] is returned right away (if it is true).
##  - for a self-closing tag (attribute string ending in "/") this is $cur itself.
##  - otherwise it is the index of the matching closing tag of the same name.
##  - if no matching closing tag is found, it is 1 beyond the last row that
##    was considered ($last_idx+1).
## Results are cached in slot [006] of the row at $cur; when a matching
## closing tag is found, its slot [006] is set to point back at $cur.
##
## NOTE(review): an earlier comment stated that undef is returned when this
## is called on a </xxx> closing tag, but no such check is visible in this
## sub -- confirm what callers should expect.
## NOTE(review): the cache test is a plain truth test, so a cached index of 0
## is not treated as a hit -- confirm whether that matters.
sub find_closing
{
  my ($flat, $cur) = @_;

  return $flat->[$cur][006]        if   $flat->[$cur][006];     # cache
  return $flat->[$cur][006] = $cur if (($flat->[$cur][002]||'') =~ m{/$});    # self-closing

  my $name = $flat->[$cur][001];
  my $pre_nest = 0;
  ## count how many levels deep this type of tag is nested.
  ## (openings of $name count +1, closings -1, never below 0; the tag at
  ## $cur itself is included in the count.)
  my $idx;
  for ($idx = 0; $idx <= $cur; $idx++)
    {
      my $e = $flat->[$idx];
      next unless   $e->[001] eq $name;
      next if     (($e->[002]||'') =~ m{/$});   # self-closing
      $pre_nest += ($e->[000]) ? -1 : 1;
      $pre_nest = 0 if $pre_nest < 0;
      $idx = $e->[006]-1 if !$e->[000] && $e->[006];    # use caches for skipping forward.
    }
  my $last_idx = $#$flat;

  ## we move last_idx closer, in case this container
  ## has not all its subcontainers closed properly.
  ## Walking backwards from the end, closings of $name count +1 and
  ## openings -1 (never below 0); the walk stops as soon as at least
  ## $pre_nest closings are outstanding.
  my $post_nest = 0;
  for ($idx = $last_idx; $idx > $cur; $idx--)
    {
      my $e = $flat->[$idx];
      next unless    $e->[001] eq $name;
      $last_idx = $idx-1;               # remember where a matching tag was
      next if      (($e->[002]||'') =~ m{/$});  # self-closing
      $post_nest -= ($e->[000]) ? -1 : 1;
      $post_nest = 0 if $post_nest < 0;
      last if $pre_nest <= $post_nest;
      $idx = $e->[006]+1 if $e->[000] && defined $e->[006];     # use caches for skipping backwards.
    }

  my $nest = 1;         # we know it is not self-closing. start behind.

  ## forward scan from the row after $cur up to $last_idx: same-named
  ## openings raise the nesting level, closings lower it; the closing tag
  ## that brings it down to 0 is the match.
  for ($idx = $cur+1; $idx <= $last_idx; $idx++)
    {
      my $e = $flat->[$idx];
      next unless    $e->[001] eq $name;
      next if      (($e->[002]||'') =~ m{/$});  # self-closing
      $nest      += ($e->[000]) ? -1 : 1;
      if ($nest <= 0)
        {
          die "assert </xxx>" unless $e->[000];
          $e->[006] = $cur;     # point back to opening tag
          return $flat->[$cur][006] = $idx;
        }
      $idx = $e->[006]-1 if !$e->[000] && $e->[006];    # use caches for skipping forward.
    }

  # not all closed, but cannot go further
  return $flat->[$cur][006] = $last_idx+1;
}

# Looks for a <meta http-equiv="Content-Type" ...> element in the text
# behind $txtref and returns the charset named in it, or undef when none
# is found. Meta elements without a charset are skipped and the search
# continues with the next one.
#
# The scan uses m//g, whose match position is stored on the scanned string
# itself. The position is reset before scanning and again before returning
# a result, so repeated calls on the same string always start from the
# beginning and the caller's string is left without a pending position.
sub find_meta_charset {
    my $txtref = shift;    # reference
    pos($$txtref) = undef;  # reset matching position
    while ( $$txtref =~ m{
        <meta \s ((?: [^>]+\s )? http-equiv\s*=\s*['"]?Content-Type [^>]+ ) >
    }sxgi ) {
        my $args = $1;
        if ( $args =~ m# charset=['"]?([^'"\s/]+) #sxi ) {
            my $charset = $1;
            pos($$txtref) = undef;
            return $charset;
        }
    }
    return undef;
}

# Converts the text behind $txtref in place from encoding $from to
# encoding $to, using Encode::from_to() with the Encode::XMLCREF check.
# Returns nothing when either encoding name is the empty string.
# Returns $to otherwise; when both names are equal ignoring case, the text
# is left untouched.
sub encode_from_to {
    my ( $txtref, $from, $to ) = @_;
    return if $from eq "" or $to eq "";
    if ( uc($from) ne uc($to) ) {
        Encode::from_to( $$txtref, $from, $to, Encode::XMLCREF() );
    }
    return $to;
}

# ----------------------------------------------------------------
1;
# ----------------------------------------------------------------