用户:Xcnbot/1

#!/usr/bin/perl 
# catnav bot by WikiPedia:User:下一次登录 
# Disclaimer: No warranty granted, use at your own risk! 
  
# call requirements 
use Getopt::Std; 
use LWP::Simple; 
use LWP::UserAgent; 
use HTTP::Request; 
use HTTP::Response; 
use HTTP::Cookies; 
  
#subroutine 
 
#parameters 
# Package globals shared with gotocat(). They are declared with `our`
# rather than `local`: nothing is being temporarily overridden here, and
# `our` names the same package variables the rest of the file refers to.
our $username="xcnbot";    #input your username here, only English names are tested. 
our $password="******";    #input your password here 
our $WIKI_PATH="zh.wikipedia.org"; 
our $WIKI_PAGE; 
our $response;             # most recent HTTP::Response

### Login to wiki 

# Set up connection data. Both variables are file-scoped lexicals that
# gotocat() uses for every later request.
my $browser=LWP::UserAgent->new(); 
my @ns_headers = ( 
 'User-Agent' => 'Xcnbot 1.0 by 下一次登录',  #Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.7) Gecko/20041107 Firefox/1.0', 
 'Accept' => 'image/gif, image/x-xbitmap, image/jpeg, image/pjpeg, image/png, */*', 
 'Accept-Charset' => 'iso-8859-1,*,utf-8', 
 'Accept-Language' => 'en-US', 
); 

# Hold cookies in an in-memory jar so the login session is sent with
# every later request made through $browser.
$browser->cookie_jar( {} ); 

{ # Login 
    my $login_url = "http://".$WIKI_PATH."/w/index.php?title=Special:Userlogin&action=submitlogin";

    # Make login request 
    $response = $browser->post(
        $login_url,
        @ns_headers,
        Content => [
            wpName         => $username,
            wpPassword     => $password,
            wpRemember     => "1",
            wpLoginAttempt => "Log in",
        ],
    );

    # After logging in, we should be redirected to another page.
    # If we aren't, something is wrong. (The redirect shows up as a 302
    # here because LWP does not follow redirects for POST by default.)
    if ($response->code != 302) { #cannot login 
        print "We weren't able to login.\n\n"; 
        # Report what the server actually answered to make the failure
        # diagnosable. The original called close(DEBUG) here on a handle
        # that was never opened; that no-op has been dropped.
        warn "Login request returned: ".$response->status_line."\n";
        exit 1; 
    } 
} 
print "Logged in \n"; 
 
 
# Trivial variables 
# Scratch package globals. They are declared with `our` (not `local`)
# so that they stay plain package variables visible to gotocat().
our $content; 
our $content1; 
our $content2; 
our @cnTree; 
our $editToken; 
our $editTime; 


# Set parameters 
our $DEPTH=5; #predefined DEPTH (maximum number of entries in a Catnav tree) 
our $cChange=0; #Counter of CHANGEs 
my $vName; #Variable: current cat NAME 
my $vNameU="%E9%A0%81%E9%9D%A2%E5%88%86%E9%A1%9E";  #vName: URL-encoded, starting from “页面分类” 
my @aTree; #Array: current cat-TREE 
my $vDepth=0; #Variable: current cat-tree DEPTH 
my @aChild; #Array: current cat's CHILDREN    
my @aChildU; #aChildren: URL-encoded    

# Connect to root cat 
$WIKI_PAGE=$vNameU; 
$URL="http://".$WIKI_PATH."/wiki/Category:".$WIKI_PAGE; 
$response=$browser->get($URL, @ns_headers); 
if (!$response->is_success) {
    # Without the root page there is no display name to start the tree
    # from, so stop instead of crawling with garbage.
    die "Cannot fetch root category $URL: ".$response->status_line."\n";
}
$content=$response->as_string; 

{ # extract vName: the text between the "Category:" heading prefix and </h1>
    my $heading_open  = "<h1 class=\"firstHeading\">Category:"; 
    my $heading_close = "</h1>"; 

    my $name_start = index($content, $heading_open); 
    if ($name_start < 0) {
        die "Cannot find the category heading in $URL\n";
    }
    $name_start += length($heading_open);   # was the hard-coded offset 34

    my $name_end = index($content, $heading_close, $name_start); 
    if ($name_end < 0) {
        die "Cannot find the end of the category heading in $URL\n";
    }
    $vName = substr($content, $name_start, $name_end - $name_start); 
} # extract vName 

print "got vName\n"; 

# Change @aTree: the root category is the first entry of the tree
$aTree[$vDepth]=$vName; 
$vDepth+=1; 

# Start fresh debug and log files for this run. gotocat() appends to
# cnlog.txt. Logging is best-effort: a file that cannot be opened is
# reported but does not stop the run.
if (open(my $debug_fh, ">", "debug1.txt")) { #debug 
    print {$debug_fh} $URL; 
    print {$debug_fh} "\n\n before digui\n\n"; 
    close($debug_fh); 
} 
else {
    warn "Cannot open debug1.txt: $!\n";
}

if (open(my $log_fh, ">", "cnlog.txt")) { #log 
    print {$log_fh} "program start\n"; 
    close($log_fh); 
} 
else {
    warn "Cannot open cnlog.txt: $!\n";
}

# Walk the category tree starting from the root category.
gotocat($vName, $vNameU, $vDepth, @aTree); 
  
 
sub gotocat 
{ # Recursive routine: process one category, then recurse into its children.
    #
    # Parameters:
    #   $vName  - display name of the current category (as it appears in the
    #             fetched HTML, i.e. still HTML-entity-encoded)
    #   $vNameU - URL-encoded name of the current category
    #   $vDepth - number of valid entries in @aTree
    #   @aTree  - category path from the root down to the current category
    #
    # For every child category listed on the current category page, the
    # child's edit form is fetched; unless the page already contains a
    # {{Catnav|...}} call identical to the current path, one is prepended
    # and the page is saved. Then the routine recurses into each child.
    #
    # Uses the file-level $browser, @ns_headers, $WIKI_PATH and $DEPTH.
    # Writes to diguilog.txt and cnlog.txt. Returns nothing.
    my($vName, $vNameU, $vDepth, @aTree) = @_; 

    # Debug log; truncated on every call, so it describes the latest call.
    # (The original wrote the "aTree=" label before the depth value.)
    _xcn_log(">", "diguilog.txt",
        "Name=".$vName."  UName=".$vNameU."  vDepth=".$vDepth
        ."  aTree="._xcn_tree_path($vDepth, @aTree)."\n");

    # Cycle guard. $vDepth is clamped below, so depth alone never stops
    # the recursion; a category that is (indirectly) its own descendant
    # would otherwise recurse forever. The flag is removed automatically
    # when this call returns because it is set with local().
    our %_xcn_on_path;
    if ($_xcn_on_path{$vNameU}) {
        _xcn_log(">>", "cnlog.txt", "Cycle detected, skipping: ".$vName."\n");
        return;
    }
    local $_xcn_on_path{$vNameU} = 1;

    if ($vDepth > $DEPTH) 
    { # depth exceeds: keep the root, put "..." second, and shift the
      # remaining entries up one slot so the tree has $DEPTH entries
        $aTree[1] = "..."; 
        for my $k (2 .. $DEPTH - 1) { 
            $aTree[$k] = $aTree[$k + 1]; 
        } 
        $vDepth = $DEPTH; 
        _xcn_log(">>", "diguilog.txt", "Depth exceeds\n");
    } 

    print "in digui. vDepth=".$vDepth."\n"; 

    # ---- list and sort all children ----
    print "list "; 

    my $cat_url  = "http://".$WIKI_PATH."/wiki/Category:".$vNameU; 
    my $cat_page = $browser->get($cat_url, @ns_headers); 
    if (!$cat_page->is_success) {
        # An error page contains no child links, so this category simply
        # ends up with zero children; make the reason visible.
        warn "Cannot fetch $cat_url: ".$cat_page->status_line."\n";
    }
    my $cat_html = $cat_page->as_string; 

    my @aChild;   # display names of the child categories
    my @aChildU;  # URL-encoded names, parallel to @aChild
    _xcn_collect_children($cat_html, \@aChild, \@aChildU);

    # Following the "next 200" pager was switched off in the original
    # (wrapped in if(0)) and is kept off here; it is untested.
    # NOTE(review): with it off, only the sub-categories on the first
    # listing page are processed.
    my $follow_next_200 = 0;
    if ($follow_next_200) {
        my $pager = _xcn_pager_region($cat_html);
        while (index($pager, "200</a>)<div id") >= 0) 
        { # while there is "next 200" 
            print "200 "; 

            # extract url
            my $href_open  = "200) (<a href=\""; 
            my $href_start = index($pager, $href_open); 
            last if $href_start < 0;
            $href_start += length($href_open);
            my $href_end = index($pager, "\" title=", $href_start); 
            last if $href_end < 0;

            my $next_url = "http://".$WIKI_PATH
                .substr($pager, $href_start, $href_end - $href_start); 
            _xcn_log(">>", "cnlog.txt", $next_url."\n");

            my $next_html = $browser->get($next_url, @ns_headers)->as_string; 
            _xcn_collect_children($next_html, \@aChild, \@aChildU);
            $pager = _xcn_pager_region($next_html);
        } 
    }

    # sort children by display name, keeping both arrays aligned
    print "sort "; 
    my @order = sort { $aChild[$a] cmp $aChild[$b] } 0 .. $#aChild;
    @aChild  = @aChild[@order];
    @aChildU = @aChildU[@order];
    my $nChild = scalar @aChild; #Child cat number 

    print "entered current page. nChild=".$nChild."\n"; 

    _xcn_log(">>", "cnlog.txt",
        "Digui: Cat=".$vName.", Tree="._xcn_tree_path($vDepth, @aTree)."\n"
        ."    ".$nChild." children: ".join("", map { $_." " } @aChild)."\n");

    # ---- check and add catnav to all children ----
    for my $i (0 .. $nChild - 1) 
    { 
        # get edit content 
        my $edit_url = "http://".$WIKI_PATH."/w/index.php?title=Category:"
            .$aChildU[$i]."&action=edit"; 
        my $edit_html = $browser->get($edit_url, @ns_headers)->as_string; 

        # Get EditToken and edit time from the hidden form fields
        my ($editToken) = ( $edit_html =~ m/value\=\"([0-9a-f\\]*)\" name\=\"wpEditToken\"/ ); 
        my ($editTime)  = ( $edit_html =~ m/value\=\"([0-9a-f]*)\" name\=\"wpEdittime\"/ ); 

        # Locate the wikitext inside the edit textarea. The text starts 92
        # characters after the start of the opening tag.
        # NOTE(review): 92 is the original hard-coded offset; it presumably
        # skips the rest of the opening tag (the marker below is 80
        # characters long) -- confirm against the live edit form.
        my $ta_open  = "<textarea tabindex='1' accesskey=\",\" name=\"wpTextbox1\" id=\"wpTextbox1\" rows='25'"; 
        my $ta_start = index($edit_html, $ta_open); 
        my $ta_end   = index($edit_html, "</textarea>"); 

        # Without an edit form or a token nothing sensible can be saved.
        # The original carried on and would post whatever happened to sit
        # at the computed offsets.
        if ($ta_start < 0 || $ta_end < $ta_start + 92
            || !defined($editToken) || $editToken eq "") {
            warn "No usable edit form for ".$aChild[$i]."; skipped\n";
            _xcn_log(">>", "cnlog.txt",
                "    Child:".$aChild[$i]." skipped (edit form not found).\n");
            next;
        }
        my $text = substr($edit_html, $ta_start + 92, $ta_end - $ta_start - 92); 

        # check if there is Catnav, and whether one equals the current tree
        my $cnFound = (index($text, "{{Catnav|") >= 0) ? 1 : 0;
        my $bFound  = 0;   #found same tree? 
        for my $tree (_xcn_catnav_trees($text)) {
            next if scalar(@$tree) != $vDepth;
            my $bSame = 1; #cat names are all the same? 
            for my $j (0 .. $vDepth - 1) {
                if ($tree->[$j] ne $aTree[$j]) { 
                    $bSame = 0; 
                    last;
                }
            }
            $bFound = 1 if $bSame;
        }

        print "entered child page. bFound=".$bFound."\n"; 
        next if $bFound;

        # add new tree in front of the existing text; a blank line
        # separates it from the text when the page had no Catnav before
        my $treecontent = "{{Catnav".join("", map { "|".$_ } @aTree[0 .. $vDepth - 1]); 
        $treecontent .= $cnFound ? "}}\n" : "}}\n\n";
        $text = $treecontent.$text; 

        # The text was cut out of HTML, so turn the character entities
        # back into plain characters before posting. One pass per entity
        # with &amp; last, so already-decoded text is not decoded twice.
        # NOTE(review): the entity names are reconstructed from the
        # original trailing comments (", <, >, &) -- confirm.
        $text =~ s/&quot;/"/g;
        $text =~ s/&lt;/</g;
        $text =~ s/&gt;/>/g;
        $text =~ s/&amp;/&/g;

        # Update 
        my $save = $browser->post(
            $edit_url,
            @ns_headers,
            Content_Type => 'form-data',
            Content => [
                wpTextbox1  => $text, 
                wpSummary   => "[[User:xcnbot|xcnbot]] testing", 
                wpSave      => "Save page", 
                wpSection   => "", 
                wpEdittime  => $editTime, 
                wpEditToken => $editToken, 
                wpMinoredit => "1", 
            ],
        ); 
        if ($save->is_error) {
            warn "Saving ".$aChild[$i]." failed: ".$save->status_line."\n";
        }
        print "Change made\n"; 

        _xcn_log(">>", "cnlog.txt", "    Child:".$aChild[$i]." change made.\n");
    } # check and add catnav to all children 

    # ---- call sub-gotocat on every child ----
    for my $ch (0 .. $nChild - 1) 
    { 
        _xcn_log(">>", "cnlog.txt", "Go into: Child=".$aChild[$ch]."\n");

        # Change @aTree: the child becomes the last entry of the path
        $aTree[$vDepth] = $aChild[$ch]; 
        gotocat($aChild[$ch], $aChildU[$ch], $vDepth + 1, @aTree); 

        _xcn_log(">>", "cnlog.txt", "Jump outto: Parent=".$vName."\n");
    } 
    return;
} # Recursive routine

# Write $text to the file $path, opened with $mode (">" to truncate,
# ">>" to append). Logging is best-effort: failures are reported on
# STDERR and never stop the crawl.
sub _xcn_log
{
    my ($mode, $path, $text) = @_;
    my $fh;
    if (!open($fh, $mode, $path)) {
        warn "Cannot open $path: $!\n";
        return;
    }
    print {$fh} $text;
    close($fh) or warn "Cannot close $path: $!\n";
    return;
}

# Render the first $depth entries of @tree as "a>b>c>" for the log files.
sub _xcn_tree_path
{
    my ($depth, @tree) = @_;
    my $path = "";
    for my $k (0 .. $depth - 1) {
        $path .= (defined($tree[$k]) ? $tree[$k] : "").">";
    }
    return $path;
}

# Scan $html for CategoryTree sub-category links and append the display
# name of each to @$names and its URL-encoded name to @$unames.
# Prints "c " for every link found.
sub _xcn_collect_children
{
    my ($html, $names, $unames) = @_;
    my $link_open  = "<a class=\"CategoryTreeLabel  CategoryTreeLabelNs14 CategoryTreeLabelCategory\" href=\"/wiki/Category:"; 
    my $link_close = "</a></div>"; 
    my $pos = 0;
    while (1) 
    { # while there are more children 
        my $start = index($html, $link_open, $pos);
        last if $start < 0;
        print "c "; 
        $start += length($link_open);

        my $end = index($html, $link_close, $start);
        last if $end < 0;                   # truncated markup
        my $link = substr($html, $start, $end - $start);
        $pos = $end + length($link_close);

        # $link is: URL-encoded name, then "\">", then the display name
        my $sep = index($link, "\">"); 
        next if $sep < 0;
        push @$unames, substr($link, 0, $sep); 
        push @$names,  substr($link, $sep + 2); 
    } 
    return;
}

# Return the slice of $html between the <br style="clear:both;"/> marker
# and the following <h2>; the caller searches it for the "next 200" link.
# Returns "" when the marker is absent.
sub _xcn_pager_region
{
    my ($html) = @_;
    my $marker = "<br style=\"clear:both;\"/>"; 
    my $start = index($html, $marker);
    return "" if $start < 0;
    $start += length($marker);
    my $end = index($html, "<h2>", $start);
    return $end < 0 ? substr($html, $start)
                    : substr($html, $start, $end - $start);
}

# Parse every {{Catnav|a|b|...}} call in $wikitext. Returns a list of
# array references, one per call, each holding that call's "|"-separated
# arguments in order. An unterminated call is ignored.
sub _xcn_catnav_trees
{
    my ($wikitext) = @_;
    my $open = "{{Catnav|"; 
    my @trees;
    my $pos = 0;
    while (1) 
    { # while there is still Catnav 
        my $start = index($wikitext, $open, $pos);
        last if $start < 0;
        $start += length($open);
        my $end = index($wikitext, "}}", $start);
        last if $end < 0;

        # limit -1 keeps trailing empty arguments
        my @parts = split(/\|/, substr($wikitext, $start, $end - $start), -1);
        @parts = ("") unless @parts;        # "{{Catnav|}}" has one empty argument
        push @trees, [ @parts ];
        $pos = $end + 2;                    # continue after the closing "}}"
    } 
    return @trees;
}