用戶:Xcnbot/2

#!/usr/bin/perl 
# catnav bot by WikiPedia:User:下一次登录 
# Disclaimer: No warranty granted, use at your own risk! 
  
# call requirements 
use Getopt::Std; 
use LWP::Simple; 
use LWP::UserAgent; 
use HTTP::Request; 
use HTTP::Response; 
use HTTP::Cookies; 
  
#subroutine 
 
#parameters 
# Account and site settings. These are package variables because the rest
# of the script reads them as plain globals.
our $username  = "xcnbot";            # bot account name; only English names are tested
our $password  = "******";            # account password -- fill in before running
our $WIKI_PATH = "zh.wikipedia.org";  # host name of the wiki to work on
our $WIKI_PAGE;                       # name of the page currently being requested (set later)
 
### Login to wiki 
  
# Set up connection data 
# Headers passed along with every get/post call below (flat key => value list).
# An earlier revision sent a Firefox 1.0 User-Agent string instead of the bot name.
my @ns_headers = (
    'User-Agent'      => 'Xcnbot1.0 by 下一次登录',
    'Accept'          => 'image/gif, image/x-xbitmap, image/jpeg, image/pjpeg, image/png, */*',
    'Accept-Charset'  => 'iso-8859-1,*,utf-8',
    'Accept-Language' => 'en-US',
);

# One user agent serves the whole run; the empty hash asks LWP for an
# in-memory cookie jar so cookies are held between requests.
my $browser = LWP::UserAgent->new();
$browser->cookie_jar( {} );
 
{# Login
    # Submit the login form with the configured account.
    # NOTE(review): the request goes over plain http, so the password is sent
    # unencrypted -- confirm whether the wiki accepts https for this form.
    $response=$browser->post(
        "http://".$WIKI_PATH."/w/index.php?title=Special:Userlogin&action=submitlogin",
        @ns_headers,
        Content=>[wpName=>$username,wpPassword=>$password,wpRemember=>"1",wpLoginAttempt=>"Log in"]);

    # After logging in, we should be redirected to another page (HTTP 302).
    # Any other status is treated as a failed login and ends the run.
    if($response->code!=302) { #cannot login
        print "We weren't able to login.\n\n";
        # Report what the server actually answered, to make failures diagnosable.
        # (The old code closed a DEBUG handle here that was never opened.)
        print STDERR "Login request returned: ".$response->status_line."\n";
        exit 1;
    }
    print "Logged in \n";
}
 
# Trivial variables 
# Scratch globals shared by the page-processing loop further down.

# Page text in its various stages.
our ($content,          # rendered HTML of the target category page
     $editcontent,      # wikitext taken from the target page's edit form
     $neweditcontent,   # working copy of that wikitext
     $content1);        # rendered HTML of a parent category page

# Hidden form values needed to save an edit.
our ($editToken, $editTime);

# Catnav bookkeeping.
our ($beforeCatnav, $afterCatnav);
our @Catnav;            # catnav templates found in the wikitext
our @vCat;              # category names read from one rendered catnav line
our ($bError,           # set to 1 when a catnav chain fails the parent check
     $vLine,            # one rendered catnav line
     $nDepth,           # number of links in the current catnav line
     $nCatnav,          # catnav counter / index
     $bChange);         # set to 1 when the page text was modified
 
# Set parameters
# URL-encoded category name; it is overwritten for every page in the main loop.
my $vNameU="%E5%BE%8C%E7%87%95%E5%B9%B4%E8%99%9F";

# get a list of categories from special:allpages

my $article_count=0; #number of articles in allpages
my @article_name;	#the characters of the article names for log
my @article_unicode; #the unicode article names for connection
my $last_string; #the unicode of the last article in the last run (init="%21")
my $article_line; #one article line in allpage content
my $article_ID;   #index of the article currently being processed

# Run log, opened for appending. The bareword handle LOG1 is kept because
# the rest of the script prints to it by that name. A failure to open is
# reported but does not stop the run (the console still gets the same output).
open(LOG1, '>>', 'log.txt') or warn "Cannot open log.txt for appending: $!\n";
 
while(1) { #process 
 
	#read last_string.txt and start allpages from that article
	# If the file is missing or unreadable the string stays empty, which leaves
	# the "from" parameter of the listing request blank.
	$last_string="";
	if(open(my $last_fh, '<', 'last_string.txt')) {
		local $/;                       # slurp the whole file in one read
		my $saved=<$last_fh>;
		close($last_fh);
		$last_string=$saved if defined $saved;
		# The value is pasted straight into a URL, so a trailing newline (or any
		# other whitespace, e.g. from hand-editing the file) must not survive.
		$last_string =~ s/\s+//g;
	}
 
	if(1) { #process allpages contents and make a list
		# Request one Special:Allpages listing (namespace 14), starting at the
		# title remembered from the previous pass.
		$URL="http://".$WIKI_PATH."/w/index.php?title=Special:Allpages&from=".$last_string."&namespace=14";
		$response=$browser->get($URL, @ns_headers);
		$filename1=$response->as_string;   # despite the name, this is the response text
		$article_count=0; #reset the article count

		{	#truncate the contents
			# Keep only what follows the listing table's opening tag. When the
			# tag is absent there is no listing, so nothing is left to parse.
			$filestartstr="<table style=\"background: inherit;\" border=\"0\" width=\"100%\">";
			$filestart=index($filename1, $filestartstr);
			if($filestart>=0) {
				$filename1=substr($filename1, $filestart+length($filestartstr));
			} else {
				$filename1="";
			}

			# Cut everything from the print footer onwards, if it is present.
			$fileendstr="<div class=\"printfooter\">";
			$fileend=index($filename1, $fileendstr);
			if($fileend>=0) {
				$filename1=substr($filename1, 0, $fileend);
			}
		} #truncate the contents

		{ #find all the cat names
			my $max_articles=203;                    # same cap as the old "<=202" loop bound
			my $link_prefix="<a href=\"/wiki/";
			my $title_prefix="title=\"";

			# Take one <td>...</td> cell at a time. The loop stops as soon as no
			# complete cell is left; previously it always ran the full 203 rounds
			# and stored garbage once the cells ran out.
			while($article_count<$max_articles) {
				$filestart=index($filename1, "<td>");
				last if $filestart<0;
				$fileend=index($filename1, "</td>", $filestart);
				last if $fileend<0;
				$article_line=substr($filename1, $filestart+4, $fileend-$filestart-4);
				$filename1=substr($filename1, $fileend+5);

				#extract the unicode name
				# The extra 9 characters skipped after the link prefix are
				# presumably the "Category:" namespace prefix -- TODO confirm.
				$filestart=index($article_line, $link_prefix);
				next if $filestart<0;               # cell without a page link
				$article_line=substr($article_line, $filestart+length($link_prefix)+9);
				$fileend=index($article_line, "\"");
				next if $fileend<0;
				my $unicode_name=substr($article_line, 0, $fileend);
				$article_line=substr($article_line, $fileend+1);

				#extract the character name
				$filestart=index($article_line, $title_prefix);
				next if $filestart<0;
				$article_line=substr($article_line, $filestart+length($title_prefix));
				$fileend=index($article_line, "\"");
				next if $fileend<0;

				# Store the pair only when both halves were found.
				$article_unicode[$article_count]=$unicode_name;
				$article_name[$article_count]=substr($article_line, 0, $fileend);
				$article_count+=1;
			}	#while
		} #find all the cat names
	} #process allpages contents and make a list
	 
	$article_ID=0; 
 
	while($article_ID<$article_count) { #go through all the pages and process 
	 
	# Take the next category name from the list and fetch its rendered page.
	$vNameU=$article_unicode[$article_ID];
	$WIKI_PAGE=$vNameU;
	$URL=join("", "http://", $WIKI_PATH, "/wiki/Category:", $WIKI_PAGE);
	$response=$browser->get($URL, @ns_headers);
	$content=$response->as_string;

	# Show the page URL on the console and in the log.
	print "\n", $URL;
	print LOG1 "\n", $URL;

	# Fetch the edit form of the same page.
	$URL=join("", "http://", $WIKI_PATH, "/w/index.php?title=Category:", $WIKI_PAGE, "&action=edit");
	$response=$browser->get($URL, @ns_headers);
	$editcontent=$response->as_string;

	# Pull the hidden form values out of the form; each stays undefined
	# when its pattern does not match.
	$editToken = ($editcontent =~ m/value\=\"([0-9a-f\\]*)\" name\=\"wpEditToken\"/) ? $1 : undef;
	$editTime  = ($editcontent =~ m/value\=\"([0-9a-f]*)\" name\=\"wpEdittime\"/) ? $1 : undef;

	# Cut the wikitext out of the textarea. The marker below is 80 characters
	# long; the offset of 92 skips 12 more, presumably the rest of the opening
	# tag -- TODO confirm against the actual form markup.
	$filestartstr="<textarea tabindex='1' accesskey=\",\" name=\"wpTextbox1\" id=\"wpTextbox1\" rows='25'";
	$fileendstr="</textarea>";
	$filestart=index($editcontent, $filestartstr)+92;
	$fileend=index($editcontent, $fileendstr);
	$editcontent=substr($editcontent, $filestart, $fileend-$filestart);
	$afterCatnav=$editcontent;
	 
	print "\nC ";
	print LOG1 "\nC ";
 	$bChange=0;

 	#organize edit content for catnav
	# Work on a copy in which every "{{catnav" is spelled "{{Catnav", so a
	# single search string finds all of them.
	$neweditcontent=$editcontent;
	$neweditcontent =~ s/\{\{catnav/{{Catnav/g;

	#find all {{catnav
	# Each template runs from "{{Catnav" to the first "}}" after it and is
	# stored in @Catnav; $nCatnav counts them. The old code looked for the
	# closing braces in an unrelated, never-set scalar ($Catnav) when
	# advancing, and stored a bogus entry for a template without "}}".
	$nCatnav=0;
	while(1)
	{ #find all {{catnav
		my $open_pos=index($neweditcontent, "{{Catnav");
		last if $open_pos<0;
		$neweditcontent=substr($neweditcontent, $open_pos);

		my $close_pos=index($neweditcontent, "}}");
		last if $close_pos<0;   # unterminated template: nothing usable follows

		$Catnav[$nCatnav]=substr($neweditcontent, 0, $close_pos+2);
		$neweditcontent=substr($neweditcontent, $close_pos+2);
		$nCatnav+=1;
	} #find all {{catnav

	# Report how many templates were found.
	print $nCatnav;
	print "cn(s) ";
	print LOG1 $nCatnav;
	print LOG1 "cn(s) ";
     
  # process catnav  
  $nCatnav=0; 
  $filestartstr="title=\"catnav\" style="; 
  while(index($content, $filestartstr)>=0) 
  { # process catnav 
   	 
   	{# process catnav 
   		# Cut the next rendered catnav element out of $content into $vLine
   		# and collect the category names it links to in @vCat ($nDepth = count).
   		#get a line 
	   	# 21 is the length of the marker string, so $content continues right after it.
	   	$filestartstr="title=\"catnav\" style="; 
	   	$filestart=index($content, $filestartstr); 
	   	$content=substr($content, $filestart+21); 
 
	   	# The line ends at the next "</div>". Note that "</div>" is 6 characters
	   	# long while 5 are skipped, so a ">" stays at the front of $content.
	  	$fileendstr="</div>"; 
	   	$fileend=index($content, $fileendstr);   
	   	$vLine=substr($content, 0, $fileend); 
	   	$content=substr($content, $fileend+5); 
 
	   	#get all links 
	   	# When the line contains "...", only the part after its first occurrence is used.
	   	$filestartstr="..."; 
	   	if(index($vLine, $filestartstr)>=0) 
	   	{ 
	   		$filestart=index($vLine, $filestartstr); 
	   		$vLine=substr($vLine, $filestart+3); 
	   	} 
	   	 
	   	$nDepth=0; 
	   	$filestartstr="<a href=\"/wiki/Category:"; 
	   	# NOTE: $filestartstr is reassigned inside the body, so from the second
	   	# pass on this condition searches for the title marker, not the href marker.
	   	while(index($vLine, $filestartstr)>=0) 
	   	{ #while there is a cat link 
		   	# Skip to just behind the href prefix (24 = its length).
		   	$filestartstr="<a href=\"/wiki/Category:"; 
		   	$filestart=index($vLine, $filestartstr)+24; 
		   	$vLine=substr($vLine, $filestart); 
 
		   	# The name is everything up to the title attribute; 18 is the length
		   	# of that marker, so $vLine continues right after it.
		   	$filestartstr="\" title=\"Category:"; 
		   	$filestart=index($vLine, $filestartstr); 
		   	$vCat[$nDepth]=substr($vLine, 0, $filestart); 
		   	$vLine=substr($vLine, $filestart+18); 
 
		   	$nDepth+=1; 
	   	} #while there is a cat link 
	   
	  }# process catnav 
    	 
	   	# Verify the chain from the last link of the catnav line backwards:
	   	# starting at the target page, each page must list the next category
	   	# in @vCat among its category links. The first miss sets $bError and
	   	# the remaining rounds only count $nDepth down to 0.
	   	$bError=0;
	   	local $Page=$vNameU;
	   	while($nDepth>0) {
	   		$nDepth-=1;
	   		next if $bError!=0;

	   		# Fetch the rendered page of the category checked in this round.
	   		$WIKI_PAGE=$Page;
	   		$URL=join("", "http://", $WIKI_PATH, "/wiki/Category:", $WIKI_PAGE);
	   		$response=$browser->get($URL, @ns_headers);
	   		$content1=$response->as_string;

	   		# Narrow the page down to its category-links box.
	   		$filestartstr="<div id=\"catlinks\"><p class='catlinks'><a href=\"/wiki/Special:Categories\" title=";
	   		$content1=substr($content1, index($content1, $filestartstr));
	   		$filestartstr="</span></p></div>";
	   		$content1=substr($content1, 0, index($content1, $filestartstr));

	   		# The expected parent becomes the page for the next round.
	   		$Page=$vCat[$nDepth];
	   		my $Page1="Category:".$Page."\" title";

	   		if(index($content1, $Page1)<0) {
	   			$bError=1;
	   			print "Err ";
	   			print LOG1 "Err ";
	   		}
	   	}
	   	 
	   	#delete catnav
	   	# Remove the failed template from the wikitext that will be saved.
	   	# Every index() result is checked first: with -1 the old lvalue substr
	   	# calls overwrote or deleted the last character(s) of the page text.
	   	if($bError>0)
	   	{ #delete catnav
	   		# Capitalise the first "{{catnav|", as the stored templates are spelled.
	   		my $cnstring="{{catnav|";
	   		my $cnpos=index($editcontent, $cnstring);
	   		if($cnpos>=0) {
	   			substr($editcontent, $cnpos, length($cnstring))="{{Catnav|";
	   		}

	   		my $target=$Catnav[$nCatnav];
	   		if(defined($target) && length($target)>0) {
	   			my $tpos=index($editcontent, $target);
	   			if($tpos<0) {
	   				# The page may still spell this template with a lowercase "c".
	   				(my $lc_target=$target) =~ s/^\{\{Catnav/{{catnav/;
	   				$tpos=index($editcontent, $lc_target);
	   			}
	   			if($tpos>=0) {
	   				# +1 also removes the character following the template, as before.
	   				substr($editcontent, $tpos, length($target)+1)="";
	   				$bChange=1;   # flag a change only when something was removed
	   			}
	   		}
	   	} #delete catnav
 		  
 		 $filestartstr="title=\"catnav\" style="; 
     $nCatnav+=1; 
  } # process catnav 
 
 	#update 
	if($bChange>0) 
	{	 
	   		 
	   		{ #check for illegal characters
				# The wikitext was cut out of the edit form's HTML, so markup
				# characters in it are still HTML-escaped and must be turned back
				# before the text is posted. The entity names had been lost from
				# this block (leaving a syntax error and replace-with-itself
				# loops), and it worked on $neweditcontent, which is never
				# posted; it now decodes $editcontent, the text that is sent.

				# Returns a copy of the given text with &quot; &lt; &gt; &nbsp; and
				# &amp; replaced by the characters they stand for (&nbsp; becomes a
				# plain space). An undefined argument is returned unchanged.
				sub decode_textarea_entities {
					my ($text)=@_;
					return $text unless defined $text;
					$text =~ s/&quot;/"/g;
					$text =~ s/&lt;/</g;
					$text =~ s/&gt;/>/g;
					$text =~ s/&nbsp;/ /g;
					# &amp; goes last so that e.g. "&amp;lt;" ends up as the literal
					# text "&lt;" instead of being decoded twice.
					$text =~ s/&amp;/&/g;
					return $text;
				}

				$editcontent=decode_textarea_entities($editcontent);
			  } #check for illegal characters
 
		# Address the edit form of the page that was just processed.
		$WIKI_PAGE=$vNameU;
		$URL=join("", "http://", $WIKI_PATH, "/w/index.php?title=Category:", $WIKI_PAGE, "&action=edit");

		if(1) {#Update
			# Form fields of the save request, in the order they are sent.
			my @form_fields=(
				wpTextbox1  => $editcontent,
				wpSummary   => "Testing: Wrong catnav deleted.",
				wpSave      => "Save page",
				wpSection   => "",
				wpEdittime  => $editTime,
				wpEditToken => $editToken,
				wpMinoredit => "1",
			);

			# Send the form as multipart form-data.
			$response=$browser->post($URL, @ns_headers, Content_Type=>'form-data', Content=>\@form_fields);

			print "Changed.";
			print LOG1 "Changed.";
		}
 
  } 
  		 
  			 
	$article_ID+=1; 
	 
	} #while ID<count 
	 
	if(1)	{	#record last string.txt
		# Save the last name of this listing so the next pass can start from it.
		# Nothing is written when the listing was empty: index -1 would otherwise
		# store whatever is left at the end of the array from an earlier pass.
		# NOTE(review): with an empty listing the outer loop simply repeats the
		# same request -- confirm whether the bot should stop or pause here.
		if($article_count>0) {
			if(open(my $out_fh, '>', 'last_string.txt')) {
				print {$out_fh} $article_unicode[$article_count-1];
				close($out_fh) or warn "Cannot finish writing last_string.txt: $!\n";
			} else {
				warn "Cannot open last_string.txt for writing: $!\n";
			}
		}
	}
	 
} #while whole