सदस्य:वार्ताबाट/सोर्स
वार्ताबाट is being written in Perl with the help of a modified version of the CMS::MediaWiki module.
The modified version of the CMS::MediaWiki module can be found here: सदस्य:वार्ताबाट/सोर्स/MediaWikiHindi
#!/usr/bin/perl
# Bot created to enhance talk pages of Hindi wikipedia
# This is still a trial version dated 7/17/2011
#
# This is what I intend to do:
# ==Task 1==
# Get a list of links from Special:Allpages page
# Visit each link and build a list of all article pages
# Visit each page, and if it is not a redirect page and does not have
# a speedy delete template,
# Get its talk page
# If the talk page does not have 'vaarta shiirshaka' template, add it
# Do this for all pages
use strict;
use lib qw(.);
use LWP::Simple;
use LWP::UserAgent;
use HTTP::Request;
use HTTP::Response;
use HTTP::Request::Common;
use HTML::LinkExtor;
use MediaWikiHindi;
# Alias %sessioncookie in this package to %MediaWikiHindi::sessioncookie.
# NOTE(review): `use vars` pre-declares the scalar $sessioncookie, while the
# glob assignment aliases the hash; neither is referenced anywhere else in the
# visible script -- confirm whether these two lines are still needed.
use vars qw($sessioncookie);
*sessioncookie = \%MediaWikiHindi::sessioncookie;

# Stand-alone HTTP client used by GetContents(); requests time out after 10 s.
my $browser = LWP::UserAgent->new( timeout => 10 );

# Bot account credentials.
# NOTE(review): credentials are hard-coded in the source; consider loading
# them from outside the script before publishing or sharing it.
my ( $username, $password ) = ( 'वार्ताबाट', '123456' );

# Wiki client for hi.wikipedia.org with debug output switched on.
# (Passing 'user'/'pass' to the constructor was tried and left disabled.)
my @wiki_client_args = (
    'host'  => 'hi.wikipedia.org',
    'path'  => 'w',
    'debug' => 1,
);
my $mw = CMS::MediaWikiHindi->new(@wiki_client_args);

# Log in as the bot before fetching or editing anything.
Login();
# Fetch the top-level index page. The title is the percent-encoded form of
# "विशेष:AllPages" (Special:AllPages).
my @contents = $mw->getSpecialPage(title => '%E0%A4%B5%E0%A4%BF%E0%A4%B6%E0%A5%87%E0%A4%B7:AllPages');
#@contents = $mw->getPage(title => '1854');
#print sprintf('%08d ', ++$i), " $_\n" foreach @contents;

# Build the list of starting points: every link that opens a table row in the
# line carrying the "allpageslist" marker.
my @linklist = ();
my $row_marker = '<tr><td align="right"><a href="/wiki/';    # text preceding each row's link target
my $link_close = '">';                                      # text following each link target
LOOP: foreach my $line (@contents) {
    next LOOP unless $line =~ m/allpageslist/;    # only the index table is of interest

    # Walk the line with a moving offset instead of repeatedly chopping it.
    my $cursor = 0;
    while ( index( $line, 'href="/wiki', $cursor ) >= 0 ) {
        my $row_at = index( $line, $row_marker, $cursor );

        # Links remain but none of them opens a row: the index has been fully
        # consumed, so stop scanning the page altogether (this leaves the
        # outer loop, not just the while loop).
        # A match at offset 0 is valid, hence the test against -1.
        last LOOP if $row_at == -1;

        my $link_at  = $row_at + length $row_marker;
        my $link_end = index( $line, $link_close, $link_at );

        # Truncated markup: without the closing quote there is no reliable
        # link target, so do not push a garbage entry.
        last LOOP if $link_end == -1;

        push @linklist, substr( $line, $link_at, $link_end - $link_at );
        $cursor = $link_end;
    }
}
# Build the list of article pages by visiting the index pages collected in
# @linklist and harvesting the link targets from the line that contains
# "/fieldset".
my @allpages = ();
my $page_anchor = '<a href="/wiki/';    # text preceding each link target
my $title_delim = '" title';            # text following each link target
LINKLISTLOOP: foreach my $startlink (@linklist) {
    @contents = $mw->getSpecialPage(title => $startlink);
    foreach my $line (@contents) {
        next unless $line =~ m{/fieldset};    # only the line holding the page links

        my $cursor       = 0;
        my $skipped_head = 0;    # the first wiki link on the line is not a page entry
        while ( ( my $anchor_at = index( $line, $page_anchor, $cursor ) ) >= 0 ) {
            my $name_at  = $anchor_at + length $page_anchor;
            my $name_end = index( $line, $title_delim, $name_at );

            # Truncated markup: no terminator means no reliable page name, so
            # stop instead of pushing a garbage entry.
            last if $name_end == -1;

            if ($skipped_head) {
                push @allpages, substr( $line, $name_at, $name_end - $name_at );
            }
            else {
                $skipped_head = 1;
            }
            $cursor = $name_end;
        }

        # NOTE(review): this leaves the outer loop, so only the first index
        # page that contains a "/fieldset" line is ever harvested. It looks
        # like a limiter for the trial version -- confirm before removing.
        last LINKLISTLOOP;
    }
}
my @pagecontents;
my @talkpagecontents;
# Task 1: prepend {{वार्ता शीर्षक}} to the talk page of every eligible article.
# An article is skipped when it is a redirect, carries {{delete}}, or its talk
# page already contains the template.
ALLPAGESLOOP: foreach my $page (@allpages) {
    @pagecontents = $mw->getPage(title => $page);
    my $i;
    # Echo the fetched lines; line numbers are printed only when debug is off.
    print $mw->{'debug'} ? '' : sprintf('%08d ', ++$i), " $_\n" foreach @pagecontents;
    sleep 1;    # pause between requests

    # Skip redirects and pages tagged for speedy deletion. MediaWiki accepts
    # the redirect keyword in any letter case, so match it case-insensitively.
    # NOTE(review): localized redirect keywords and {{delete|...}} with
    # parameters are not recognised by these patterns.
    foreach my $pageline (@pagecontents) {
        next ALLPAGESLOOP
            if $pageline =~ m/\#REDIRECT/i || $pageline =~ m/\{\{delete\}\}/;
    }

    # The prefix is the percent-encoded form of "वार्ता:" (the talk namespace).
    my $talktitle = "%E0%A4%B5%E0%A4%BE%E0%A4%B0%E0%A5%8D%E0%A4%A4%E0%A4%BE:" . $page;
    @talkpagecontents = $mw->getPage(title => $talktitle);
    print $mw->{'debug'} ? '' : sprintf('%08d ', ++$i), " $_\n" foreach @talkpagecontents;
    sleep 1;    # pause between requests

    # Nothing to do when the template is already present.
    foreach my $talkpageline (@talkpagecontents) {
        next ALLPAGESLOOP if $talkpageline =~ m/\{\{वार्ता शीर्षक\}\}/;
    }

    # Put the template in front of the existing talk page text.
    unshift( @talkpagecontents, "{{वार्ता शीर्षक}}\n" );

    # editPage takes named arguments, so the page text must be ONE scalar.
    # Passing the array itself would flatten it into the argument list: only
    # the first element would be sent as the text (dropping the existing talk
    # page content) and the remaining lines would turn into bogus key/value
    # pairs that can displace 'summary'.
    # Line endings are normalised first so the join is correct whether or not
    # getPage returns its lines with trailing newlines.
    my @newlines = @talkpagecontents;
    chomp @newlines;
    my $newtext = join( "\n", @newlines );

    my $response = $mw->editPage(
        title   => $talktitle,
        section => '',          # '' = edit the full page; 2 would mean the second section, etc.
        text    => $newtext,
        summary => "{{वार्ता शीर्षक}} जोड़ा",    # optional
    );

    # Record the edit in the log file.
    # NOTE(review): Login() treats a true return code as failure; confirm that
    # editPage really signals success with 1 in the modified module.
    if ( $response == 1 ) {
        if ( open( my $log, '>>', 'bolbalalog.txt' ) ) {
            print {$log} scalar( gmtime() ) . " Added Vaarta Shiirshaka template to $page talk page.\n";
            close($log) or warn "Could not close bolbalalog.txt: $!\n";
        }
        else {
            # A logging problem must not go unnoticed, but it is no reason to die.
            warn "Could not append to bolbalalog.txt: $!\n";
        }
    }

    # NOTE(review): the script stops after the first attempted edit, whatever
    # the outcome. It looks like a limiter for the trial version -- confirm
    # before removing.
    exit;
}
#@contents = &GetContents("http://hi.wikipedia.org/wiki/%E0%A4%AE%E0%A5%81%E0%A4%96%E0%A4%AA%E0%A5%83%E0%A4%B7%E0%A5%8D%E0%A4%A0");
#&PrinttoFile(@contents);
#$url = "http://hi.wikipedia.org/wiki/%E0%A4%AE%E0%A5%81%E0%A4%96%E0%A4%AA%E0%A5%83%E0%A4%B7%E0%A5%8D%E0%A4%A0";
#print &GetContents($url);
# Login: authenticate the shared $mw client against hi.wikipedia.org and
# print the outcome. Takes no arguments. A true return code from login() is
# reported as a failure, a false one as success.
sub Login {
    my @login_args = (
        protocol => 'http',                # optional, default is http
        host     => 'hi.wikipedia.org',    # optional here, but wins if (re-)set here
        path     => 'w',                   # optional here, but wins
        user     => 'vaarta',
        pass     => 'password',
    );
    my $rc = $mw->login(@login_args);

    my $verdict = $rc ? "Login unsuccessful!\n" : "Login successful!\n";
    print $verdict;
}
# GetContents: GET the given URL with the shared $browser client and return
# the response body.
#   $_[0] - URL to fetch
# On an HTTP error the status line is printed (without a trailing newline)
# and the body of the error response is still returned.
sub GetContents {
    my ($target_url) = @_;

    my $reply = $browser->request( HTTP::Request->new( GET => $target_url ) );
    print $reply->status_line if $reply->is_error();

    return scalar $reply->content();
}
# PrinttoFile: write every argument, in order and without separators, to
# "testing.html" in the current directory, replacing any previous content.
#   @_ - strings to write
# Returns 1 on success. When the file cannot be opened, written or closed, a
# warning is issued and an empty/undef value is returned, so problems are no
# longer silently lost.
sub PrinttoFile {
    my @chunks  = @_;
    my $outfile = 'testing.html';

    # Three-argument open with a lexical handle: the mode can never be
    # confused with the file name, and the handle is released on every path.
    my $out;
    unless ( open( $out, '>', $outfile ) ) {
        warn "Could not open $outfile for writing: $!\n";
        return;
    }

    foreach my $chunk (@chunks) {
        print {$out} $chunk
            or warn "Could not write to $outfile: $!\n";
    }

    # Buffered write errors only surface when the handle is closed.
    unless ( close($out) ) {
        warn "Could not close $outfile: $!\n";
        return;
    }
    return 1;
}
# Reached only when the page loop above finished without calling exit itself.
print "\nbye\n";
exit 0;