package CMS::MediaWikiHindi;
#######################################################################
# Author: Reto Schär
# Copyright (C) by Reto Schär (find details at the end of this script)
#
# This library is free software; you can redistribute it and/or modify
# it under the same terms as Perl itself, either Perl version 5.8.6 or,
# at your option, any later version of Perl 5 you may have available.
#
# Locations:
# http://meta.pgate.net/cms-mediawiki/
# http://search.cpan.org/dist/CMS-MediaWiki/lib/CMS/MediaWiki.pm
#
# Find more MediaWiki reference in general on:
# http://www.infocopter.com/know-how/mediawiki-reference/
#######################################################################
# Modifications for Hindi Wikipedia made by:
# Longhairandabeard
#######################################################################
use strict;
my $package = __PACKAGE__;
our $VERSION = '0.8013';
use LWP::UserAgent;
use HTTP::Request::Common;
use HTTP::Cookies;
use HTML::Form;
use IO::File;
# GLOBAL VARIABLES
my %Var = ();
my $contentType = "";
my $ua;
my $cookie_jar;
my $sessionid;
use vars qw($sessioncookie);
$| = 1;
#----- FORWARD DECLARATIONS & PROTOTYPING
sub Error($);
sub Debug($);
sub new {
my $type = shift;
my %params = @_;
my $self = {};
$self->{protocol} = $params{protocol} || 'http'; # optional
$self->{host} = $params{host} || 'localhost';
$self->{path} = $params{path} || '';
$self->{debug} = $params{debug} || 0; # 0, 1, 2
$Var{SERVER_SIG} = '*Unknown*';
$Var{EDIT_TIME_BEFORE} = '*Unknown*';
Debug "$package V$VERSION" if $self->{debug};
$ua = LWP::UserAgent->new;
$ua->agent('MediaWikiHindi/0.8013');
# $ua->agent('Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.0; T312461');
$ua->cookie_jar({file => 'mediawikihindi-cookies.txt', autosave => 1});
$cookie_jar = $ua->cookie_jar;
bless $self, $type;
}
sub login {
my $self = shift;
my %args = @_;
if ($self->{debug}) {
while (my ($k, $v) = each %args) {
Debug "[login] $k = $v";
}
}
$args{protocol} ||= $self->{protocol};
$args{path} ||= $self->{path};
$self->{path} = $args{path}; # globalize, if it was set here
$args{host} ||= $self->{host};
$self->{host} = $args{host}; # globalize
my $index_path = "/index.php";
$index_path = "/$args{path}/index.php" if $args{path};
#Let's logout from any previous sessions
# my $logout_url = "$args{protocol}://$args{host}$index_path?title=%E0%A4%B5%E0%A4%BF%E0%A4%B6%E0%A5%87%E0%A4%B7:UserLogout";
# my $request = HTTP::Request->new(GET => $logout_url);
# my $resp = $ua->request($request);
my $login_url = "$args{protocol}://$args{host}$index_path?title=%E0%A4%B5%E0%A4%BF%E0%A4%B6%E0%A5%87%E0%A4%B7:Userlogin\&action=submitlogin\&type=login";
Debug "[login] POST $login_url\..." if $self->{debug};
my $request = HTTP::Request->new(POST => $login_url);
$request->header(Content_Type => 'application/x-www-form-urlencoded');
# $request->header(Cookie => $mycookie);
$request->content("wpName=$args{user}&wpPassword=$args{pass}&wpLoginattempt=लॉग इन&wpRemember=1");
my $resp = $ua->request($request);
if ($self->{debug} > 2) {
my @contents = $resp->content;
open (TESTING, ">testing.html");
foreach my $contentline (@contents) {
print TESTING "$contentline\n";
}
close TESTING;
}
my $login_okay = 0;
while (my ($k, $v) = each %{$resp->{_headers}}) {
Debug "(header) $k = $v" if $self->{debug} > 2;
if ($_ =~ /^set-cookie$/i) {
my $arr = $v;
if ($arr =~ /^ARRAY(.+)$/) {
foreach (@$arr) {
# wikiUserID or wikidbUserID
if ($_ =~ /UserID=\d+\;/i) {
# Success!
$login_okay = 1;
}
Debug "(cookie) $_" if $self->{debug} > 1;
if ($_ =~ /session/) {
$sessioncookie = $_;
print "Session Cookie: $sessioncookie\n" if $self->{debug} > 1;
}
}
}
else {
Debug "=====> cookie: $arr" if $self->{debug};
}
}
if ($_ =~ /^server$/i) {
$Var{'SERVER_SIG'} = $v;
}
}
$sessionid = substr($sessioncookie, 15, 32);
# $cookie_jar->set_cookie(0, "hiwiki_session", $sessionid, "\/", "hi.wikipedia.org", 80, 1, 0, 2592000, 0, ) ;
# $cookie_jar->save;
open(COOKIEFILE, "<mediawikihindi-cookies.txt") || print "Could not open file for reading\n$!\n";
my @cookielines = <COOKIEFILE>;
chomp (@cookielines);
my @newcookielines;
my $sessioncookiefound;
foreach my $cookieline (@cookielines) {
if ($cookieline =~ /session/) {
print "sessioncookieline found\n";
$sessioncookiefound = 1;
$cookieline = "Set-Cookie3: $sessioncookie\n";
# $ua->cookie_jar->set_cookie(0, 'hiwiki_session', $sessioncookie, "\/", 'hi.wikipedia.org', '', 1, 0, 1840000, 0, \httponly ) ;
}
push (@newcookielines, $cookieline);
}
if (!$sessioncookiefound) {
push (@newcookielines, "Set-Cookie3: $sessioncookie\n");
print "pushing Set-Cookie3: $sessioncookie\n";
}
close COOKIEFILE;
print "This is what I want to print: Set-Cookie3: $sessioncookie\n";
open(COOKIEFILE, "+>mediawikihindi-sessioncookies.txt") || print "Could not open file for writing\n$!\n";
foreach my $cookieline (@newcookielines) {
print COOKIEFILE "$cookieline\n" || print "Could not write\n$!\n";
print "writing to file: $cookieline\n";
}
close COOKIEFILE;
return $login_okay ? 0 : 1;
}
sub editPage {
my $self = shift;
my %args = @_;
if ($self->{debug}) {
while (my ($k, $v) = each %args) {
Debug "[editPage] $k = \"$v\"";
}
while (my ($k, $v) = each %Var) {
Debug "[editPage] VAR $k = \"$v\"";
}
}
my $WHOST = $self->{host} || 'localhost';
my $WPATH = $self->{path} || '';
$args{protocol} ||= $self->{protocol};
$args{text} ||= '* No text *';
$args{summary} ||= 'By CMS::MediaWiki';
$args{section} ||= '';
Debug "Editing page '$args{title}' (section '$args{section}')..." if $self->{debug};
my $edit_section = $args{section} ? "\§ion=$args{section}" : '';
# (Pre-)fetch page...
my $myurl = "$args{protocol}://$WHOST/$WPATH/index.php?title=$args{title}&action=edit$edit_section";
my $request = HTTP::Request->new(GET => $myurl);
my $resp = $ua->request($request);
my @lines = split /\n/, $resp->content;
my $token = my $edit_time = my $start_time = '';
foreach (@lines) {
#Debug "X $_";
if (/wpEditToken/) {
print "EditToken line: $_ \n";
/value\=\"(.+)\" *name/i;
$token = $1;
}
if (/wpEdittime/) {
s/type=.?hidden.? *value="(.+)" *name/$1/i;
$edit_time = $1 || '';
$Var{EDIT_TIME_BEFORE} = $edit_time;
}
if (/wpStarttime/) {
s/type=.?hidden.? *value="(.+)" *name/$1/i;
$start_time = $1 || '';
}
if (/<title>/i) {
s/<title>(.+)<\/title>/$1/i;
$Var{PAGE_TITLE} = $1 || '';
}
if (/index.php\?title=(.+?):Copyright.+/i) {
$Var{WIKI_NAME} = $1 || '';
}
}
if ($self->{debug}) {
Debug "token = $token" if $self->{debug} > 1;
Debug "edit_time (before update) = $edit_time";
}
$token = s/\\/\\\\/;
my %tags;
$tags{wpTextbox1 } = $args{text};
$tags{wpEdittime } = $edit_time;
$tags{wpStarttime} = $start_time;
$tags{wpSave } = 'लेख सहेजें';
$tags{wpSection } = $args{section};
$tags{wpSummary } = $args{summary};
$tags{wpEditToken} = $token;
$tags{wpMinoredit} = 1;
$tags{title } = $args{title};
$tags{action } = 'submit';
# my $form = HTML::Form->parse($resp);
# my $text = $form->find_input('wpTextbox1')->value;
# my $summary = $form->find_input('wpSummary')->value;
# my $save = $form->find_input('wpSave')->value;
# my $edittoken = $form->find_input('wpEditToken')->value;
# my $starttime = $form->find_input('wpStarttime')->value;
# my $edittime = $form->find_input('wpEdittime')->value;
# my $minoredit = $form->find_input('wpMinoredit')->value;
# $form->value('wpTextbox1', $args{text});
# $form->value('wpSummary', $args{summary});
# $form->value('wpMinoredit',1);
# my $resp = $ua->request($form->click);
my $submit_url = "$args{protocol}://$WHOST/$WPATH/index.php?title=$args{title}\&action=submit";
my $request = HTTP::Request->new(POST => $submit_url);
$request->header(Content_Type => 'application/x-www-form-urlencoded');
$request->content("wpTextbox1=$args{text}&wpEdittime=$edit_time&wpSave=लेख सहेजें&wpSection=$args{section}&wpSummary=$args{summary}&wpEditToken=$token&wpMinoredit=1&title=$args{title}&action=submit");
# my $content = join('\&' , %tags);
# $request->content(%tags);
# $request->header(Cookie => $sessioncookie);
# $cookie_jar->set_cookie(0, 'hiwiki_session', $sessionid, "\/", 'hi.wikipedia.org', , 1, 0, 8640000, , \ 'httponly' ) ;
$cookie_jar->load("mediawikihindi-sessioncookies.txt");
my $resp = $ua->request($request);
if ($self->{debug} > 2) {
my @contents = $resp->content;
open (TESTING, ">testing.html");
foreach my $contentline (@contents) {
print TESTING "$contentline\n";
}
close TESTING;
}
foreach (sort keys %{$resp->{_headers}}) {
Debug "(header) $_ = " . $resp->{_headers}->{$_} if $self->{debug} > 2;
}
my $response_location = $resp->{_headers}->{location} || '';
Debug "Response Location: $response_location" if $self->{debug};
Debug "Comparing with \"/$args{title}\"" if $self->{debug};
if ($response_location =~ /[\/=]$args{title}/i) {
Debug "Success!" if $self->{debug};
return 1;
}
else {
Debug "NOK!" if $self->{debug};
return 2;
}
}
sub get {
my $self = shift;
my $Key = shift;
$Var{$Key};
}
sub let {
my $self = shift;
my $Key = shift;
my $Value = shift;
Debug "[let] $Key = $Value" if $self->{debug};
$Var{$Key} = $Value;
}
sub getPage {
# returns arrayref of lines of page source
# Function created by Matt Hucke <hucke@nospam-cynico.net>
my ($self, %args) = @_;
$args{protocol} ||= $self->{protocol};
$args{section } ||= 0;
if ($self->{debug}) {
while (my ($k, $v) = each %args) {
Debug "[getPage] $k = \"$v\"";
}
while (my ($k, $v) = each %Var) {
Debug "[getPage] VAR $k = \"$v\"";
}
}
my $WHOST = $self->{host} || 'localhost';
my $WPATH = $self->{path} || '';
Debug "Fetching page '$args{title}' (section '$args{section}')..." if $self->{debug};
my $edit_section = $args{section} ? "\§ion=$args{section}" : '';
my $myurl = "$args{protocol}://$WHOST/$WPATH/index.php?title=$args{title}&action=edit$edit_section";
my $request = HTTP::Request->new(GET => $myurl);
my $resp = $ua->request($request);
while (my ($k, $v) = each %{$resp->{_headers}}) {
Debug "(header) $k = $v" if $self->{debug} > 2;
}
my @lines = split /\n/, $resp->content;
my @content;
my $saving = 0;
# This is a very simple parser - it looks for <textarea...wpTextbox1> and </textarea>
# and returns everything in between.
for (my $jj = 0; $jj <= $#lines; $jj++) {
my $line = $lines[$jj];
if ($lines[$jj+1] =~ /^[^>]+><\/textarea>/) {
#this means there is no content on the page, skip everything
last;
}
if ($line =~ /<textarea.*wpTextbox1/) {
$saving = 1;
if ($line =~ /<textarea[^>]+>(.*)/) {
$line = $1; # strip out <textarea.....>, keep what's after.
} else {
# ADVANCE to next line
++$jj;
$line = $lines[$jj];
# strip out end of textarea tag at start of line
$line =~ s#^[^>]+>##;
}
# if any of $line remains, fall thru to 'push' part.
next unless $line;
} elsif ($line =~ m#(.*)</textarea>#) {
push (@content, $line) if $saving && $1;
$saving = 0;
}
push (@content, $line) if $saving;
}
# Always return an arrayref for later processing
return @content;
}
sub getSpecialPage {
# returns arrayref of lines of page source
# Function created by Matt Hucke <hucke@nospam-cynico.net>
my ($self, %args) = @_;
$args{protocol} ||= $self->{protocol};
if ($self->{debug}) {
while (my ($k, $v) = each %args) {
Debug "[getPage] $k = \"$v\"";
}
while (my ($k, $v) = each %Var) {
Debug "[getPage] VAR $k = \"$v\"";
}
}
my $WHOST = $self->{host} || 'localhost';
my $WPATH = $self->{path} || '';
Debug "Fetching page '$args{title}'..." if $self->{debug};
my $myurl = "$args{protocol}://$WHOST/wiki/$args{title}";
my $request = HTTP::Request->new(GET => $myurl);
my $resp = $ua->request($request);
my @lines = split /\n/, $resp->content;
while (my ($k, $v) = each %{$resp->{_headers}}) {
Debug "(header) $k = $v" if $self->{debug} > 2;
}
return @lines;
}
sub Error ($) {
print "Content-type: text/html\n\n" unless $contentType;
print "<b>ERROR</b> ($package): $_[0]\n";
exit(1);
}
sub Debug ($) { print "[ $package ] $_[0]\n"; }
#### Used Warning / Error Codes ##########################
# Next free W Code: 1000
# Next free E Code: 1000
1;
__END__
=head1 NAME
CMS::MediaWiki - Perl extension for creating, reading and updating MediaWiki pages
=head1 SYNOPSIS
use CMS::MediaWiki;
my $mw = CMS::MediaWiki->new(
# protocol => 'https', # Optional, default is http
host => 'localhost', # Default: localhost
path => 'wiki' , # Can be empty on 3rd-level domain Wikis
debug => 0 # Optional. 0=no debug msgs, 1=some msgs, 2=more msgs
);
=head1 DESCRIPTION
Create or update MediaWiki pages. An update of a MediaWiki page can also be
reduced to a specific page section. You may update many pages with the same
object handle ($mw in the shown example).
You could change the login name between an update. This might be necessary
if you would like to update a public page *and* a protected page by the
WikiSysop user in just one cycle.
=head2 Login example
if ($mw->login(user => 'Reto', pass => 'yourpass')) {
print STDERR "Could not login\n";
exit;
}
else {
# Logged in. Do stuff ...
}
=head2 Another login example
$rc = $mw->login(
protocol => 'https', # optional, default is http
host => 'localhost' , # optional here, but wins if (re-)set here
path => 'wiki', # optional here, but wins
user => 'Reto' , # default: Perlbot
pass => 'yourpass' ,
);
=head2 Edit a Wiki page or section
$rc = $mw->editPage(
title => 'Online_Directory:Computers:Software:Internet:Authoring' ,
section => '' , # 2 means edit second section etc.
# '' = no section means edit the full page
text => "== Your Section Title ==\nbar foo\n\n",
summary => "Your summary." , # optional
);
=head2 Get a Wiki page or a section of a Wiki page
$lines_ref = $mw->getPage(title => 'Perl_driven', section => 1); # omit section to get full page
# Process Wiki lines ...
print sprintf('%08d ', ++$i), " $_\n" foreach @$lines_ref;
In general, $rc returns 0 on success unequal 0 on failure.
=head3 Tip
After a successful call of the B<editPage> function you had the
following information available:
print "Edit time (before) was ", $mw->get('EDIT_TIME_BEFORE'), "\n";
print "Page title was " , $mw->get('PAGE_TITLE') , "\n";
print "The Wiki name was " , $mw->get('WIKI_NAME') , "\n";
=head2 EXPORT
None by default.
=head1 SEE ALSO
=item *
http://meta.pgate.net/cms-mediawiki/
=item *
http://www.infocopter.com/perl/modules/
=item *
http://www.infocopter.com/know-how/mediawiki-reference/
=head1 AUTHOR
Reto Schaer, E<lt>retoh@nospam-cpan.orgE<gt>
=head1 COPYRIGHT AND LICENSE
Copyright (C) 2005-2007 by Reto Schaer
This library is free software; you can redistribute it and/or modify
it under the same terms as Perl itself, either Perl version 5.8.6 or,
at your option, any later version of Perl 5 you may have available.
http://www.infocopter.com/perl/licencing.html
=cut