1#! /usr/bin/perl 2 3# Copyright (C) 2011 4# Jérémie Nikaes <jeremie.nikaes@ensimag.imag.fr> 5# Arnaud Lacurie <arnaud.lacurie@ensimag.imag.fr> 6# Claire Fousse <claire.fousse@ensimag.imag.fr> 7# David Amouyal <david.amouyal@ensimag.imag.fr> 8# Matthieu Moy <matthieu.moy@grenoble-inp.fr> 9# License: GPL v2 or later 10 11# Gateway between Git and MediaWiki. 12# Documentation & bugtracker: https://github.com/moy/Git-Mediawiki/ 13 14use strict; 15use MediaWiki::API; 16use Git; 17use Git::Mediawiki qw(clean_filename smudge_filename connect_maybe 18 EMPTY HTTP_CODE_OK); 19use DateTime::Format::ISO8601; 20use warnings; 21 22# By default, use UTF-8 to communicate with Git and the user 23binmode STDERR,':encoding(UTF-8)'; 24binmode STDOUT,':encoding(UTF-8)'; 25 26use URI::Escape; 27 28# It's not always possible to delete pages (may require some 29# privileges). Deleted pages are replaced with this content. 30useconstant DELETED_CONTENT =>"[[Category:Deleted]]\n"; 31 32# It's not possible to create empty pages. New empty files in Git are 33# sent with this content instead. 34useconstant EMPTY_CONTENT =>"<!-- empty page -->\n"; 35 36# used to reflect file creation or deletion in diff. 37useconstant NULL_SHA1 =>'0000000000000000000000000000000000000000'; 38 39# Used on Git's side to reflect empty edit messages on the wiki 40useconstant EMPTY_MESSAGE =>'*Empty MediaWiki Message*'; 41 42# Number of pages taken into account at once in submodule get_mw_page_list 43useconstant SLICE_SIZE =>50; 44 45# Number of linked mediafile to get at once in get_linked_mediafiles 46# The query is split in small batches because of the MW API limit of 47# the number of links to be returned (500 links max). 48useconstant BATCH_SIZE =>10; 49 50if(@ARGV!=2) { 51 exit_error_usage(); 52} 53 54my$remotename=$ARGV[0]; 55my$url=$ARGV[1]; 56 57# Accept both space-separated and multiple keys in config file. 58# Spaces should be written as _ anyway because we'll use chomp. 59my@tracked_pages=split(/[ \n]/, run_git("config --get-all remote.${remotename}.pages")); 60chomp(@tracked_pages); 61 62# Just like @tracked_pages, but for MediaWiki categories. 63my@tracked_categories=split(/[ \n]/, run_git("config --get-all remote.${remotename}.categories")); 64chomp(@tracked_categories); 65 66# Just like @tracked_categories, but for MediaWiki namespaces. 67my@tracked_namespaces=split(/[ \n]/, run_git("config --get-all remote.${remotename}.namespaces")); 68chomp(@tracked_namespaces); 69 70# Import media files on pull 71my$import_media= run_git("config --get --bool remote.${remotename}.mediaimport"); 72chomp($import_media); 73$import_media= ($import_mediaeq'true'); 74 75# Export media files on push 76my$export_media= run_git("config --get --bool remote.${remotename}.mediaexport"); 77chomp($export_media); 78$export_media= !($export_mediaeq'false'); 79 80my$wiki_login= run_git("config --get remote.${remotename}.mwLogin"); 81# Note: mwPassword is discourraged. Use the credential system instead. 82my$wiki_passwd= run_git("config --get remote.${remotename}.mwPassword"); 83my$wiki_domain= run_git("config --get remote.${remotename}.mwDomain"); 84chomp($wiki_login); 85chomp($wiki_passwd); 86chomp($wiki_domain); 87 88# Import only last revisions (both for clone and fetch) 89my$shallow_import= run_git("config --get --bool remote.${remotename}.shallow"); 90chomp($shallow_import); 91$shallow_import= ($shallow_importeq'true'); 92 93# Fetch (clone and pull) by revisions instead of by pages. This behavior 94# is more efficient when we have a wiki with lots of pages and we fetch 95# the revisions quite often so that they concern only few pages. 96# Possible values: 97# - by_rev: perform one query per new revision on the remote wiki 98# - by_page: query each tracked page for new revision 99my$fetch_strategy= run_git("config --get remote.${remotename}.fetchStrategy"); 100if(!$fetch_strategy) { 101$fetch_strategy= run_git('config --get mediawiki.fetchStrategy'); 102} 103chomp($fetch_strategy); 104if(!$fetch_strategy) { 105$fetch_strategy='by_page'; 106} 107 108# Remember the timestamp corresponding to a revision id. 109my%basetimestamps; 110 111# Dumb push: don't update notes and mediawiki ref to reflect the last push. 112# 113# Configurable with mediawiki.dumbPush, or per-remote with 114# remote.<remotename>.dumbPush. 115# 116# This means the user will have to re-import the just-pushed 117# revisions. On the other hand, this means that the Git revisions 118# corresponding to MediaWiki revisions are all imported from the wiki, 119# regardless of whether they were initially created in Git or from the 120# web interface, hence all users will get the same history (i.e. if 121# the push from Git to MediaWiki loses some information, everybody 122# will get the history with information lost). If the import is 123# deterministic, this means everybody gets the same sha1 for each 124# MediaWiki revision. 125my$dumb_push= run_git("config --get --bool remote.${remotename}.dumbPush"); 126if(!$dumb_push) { 127$dumb_push= run_git('config --get --bool mediawiki.dumbPush'); 128} 129chomp($dumb_push); 130$dumb_push= ($dumb_pusheq'true'); 131 132my$wiki_name=$url; 133$wiki_name=~s{[^/]*://}{}; 134# If URL is like http://user:password@example.com/, we clearly don't 135# want the password in $wiki_name. While we're there, also remove user 136# and '@' sign, to avoid author like MWUser@HTTPUser@host.com 137$wiki_name=~s/^.*@//; 138 139# Commands parser 140while(<STDIN>) { 141chomp; 142 143if(!parse_command($_)) { 144last; 145} 146 147BEGIN{ $| =1}# flush STDOUT, to make sure the previous 148# command is fully processed. 149} 150 151########################## Functions ############################## 152 153## error handling 154sub exit_error_usage { 155die"ERROR: git-remote-mediawiki module was not called with a correct number of\n". 156"parameters\n". 157"You may obtain this error because you attempted to run the git-remote-mediawiki\n". 158"module directly.\n". 159"This module can be used the following way:\n". 160"\tgit clone mediawiki://<address of a mediawiki>\n". 161"Then, use git commit, push and pull as with every normal git repository.\n"; 162} 163 164sub parse_command { 165my($line) =@_; 166my@cmd=split(/ /,$line); 167if(!defined$cmd[0]) { 168return0; 169} 170if($cmd[0]eq'capabilities') { 171die("Too many arguments for capabilities\n") 172if(defined($cmd[1])); 173 mw_capabilities(); 174}elsif($cmd[0]eq'list') { 175die("Too many arguments for list\n")if(defined($cmd[2])); 176 mw_list($cmd[1]); 177}elsif($cmd[0]eq'import') { 178die("Invalid argument for import\n") 179if($cmd[1]eq EMPTY); 180die("Too many arguments for import\n") 181if(defined($cmd[2])); 182 mw_import($cmd[1]); 183}elsif($cmd[0]eq'option') { 184die("Invalid arguments for option\n") 185if($cmd[1]eq EMPTY ||$cmd[2]eq EMPTY); 186die("Too many arguments for option\n") 187if(defined($cmd[3])); 188 mw_option($cmd[1],$cmd[2]); 189}elsif($cmd[0]eq'push') { 190 mw_push($cmd[1]); 191}else{ 192print{*STDERR}"Unknown command. Aborting...\n"; 193return0; 194} 195return1; 196} 197 198# MediaWiki API instance, created lazily. 199my$mediawiki; 200 201sub fatal_mw_error { 202my$action=shift; 203print STDERR "fatal: could not$action.\n"; 204print STDERR "fatal: '$url' does not appear to be a mediawiki\n"; 205if($url=~/^https/) { 206print STDERR "fatal: make sure '$url/api.php' is a valid page\n"; 207print STDERR "fatal: and the SSL certificate is correct.\n"; 208}else{ 209print STDERR "fatal: make sure '$url/api.php' is a valid page.\n"; 210} 211print STDERR "fatal: (error ". 212$mediawiki->{error}->{code} .': '. 213$mediawiki->{error}->{details} .")\n"; 214exit1; 215} 216 217## Functions for listing pages on the remote wiki 218sub get_mw_tracked_pages { 219my$pages=shift; 220 get_mw_page_list(\@tracked_pages,$pages); 221return; 222} 223 224sub get_mw_page_list { 225my$page_list=shift; 226my$pages=shift; 227my@some_pages= @{$page_list}; 228while(@some_pages) { 229my$last_page= SLICE_SIZE; 230if($#some_pages<$last_page) { 231$last_page=$#some_pages; 232} 233my@slice=@some_pages[0..$last_page]; 234 get_mw_first_pages(\@slice,$pages); 235@some_pages=@some_pages[(SLICE_SIZE +1)..$#some_pages]; 236} 237return; 238} 239 240sub get_mw_tracked_categories { 241my$pages=shift; 242foreachmy$category(@tracked_categories) { 243if(index($category,':') <0) { 244# Mediawiki requires the Category 245# prefix, but let's not force the user 246# to specify it. 247$category="Category:${category}"; 248} 249my$mw_pages=$mediawiki->list( { 250 action =>'query', 251 list =>'categorymembers', 252 cmtitle =>$category, 253 cmlimit =>'max'} ) 254||die$mediawiki->{error}->{code} .': ' 255.$mediawiki->{error}->{details} ."\n"; 256foreachmy$page(@{$mw_pages}) { 257$pages->{$page->{title}} =$page; 258} 259} 260return; 261} 262 263sub get_mw_tracked_namespaces { 264my$pages=shift; 265foreachmy$local_namespace(@tracked_namespaces) { 266my$mw_pages=$mediawiki->list( { 267 action =>'query', 268 list =>'allpages', 269 apnamespace => get_mw_namespace_id($local_namespace), 270 aplimit =>'max'} ) 271||die$mediawiki->{error}->{code} .': ' 272.$mediawiki->{error}->{details} ."\n"; 273foreachmy$page(@{$mw_pages}) { 274$pages->{$page->{title}} =$page; 275} 276} 277return; 278} 279 280sub get_mw_all_pages { 281my$pages=shift; 282# No user-provided list, get the list of pages from the API. 283my$mw_pages=$mediawiki->list({ 284 action =>'query', 285 list =>'allpages', 286 aplimit =>'max' 287}); 288if(!defined($mw_pages)) { 289 fatal_mw_error("get the list of wiki pages"); 290} 291foreachmy$page(@{$mw_pages}) { 292$pages->{$page->{title}} =$page; 293} 294return; 295} 296 297# queries the wiki for a set of pages. Meant to be used within a loop 298# querying the wiki for slices of page list. 299sub get_mw_first_pages { 300my$some_pages=shift; 301my@some_pages= @{$some_pages}; 302 303my$pages=shift; 304 305# pattern 'page1|page2|...' required by the API 306my$titles=join('|',@some_pages); 307 308my$mw_pages=$mediawiki->api({ 309 action =>'query', 310 titles =>$titles, 311}); 312if(!defined($mw_pages)) { 313 fatal_mw_error("query the list of wiki pages"); 314} 315while(my($id,$page) =each(%{$mw_pages->{query}->{pages}})) { 316if($id<0) { 317print{*STDERR}"Warning: page$page->{title} not found on wiki\n"; 318}else{ 319$pages->{$page->{title}} =$page; 320} 321} 322return; 323} 324 325# Get the list of pages to be fetched according to configuration. 326sub get_mw_pages { 327$mediawiki= connect_maybe($mediawiki,$remotename,$url); 328 329print{*STDERR}"Listing pages on remote wiki...\n"; 330 331my%pages;# hash on page titles to avoid duplicates 332my$user_defined; 333if(@tracked_pages) { 334$user_defined=1; 335# The user provided a list of pages titles, but we 336# still need to query the API to get the page IDs. 337 get_mw_tracked_pages(\%pages); 338} 339if(@tracked_categories) { 340$user_defined=1; 341 get_mw_tracked_categories(\%pages); 342} 343if(@tracked_namespaces) { 344$user_defined=1; 345 get_mw_tracked_namespaces(\%pages); 346} 347if(!$user_defined) { 348 get_mw_all_pages(\%pages); 349} 350if($import_media) { 351print{*STDERR}"Getting media files for selected pages...\n"; 352if($user_defined) { 353 get_linked_mediafiles(\%pages); 354}else{ 355 get_all_mediafiles(\%pages); 356} 357} 358print{*STDERR} (scalar keys%pages) ." pages found.\n"; 359return%pages; 360} 361 362# usage: $out = run_git("command args"); 363# $out = run_git("command args", "raw"); # don't interpret output as UTF-8. 364sub run_git { 365my$args=shift; 366my$encoding= (shift||'encoding(UTF-8)'); 367open(my$git,"-|:${encoding}","git ${args}") 368or die"Unable to fork:$!\n"; 369my$res=do{ 370local$/=undef; 371<$git> 372}; 373close($git); 374 375return$res; 376} 377 378 379sub get_all_mediafiles { 380my$pages=shift; 381# Attach list of all pages for media files from the API, 382# they are in a different namespace, only one namespace 383# can be queried at the same moment 384my$mw_pages=$mediawiki->list({ 385 action =>'query', 386 list =>'allpages', 387 apnamespace => get_mw_namespace_id('File'), 388 aplimit =>'max' 389}); 390if(!defined($mw_pages)) { 391print{*STDERR}"fatal: could not get the list of pages for media files.\n"; 392print{*STDERR}"fatal: '$url' does not appear to be a mediawiki\n"; 393print{*STDERR}"fatal: make sure '$url/api.php' is a valid page.\n"; 394exit1; 395} 396foreachmy$page(@{$mw_pages}) { 397$pages->{$page->{title}} =$page; 398} 399return; 400} 401 402sub get_linked_mediafiles { 403my$pages=shift; 404my@titles=map{$_->{title} }values(%{$pages}); 405 406my$batch= BATCH_SIZE; 407while(@titles) { 408if($#titles<$batch) { 409$batch=$#titles; 410} 411my@slice=@titles[0..$batch]; 412 413# pattern 'page1|page2|...' required by the API 414my$mw_titles=join('|',@slice); 415 416# Media files could be included or linked from 417# a page, get all related 418my$query= { 419 action =>'query', 420 prop =>'links|images', 421 titles =>$mw_titles, 422 plnamespace => get_mw_namespace_id('File'), 423 pllimit =>'max' 424}; 425my$result=$mediawiki->api($query); 426 427while(my($id,$page) =each(%{$result->{query}->{pages}})) { 428my@media_titles; 429if(defined($page->{links})) { 430my@link_titles 431=map{$_->{title} } @{$page->{links}}; 432push(@media_titles,@link_titles); 433} 434if(defined($page->{images})) { 435my@image_titles 436=map{$_->{title} } @{$page->{images}}; 437push(@media_titles,@image_titles); 438} 439if(@media_titles) { 440 get_mw_page_list(\@media_titles,$pages); 441} 442} 443 444@titles=@titles[($batch+1)..$#titles]; 445} 446return; 447} 448 449sub get_mw_mediafile_for_page_revision { 450# Name of the file on Wiki, with the prefix. 451my$filename=shift; 452my$timestamp=shift; 453my%mediafile; 454 455# Search if on a media file with given timestamp exists on 456# MediaWiki. In that case download the file. 457my$query= { 458 action =>'query', 459 prop =>'imageinfo', 460 titles =>"File:${filename}", 461 iistart =>$timestamp, 462 iiend =>$timestamp, 463 iiprop =>'timestamp|archivename|url', 464 iilimit =>1 465}; 466my$result=$mediawiki->api($query); 467 468my($fileid,$file) =each( %{$result->{query}->{pages}} ); 469# If not defined it means there is no revision of the file for 470# given timestamp. 471if(defined($file->{imageinfo})) { 472$mediafile{title} =$filename; 473 474my$fileinfo=pop(@{$file->{imageinfo}}); 475$mediafile{timestamp} =$fileinfo->{timestamp}; 476# Mediawiki::API's download function doesn't support https URLs 477# and can't download old versions of files. 478print{*STDERR}"\tDownloading file$mediafile{title}, version$mediafile{timestamp}\n"; 479$mediafile{content} = download_mw_mediafile($fileinfo->{url}); 480} 481return%mediafile; 482} 483 484sub download_mw_mediafile { 485my$download_url=shift; 486 487my$response=$mediawiki->{ua}->get($download_url); 488if($response->code== HTTP_CODE_OK) { 489# It is tempting to return 490# $response->decoded_content({charset => "none"}), but 491# when doing so, utf8::downgrade($content) fails with 492# "Wide character in subroutine entry". 493$response->decode(); 494return$response->content(); 495}else{ 496print{*STDERR}"Error downloading mediafile from :\n"; 497print{*STDERR}"URL: ${download_url}\n"; 498print{*STDERR}'Server response: '.$response->code.q{ }.$response->message."\n"; 499exit1; 500} 501} 502 503sub get_last_local_revision { 504# Get note regarding last mediawiki revision 505my$note= run_git("notes --ref=${remotename}/mediawiki show refs/mediawiki/${remotename}/master 2>/dev/null"); 506my@note_info=split(/ /,$note); 507 508my$lastrevision_number; 509if(!(defined($note_info[0]) &&$note_info[0]eq'mediawiki_revision:')) { 510print{*STDERR}'No previous mediawiki revision found'; 511$lastrevision_number=0; 512}else{ 513# Notes are formatted : mediawiki_revision: #number 514$lastrevision_number=$note_info[1]; 515chomp($lastrevision_number); 516print{*STDERR}"Last local mediawiki revision found is ${lastrevision_number}"; 517} 518return$lastrevision_number; 519} 520 521# Get the last remote revision without taking in account which pages are 522# tracked or not. This function makes a single request to the wiki thus 523# avoid a loop onto all tracked pages. This is useful for the fetch-by-rev 524# option. 525sub get_last_global_remote_rev { 526$mediawiki= connect_maybe($mediawiki,$remotename,$url); 527 528my$query= { 529 action =>'query', 530 list =>'recentchanges', 531 prop =>'revisions', 532 rclimit =>'1', 533 rcdir =>'older', 534}; 535my$result=$mediawiki->api($query); 536return$result->{query}->{recentchanges}[0]->{revid}; 537} 538 539# Get the last remote revision concerning the tracked pages and the tracked 540# categories. 541sub get_last_remote_revision { 542$mediawiki= connect_maybe($mediawiki,$remotename,$url); 543 544my%pages_hash= get_mw_pages(); 545my@pages=values(%pages_hash); 546 547my$max_rev_num=0; 548 549print{*STDERR}"Getting last revision id on tracked pages...\n"; 550 551foreachmy$page(@pages) { 552my$id=$page->{pageid}; 553 554my$query= { 555 action =>'query', 556 prop =>'revisions', 557 rvprop =>'ids|timestamp', 558 pageids =>$id, 559}; 560 561my$result=$mediawiki->api($query); 562 563my$lastrev=pop(@{$result->{query}->{pages}->{$id}->{revisions}}); 564 565$basetimestamps{$lastrev->{revid}} =$lastrev->{timestamp}; 566 567$max_rev_num= ($lastrev->{revid} >$max_rev_num?$lastrev->{revid} :$max_rev_num); 568} 569 570print{*STDERR}"Last remote revision found is$max_rev_num.\n"; 571return$max_rev_num; 572} 573 574# Clean content before sending it to MediaWiki 575sub mediawiki_clean { 576my$string=shift; 577my$page_created=shift; 578# Mediawiki does not allow blank space at the end of a page and ends with a single \n. 579# This function right trims a string and adds a \n at the end to follow this rule 580$string=~s/\s+$//; 581if($stringeq EMPTY &&$page_created) { 582# Creating empty pages is forbidden. 583$string= EMPTY_CONTENT; 584} 585return$string."\n"; 586} 587 588# Filter applied on MediaWiki data before adding them to Git 589sub mediawiki_smudge { 590my$string=shift; 591if($stringeq EMPTY_CONTENT) { 592$string= EMPTY; 593} 594# This \n is important. This is due to mediawiki's way to handle end of files. 595return"${string}\n"; 596} 597 598sub literal_data { 599my($content) =@_; 600print{*STDOUT}'data ', bytes::length($content),"\n",$content; 601return; 602} 603 604sub literal_data_raw { 605# Output possibly binary content. 606my($content) =@_; 607# Avoid confusion between size in bytes and in characters 608 utf8::downgrade($content); 609binmode STDOUT,':raw'; 610print{*STDOUT}'data ', bytes::length($content),"\n",$content; 611binmode STDOUT,':encoding(UTF-8)'; 612return; 613} 614 615sub mw_capabilities { 616# Revisions are imported to the private namespace 617# refs/mediawiki/$remotename/ by the helper and fetched into 618# refs/remotes/$remotename later by fetch. 619print{*STDOUT}"refspec refs/heads/*:refs/mediawiki/${remotename}/*\n"; 620print{*STDOUT}"import\n"; 621print{*STDOUT}"list\n"; 622print{*STDOUT}"push\n"; 623if($dumb_push) { 624print{*STDOUT}"no-private-update\n"; 625} 626print{*STDOUT}"\n"; 627return; 628} 629 630sub mw_list { 631# MediaWiki do not have branches, we consider one branch arbitrarily 632# called master, and HEAD pointing to it. 633print{*STDOUT}"? refs/heads/master\n"; 634print{*STDOUT}"\@refs/heads/masterHEAD\n"; 635print{*STDOUT}"\n"; 636return; 637} 638 639sub mw_option { 640print{*STDERR}"remote-helper command 'option$_[0]' not yet implemented\n"; 641print{*STDOUT}"unsupported\n"; 642return; 643} 644 645sub fetch_mw_revisions_for_page { 646my$page=shift; 647my$id=shift; 648my$fetch_from=shift; 649my@page_revs= (); 650my$query= { 651 action =>'query', 652 prop =>'revisions', 653 rvprop =>'ids', 654 rvdir =>'newer', 655 rvstartid =>$fetch_from, 656 rvlimit =>500, 657 pageids =>$id, 658 659# Let MediaWiki know that we support the latest API. 660continue=>'', 661}; 662 663my$revnum=0; 664# Get 500 revisions at a time due to the mediawiki api limit 665while(1) { 666my$result=$mediawiki->api($query); 667 668# Parse each of those 500 revisions 669foreachmy$revision(@{$result->{query}->{pages}->{$id}->{revisions}}) { 670my$page_rev_ids; 671$page_rev_ids->{pageid} =$page->{pageid}; 672$page_rev_ids->{revid} =$revision->{revid}; 673push(@page_revs,$page_rev_ids); 674$revnum++; 675} 676 677if($result->{'query-continue'}) {# For legacy APIs 678$query->{rvstartid} =$result->{'query-continue'}->{revisions}->{rvstartid}; 679}elsif($result->{continue}) {# For newer APIs 680$query->{rvstartid} =$result->{continue}->{rvcontinue}; 681$query->{continue} =$result->{continue}->{continue}; 682}else{ 683last; 684} 685} 686if($shallow_import&&@page_revs) { 687print{*STDERR}" Found 1 revision (shallow import).\n"; 688@page_revs=sort{$b->{revid} <=>$a->{revid}} (@page_revs); 689return$page_revs[0]; 690} 691print{*STDERR}" Found ${revnum} revision(s).\n"; 692return@page_revs; 693} 694 695sub fetch_mw_revisions { 696my$pages=shift;my@pages= @{$pages}; 697my$fetch_from=shift; 698 699my@revisions= (); 700my$n=1; 701foreachmy$page(@pages) { 702my$id=$page->{pageid}; 703print{*STDERR}"page ${n}/",scalar(@pages),': ',$page->{title},"\n"; 704$n++; 705my@page_revs= fetch_mw_revisions_for_page($page,$id,$fetch_from); 706@revisions= (@page_revs,@revisions); 707} 708 709return($n,@revisions); 710} 711 712sub fe_escape_path { 713my$path=shift; 714$path=~s/\\/\\\\/g; 715$path=~s/"/\\"/g; 716$path=~s/\n/\\n/g; 717returnqq("${path}"); 718} 719 720sub import_file_revision { 721my$commit=shift; 722my%commit= %{$commit}; 723my$full_import=shift; 724my$n=shift; 725my$mediafile=shift; 726my%mediafile; 727if($mediafile) { 728%mediafile= %{$mediafile}; 729} 730 731my$title=$commit{title}; 732my$comment=$commit{comment}; 733my$content=$commit{content}; 734my$author=$commit{author}; 735my$date=$commit{date}; 736 737print{*STDOUT}"commit refs/mediawiki/${remotename}/master\n"; 738print{*STDOUT}"mark :${n}\n"; 739print{*STDOUT}"committer ${author} <${author}\@${wiki_name}> ".$date->epoch." +0000\n"; 740 literal_data($comment); 741 742# If it's not a clone, we need to know where to start from 743if(!$full_import&&$n==1) { 744print{*STDOUT}"from refs/mediawiki/${remotename}/master^0\n"; 745} 746if($contentne DELETED_CONTENT) { 747print{*STDOUT}'M 644 inline '. 748 fe_escape_path("${title}.mw") ."\n"; 749 literal_data($content); 750if(%mediafile) { 751print{*STDOUT}'M 644 inline ' 752. fe_escape_path($mediafile{title}) ."\n"; 753 literal_data_raw($mediafile{content}); 754} 755print{*STDOUT}"\n\n"; 756}else{ 757print{*STDOUT}'D '. fe_escape_path("${title}.mw") ."\n"; 758} 759 760# mediawiki revision number in the git note 761if($full_import&&$n==1) { 762print{*STDOUT}"reset refs/notes/${remotename}/mediawiki\n"; 763} 764print{*STDOUT}"commit refs/notes/${remotename}/mediawiki\n"; 765print{*STDOUT}"committer ${author} <${author}\@${wiki_name}> ".$date->epoch." +0000\n"; 766 literal_data('Note added by git-mediawiki during import'); 767if(!$full_import&&$n==1) { 768print{*STDOUT}"from refs/notes/${remotename}/mediawiki^0\n"; 769} 770print{*STDOUT}"N inline :${n}\n"; 771 literal_data("mediawiki_revision:$commit{mw_revision}"); 772print{*STDOUT}"\n\n"; 773return; 774} 775 776# parse a sequence of 777# <cmd> <arg1> 778# <cmd> <arg2> 779# \n 780# (like batch sequence of import and sequence of push statements) 781sub get_more_refs { 782my$cmd=shift; 783my@refs; 784while(1) { 785my$line= <STDIN>; 786if($line=~/^$cmd (.*)$/) { 787push(@refs,$1); 788}elsif($lineeq"\n") { 789return@refs; 790}else{ 791die("Invalid command in a '$cmd' batch:$_\n"); 792} 793} 794return; 795} 796 797sub mw_import { 798# multiple import commands can follow each other. 799my@refs= (shift, get_more_refs('import')); 800foreachmy$ref(@refs) { 801 mw_import_ref($ref); 802} 803print{*STDOUT}"done\n"; 804return; 805} 806 807sub mw_import_ref { 808my$ref=shift; 809# The remote helper will call "import HEAD" and 810# "import refs/heads/master". 811# Since HEAD is a symbolic ref to master (by convention, 812# followed by the output of the command "list" that we gave), 813# we don't need to do anything in this case. 814if($refeq'HEAD') { 815return; 816} 817 818$mediawiki= connect_maybe($mediawiki,$remotename,$url); 819 820print{*STDERR}"Searching revisions...\n"; 821my$last_local= get_last_local_revision(); 822my$fetch_from=$last_local+1; 823if($fetch_from==1) { 824print{*STDERR}", fetching from beginning.\n"; 825}else{ 826print{*STDERR}", fetching from here.\n"; 827} 828 829my$n=0; 830if($fetch_strategyeq'by_rev') { 831print{*STDERR}"Fetching & writing export data by revs...\n"; 832$n= mw_import_ref_by_revs($fetch_from); 833}elsif($fetch_strategyeq'by_page') { 834print{*STDERR}"Fetching & writing export data by pages...\n"; 835$n= mw_import_ref_by_pages($fetch_from); 836}else{ 837print{*STDERR}qq(fatal: invalid fetch strategy "${fetch_strategy}".\n); 838print{*STDERR}"Check your configuration variables remote.${remotename}.fetchStrategy and mediawiki.fetchStrategy\n"; 839exit1; 840} 841 842if($fetch_from==1&&$n==0) { 843print{*STDERR}"You appear to have cloned an empty MediaWiki.\n"; 844# Something has to be done remote-helper side. If nothing is done, an error is 845# thrown saying that HEAD is referring to unknown object 0000000000000000000 846# and the clone fails. 847} 848return; 849} 850 851sub mw_import_ref_by_pages { 852 853my$fetch_from=shift; 854my%pages_hash= get_mw_pages(); 855my@pages=values(%pages_hash); 856 857my($n,@revisions) = fetch_mw_revisions(\@pages,$fetch_from); 858 859@revisions=sort{$a->{revid} <=>$b->{revid}}@revisions; 860my@revision_ids=map{$_->{revid} }@revisions; 861 862return mw_import_revids($fetch_from, \@revision_ids, \%pages_hash); 863} 864 865sub mw_import_ref_by_revs { 866 867my$fetch_from=shift; 868my%pages_hash= get_mw_pages(); 869 870my$last_remote= get_last_global_remote_rev(); 871my@revision_ids=$fetch_from..$last_remote; 872return mw_import_revids($fetch_from, \@revision_ids, \%pages_hash); 873} 874 875# Import revisions given in second argument (array of integers). 876# Only pages appearing in the third argument (hash indexed by page titles) 877# will be imported. 878sub mw_import_revids { 879my$fetch_from=shift; 880my$revision_ids=shift; 881my$pages=shift; 882 883my$n=0; 884my$n_actual=0; 885my$last_timestamp=0;# Placeholder in case $rev->timestamp is undefined 886 887foreachmy$pagerevid(@{$revision_ids}) { 888# Count page even if we skip it, since we display 889# $n/$total and $total includes skipped pages. 890$n++; 891 892# fetch the content of the pages 893my$query= { 894 action =>'query', 895 prop =>'revisions', 896 rvprop =>'content|timestamp|comment|user|ids', 897 revids =>$pagerevid, 898}; 899 900my$result=$mediawiki->api($query); 901 902if(!$result) { 903die"Failed to retrieve modified page for revision$pagerevid\n"; 904} 905 906if(defined($result->{query}->{badrevids}->{$pagerevid})) { 907# The revision id does not exist on the remote wiki. 908next; 909} 910 911if(!defined($result->{query}->{pages})) { 912die"Invalid revision ${pagerevid}.\n"; 913} 914 915my@result_pages=values(%{$result->{query}->{pages}}); 916my$result_page=$result_pages[0]; 917my$rev=$result_pages[0]->{revisions}->[0]; 918 919my$page_title=$result_page->{title}; 920 921if(!exists($pages->{$page_title})) { 922print{*STDERR}"${n}/",scalar(@{$revision_ids}), 923": Skipping revision #$rev->{revid} of ${page_title}\n"; 924next; 925} 926 927$n_actual++; 928 929my%commit; 930$commit{author} =$rev->{user} ||'Anonymous'; 931$commit{comment} =$rev->{comment} || EMPTY_MESSAGE; 932$commit{title} = smudge_filename($page_title); 933$commit{mw_revision} =$rev->{revid}; 934$commit{content} = mediawiki_smudge($rev->{'*'}); 935 936if(!defined($rev->{timestamp})) { 937$last_timestamp++; 938}else{ 939$last_timestamp=$rev->{timestamp}; 940} 941$commit{date} = DateTime::Format::ISO8601->parse_datetime($last_timestamp); 942 943# Differentiates classic pages and media files. 944my($namespace,$filename) =$page_title=~/^([^:]*):(.*)$/; 945my%mediafile; 946if($namespace) { 947my$id= get_mw_namespace_id($namespace); 948if($id&&$id== get_mw_namespace_id('File')) { 949%mediafile= get_mw_mediafile_for_page_revision($filename,$rev->{timestamp}); 950} 951} 952# If this is a revision of the media page for new version 953# of a file do one common commit for both file and media page. 954# Else do commit only for that page. 955print{*STDERR}"${n}/",scalar(@{$revision_ids}),": Revision #$rev->{revid} of$commit{title}\n"; 956 import_file_revision(\%commit, ($fetch_from==1),$n_actual, \%mediafile); 957} 958 959return$n_actual; 960} 961 962sub error_non_fast_forward { 963my$advice= run_git('config --bool advice.pushNonFastForward'); 964chomp($advice); 965if($advicene'false') { 966# Native git-push would show this after the summary. 967# We can't ask it to display it cleanly, so print it 968# ourselves before. 969print{*STDERR}"To prevent you from losing history, non-fast-forward updates were rejected\n"; 970print{*STDERR}"Merge the remote changes (e.g. 'git pull') before pushing again. See the\n"; 971print{*STDERR}"'Note about fast-forwards' section of 'git push --help' for details.\n"; 972} 973print{*STDOUT}qq(error$_[0] "non-fast-forward"\n); 974return0; 975} 976 977sub mw_upload_file { 978my$complete_file_name=shift; 979my$new_sha1=shift; 980my$extension=shift; 981my$file_deleted=shift; 982my$summary=shift; 983my$newrevid; 984my$path="File:${complete_file_name}"; 985my%hashFiles= get_allowed_file_extensions(); 986if(!exists($hashFiles{$extension})) { 987print{*STDERR}"${complete_file_name} is not a permitted file on this wiki.\n"; 988print{*STDERR}"Check the configuration of file uploads in your mediawiki.\n"; 989return$newrevid; 990} 991# Deleting and uploading a file requires a privileged user 992if($file_deleted) { 993$mediawiki= connect_maybe($mediawiki,$remotename,$url); 994my$query= { 995 action =>'delete', 996 title =>$path, 997 reason =>$summary 998}; 999if(!$mediawiki->edit($query)) {1000print{*STDERR}"Failed to delete file on remote wiki\n";1001print{*STDERR}"Check your permissions on the remote site. Error code:\n";1002print{*STDERR}$mediawiki->{error}->{code} .':'.$mediawiki->{error}->{details};1003exit1;1004}1005}else{1006# Don't let perl try to interpret file content as UTF-8 => use "raw"1007my$content= run_git("cat-file blob ${new_sha1}",'raw');1008if($contentne EMPTY) {1009$mediawiki= connect_maybe($mediawiki,$remotename,$url);1010$mediawiki->{config}->{upload_url} =1011"${url}/index.php/Special:Upload";1012$mediawiki->edit({1013 action =>'upload',1014 filename =>$complete_file_name,1015 comment =>$summary,1016 file => [undef,1017$complete_file_name,1018 Content =>$content],1019 ignorewarnings =>1,1020}, {1021 skip_encoding =>11022} ) ||die$mediawiki->{error}->{code} .':'1023.$mediawiki->{error}->{details} ."\n";1024my$last_file_page=$mediawiki->get_page({title =>$path});1025$newrevid=$last_file_page->{revid};1026print{*STDERR}"Pushed file: ${new_sha1} - ${complete_file_name}.\n";1027}else{1028print{*STDERR}"Empty file ${complete_file_name} not pushed.\n";1029}1030}1031return$newrevid;1032}10331034sub mw_push_file {1035my$diff_info=shift;1036# $diff_info contains a string in this format:1037# 100644 100644 <sha1_of_blob_before_commit> <sha1_of_blob_now> <status>1038my@diff_info_split=split(/[ \t]/,$diff_info);10391040# Filename, including .mw extension1041my$complete_file_name=shift;1042# Commit message1043my$summary=shift;1044# MediaWiki revision number. Keep the previous one by default,1045# in case there's no edit to perform.1046my$oldrevid=shift;1047my$newrevid;10481049if($summaryeq EMPTY_MESSAGE) {1050$summary= EMPTY;1051}10521053my$new_sha1=$diff_info_split[3];1054my$old_sha1=$diff_info_split[2];1055my$page_created= ($old_sha1eq NULL_SHA1);1056my$page_deleted= ($new_sha1eq NULL_SHA1);1057$complete_file_name= clean_filename($complete_file_name);10581059my($title,$extension) =$complete_file_name=~/^(.*)\.([^\.]*)$/;1060if(!defined($extension)) {1061$extension= EMPTY;1062}1063if($extensioneq'mw') {1064my$ns= get_mw_namespace_id_for_page($complete_file_name);1065if($ns&&$ns== get_mw_namespace_id('File') && (!$export_media)) {1066print{*STDERR}"Ignoring media file related page: ${complete_file_name}\n";1067return($oldrevid,'ok');1068}1069my$file_content;1070if($page_deleted) {1071# Deleting a page usually requires1072# special privileges. A common1073# convention is to replace the page1074# with this content instead:1075$file_content= DELETED_CONTENT;1076}else{1077$file_content= run_git("cat-file blob ${new_sha1}");1078}10791080$mediawiki= connect_maybe($mediawiki,$remotename,$url);10811082my$result=$mediawiki->edit( {1083 action =>'edit',1084 summary =>$summary,1085 title =>$title,1086 basetimestamp =>$basetimestamps{$oldrevid},1087 text => mediawiki_clean($file_content,$page_created),1088}, {1089 skip_encoding =>1# Helps with names with accentuated characters1090});1091if(!$result) {1092if($mediawiki->{error}->{code} ==3) {1093# edit conflicts, considered as non-fast-forward1094print{*STDERR}'Warning: Error '.1095$mediawiki->{error}->{code} .1096' from mediawiki: '.$mediawiki->{error}->{details} .1097".\n";1098return($oldrevid,'non-fast-forward');1099}else{1100# Other errors. Shouldn't happen => just die()1101die'Fatal: Error '.1102$mediawiki->{error}->{code} .1103' from mediawiki: '.$mediawiki->{error}->{details} ."\n";1104}1105}1106$newrevid=$result->{edit}->{newrevid};1107print{*STDERR}"Pushed file: ${new_sha1} - ${title}\n";1108}elsif($export_media) {1109$newrevid= mw_upload_file($complete_file_name,$new_sha1,1110$extension,$page_deleted,1111$summary);1112}else{1113print{*STDERR}"Ignoring media file ${title}\n";1114}1115$newrevid= ($newrevidor$oldrevid);1116return($newrevid,'ok');1117}11181119sub mw_push {1120# multiple push statements can follow each other1121my@refsspecs= (shift, get_more_refs('push'));1122my$pushed;1123formy$refspec(@refsspecs) {1124my($force,$local,$remote) =$refspec=~/^(\+)?([^:]*):([^:]*)$/1125or die("Invalid refspec for push. Expected <src>:<dst> or +<src>:<dst>\n");1126if($force) {1127print{*STDERR}"Warning: forced push not allowed on a MediaWiki.\n";1128}1129if($localeq EMPTY) {1130print{*STDERR}"Cannot delete remote branch on a MediaWiki\n";1131print{*STDOUT}"error ${remote} cannot delete\n";1132next;1133}1134if($remotene'refs/heads/master') {1135print{*STDERR}"Only push to the branch 'master' is supported on a MediaWiki\n";1136print{*STDOUT}"error ${remote} only master allowed\n";1137next;1138}1139if(mw_push_revision($local,$remote)) {1140$pushed=1;1141}1142}11431144# Notify Git that the push is done1145print{*STDOUT}"\n";11461147if($pushed&&$dumb_push) {1148print{*STDERR}"Just pushed some revisions to MediaWiki.\n";1149print{*STDERR}"The pushed revisions now have to be re-imported, and your current branch\n";1150print{*STDERR}"needs to be updated with these re-imported commits. You can do this with\n";1151print{*STDERR}"\n";1152print{*STDERR}" git pull --rebase\n";1153print{*STDERR}"\n";1154}1155return;1156}11571158sub mw_push_revision {1159my$local=shift;1160my$remote=shift;# actually, this has to be "refs/heads/master" at this point.1161my$last_local_revid= get_last_local_revision();1162print{*STDERR}".\n";# Finish sentence started by get_last_local_revision()1163my$last_remote_revid= get_last_remote_revision();1164my$mw_revision=$last_remote_revid;11651166# Get sha1 of commit pointed by local HEAD1167my$HEAD_sha1= run_git("rev-parse ${local} 2>/dev/null");1168chomp($HEAD_sha1);1169# Get sha1 of commit pointed by remotes/$remotename/master1170my$remoteorigin_sha1= run_git("rev-parse refs/remotes/${remotename}/master 2>/dev/null");1171chomp($remoteorigin_sha1);11721173if($last_local_revid>0&&1174$last_local_revid<$last_remote_revid) {1175return error_non_fast_forward($remote);1176}11771178if($HEAD_sha1eq$remoteorigin_sha1) {1179# nothing to push1180return0;1181}11821183# Get every commit in between HEAD and refs/remotes/origin/master,1184# including HEAD and refs/remotes/origin/master1185my@commit_pairs= ();1186if($last_local_revid>0) {1187my$parsed_sha1=$remoteorigin_sha1;1188# Find a path from last MediaWiki commit to pushed commit1189print{*STDERR}"Computing path from local to remote ...\n";1190my@local_ancestry=split(/\n/, run_git("rev-list --boundary --parents ${local} ^${parsed_sha1}"));1191my%local_ancestry;1192foreachmy$line(@local_ancestry) {1193if(my($child,$parents) =$line=~/^-?([a-f0-9]+) ([a-f0-9 ]+)/) {1194foreachmy$parent(split(/ /,$parents)) {1195$local_ancestry{$parent} =$child;1196}1197}elsif(!$line=~/^([a-f0-9]+)/) {1198die"Unexpected output from git rev-list: ${line}\n";1199}1200}1201while($parsed_sha1ne$HEAD_sha1) {1202my$child=$local_ancestry{$parsed_sha1};1203if(!$child) {1204print{*STDERR}"Cannot find a path in history from remote commit to last commit\n";1205return error_non_fast_forward($remote);1206}1207push(@commit_pairs, [$parsed_sha1,$child]);1208$parsed_sha1=$child;1209}1210}else{1211# No remote mediawiki revision. Export the whole1212# history (linearized with --first-parent)1213print{*STDERR}"Warning: no common ancestor, pushing complete history\n";1214my$history= run_git("rev-list --first-parent --children ${local}");1215my@history=split(/\n/,$history);1216@history=@history[1..$#history];1217foreachmy$line(reverse@history) {1218my@commit_info_split=split(/[ \n]/,$line);1219push(@commit_pairs, \@commit_info_split);1220}1221}12221223foreachmy$commit_info_split(@commit_pairs) {1224my$sha1_child= @{$commit_info_split}[0];1225my$sha1_commit= @{$commit_info_split}[1];1226my$diff_infos= run_git("diff-tree -r --raw -z ${sha1_child} ${sha1_commit}");1227# TODO: we could detect rename, and encode them with a #redirect on the wiki.1228# TODO: for now, it's just a delete+add1229my@diff_info_list=split(/\0/,$diff_infos);1230# Keep the subject line of the commit message as mediawiki comment for the revision1231my$commit_msg= run_git(qq(log --no-walk --format="%s" ${sha1_commit}));1232chomp($commit_msg);1233# Push every blob1234while(@diff_info_list) {1235my$status;1236# git diff-tree -z gives an output like1237# <metadata>\0<filename1>\01238# <metadata>\0<filename2>\01239# and we've split on \0.1240my$info=shift(@diff_info_list);1241my$file=shift(@diff_info_list);1242($mw_revision,$status) = mw_push_file($info,$file,$commit_msg,$mw_revision);1243if($statuseq'non-fast-forward') {1244# we may already have sent part of the1245# commit to MediaWiki, but it's too1246# late to cancel it. Stop the push in1247# the middle, but still give an1248# accurate error message.1249return error_non_fast_forward($remote);1250}1251if($statusne'ok') {1252die("Unknown error from mw_push_file()\n");1253}1254}1255if(!$dumb_push) {1256 run_git(qq(notes --ref=${remotename}/mediawiki add -f -m "mediawiki_revision: ${mw_revision}" ${sha1_commit}));1257}1258}12591260print{*STDOUT}"ok ${remote}\n";1261return1;1262}12631264sub get_allowed_file_extensions {1265$mediawiki= connect_maybe($mediawiki,$remotename,$url);12661267my$query= {1268 action =>'query',1269 meta =>'siteinfo',1270 siprop =>'fileextensions'1271};1272my$result=$mediawiki->api($query);1273my@file_extensions=map{$_->{ext}} @{$result->{query}->{fileextensions}};1274my%hashFile=map{$_=>1}@file_extensions;12751276return%hashFile;1277}12781279# In memory cache for MediaWiki namespace ids.1280my%namespace_id;12811282# Namespaces whose id is cached in the configuration file1283# (to avoid duplicates)1284my%cached_mw_namespace_id;12851286# Return MediaWiki id for a canonical namespace name.1287# Ex.: "File", "Project".1288sub get_mw_namespace_id {1289$mediawiki= connect_maybe($mediawiki,$remotename,$url);1290my$name=shift;12911292if(!exists$namespace_id{$name}) {1293# Look at configuration file, if the record for that namespace is1294# already cached. Namespaces are stored in form:1295# "Name_of_namespace:Id_namespace", ex.: "File:6".1296my@temp=split(/\n/,1297 run_git("config --get-all remote.${remotename}.namespaceCache"));1298chomp(@temp);1299foreachmy$ns(@temp) {1300my($n,$id) =split(/:/,$ns);1301if($ideq'notANameSpace') {1302$namespace_id{$n} = {is_namespace =>0};1303}else{1304$namespace_id{$n} = {is_namespace =>1, id =>$id};1305}1306$cached_mw_namespace_id{$n} =1;1307}1308}13091310if(!exists$namespace_id{$name}) {1311print{*STDERR}"Namespace ${name} not found in cache, querying the wiki ...\n";1312# NS not found => get namespace id from MW and store it in1313# configuration file.1314my$query= {1315 action =>'query',1316 meta =>'siteinfo',1317 siprop =>'namespaces'1318};1319my$result=$mediawiki->api($query);13201321while(my($id,$ns) =each(%{$result->{query}->{namespaces}})) {1322if(defined($ns->{id}) &&defined($ns->{canonical})) {1323$namespace_id{$ns->{canonical}} = {is_namespace =>1, id =>$ns->{id}};1324if($ns->{'*'}) {1325# alias (e.g. french Fichier: as alias for canonical File:)1326$namespace_id{$ns->{'*'}} = {is_namespace =>1, id =>$ns->{id}};1327}1328}1329}1330}13311332my$ns=$namespace_id{$name};1333my$id;13341335if(!defined$ns) {1336print{*STDERR}"No such namespace ${name} on MediaWiki.\n";1337$ns= {is_namespace =>0};1338$namespace_id{$name} =$ns;1339}13401341if($ns->{is_namespace}) {1342$id=$ns->{id};1343}13441345# Store "notANameSpace" as special value for inexisting namespaces1346my$store_id= ($id||'notANameSpace');13471348# Store explicitly requested namespaces on disk1349if(!exists$cached_mw_namespace_id{$name}) {1350 run_git(qq(config --add remote.${remotename}.namespaceCache "${name}:${store_id}"));1351$cached_mw_namespace_id{$name} =1;1352}1353return$id;1354}13551356sub get_mw_namespace_id_for_page {1357my$namespace=shift;1358if($namespace=~/^([^:]*):/) {1359return get_mw_namespace_id($namespace);1360}else{1361return;1362}1363}