Drupal2wiki

From GMOD
Revision as of 14:43, 1 October 2007 by Clements (Talk | contribs)

Jump to: navigation, search

Perl script written by Eric Just to copy Drupal nodes to a MediaWiki:

<perl>

#!/usr/bin/perl -w

use strict; use WWW::Mechanize; use HTML::TreeBuilder::XPath; use HTML::WikiConverter; use Data::Dumper;

# Shared user agent, used both to fetch the Drupal pages and to post to the wiki.
# autocheck is disabled explicitly: recent WWW::Mechanize releases enable it by
# default, which makes every failed request die immediately.  This script checks
# $mech->success() itself so that one bad URL does not abort the whole batch.
my $mech = WWW::Mechanize->new( autocheck => 0 );

# index.php entry point of the target MediaWiki installation.
my $wiki_url = "http://wiki.gmod.org/index.php";

# Drupal pages to copy to the wiki, one URL per entry.
my @urls = qw(
    http://www.gmod.org/face_caucus_apidb_user_studies_and_impact_on_development
    http://www.gmod.org/face_caucus_sgn_associating_solanaceae_loci_with_phenotype
    http://www.gmod.org/face_caucus_ucsc_user_interface_issues_challenges_in_a_many_organism_database
    http://www.gmod.org/january_2007_meeting
    http://www.gmod.org/nih_2002_workshop_on_model_organism_databases
    http://www.gmod.org/user-interface-caucus
    http://www.gmod.org/mod_user_interfaces_outline_of_topics
    http://www.gmod.org/mod_user_interfaces_sample_of_several_mod_top_level_functions
);


# For every Drupal URL: fetch the page, strip Drupal-specific chrome, convert
# the content divs to MediaWiki markup, and submit the result through the
# wiki's edit form under the page's title.
foreach my $url_to_convert (@urls) {

  # Fetch the Drupal page; on failure report it and move on to the next URL.
  $mech->get( $url_to_convert );
  unless ( $mech->success() ) {
     warn "could not get page: $url_to_convert";
     next;
  }

  my $tree = HTML::TreeBuilder::XPath->new();
  $tree->parse( $mech->content() );
  # HTML::TreeBuilder requires eof() after parse() to finish building the tree.
  $tree->eof();

  # Get title: prefer the <h1 class="title"> heading, fall back to <title>
  # with everything from " |" onwards removed.
  my $title = $tree->findvalue( '//h1[@class="title"]')->value();
  if ( !$title ) {
     $title = $tree->findvalue( '//title')->value();
     $title =~ s/ [|].+//g;
  }
  die "could not parse title" if !$title;

  # delete navigation links for now
  my $nav_node = $tree->findnodes( '//div[@id="main"]//div[@class="nav"]')->[0];
  $nav_node->delete() if $nav_node;

  # delete submitted span
  for my $submitted_node ( @{ $tree->findnodes( '//span[@class="submitted"]') } ) {
     $submitted_node->delete();
  }

  # delete drupal links section
  for my $links_node ( @{ $tree->findnodes( '//div[@class="links"]') } ) {
     $links_node->delete();
  }

  my $content_nodes = $tree->findnodes( '//div[@id="main"]//div[@class="content"]');

  # wiki page text; starts empty so a page without content divs still
  # submits a defined value
  my $wiki_text = '';
  foreach my $node ( @{$content_nodes} ) {
      my $html = $node->as_HTML();
      # drop inline style attributes before conversion
      $html =~ s/ style="{?([^}"]+)}?"//g; # "
      # convert to wikimedia format
      my $wc = HTML::WikiConverter->new( dialect => 'MediaWiki' );
      my $converted_text = $wc->html2wiki( $html );
      # strip out leftover div tags
      $converted_text =~ s/<\/?div[^>]*>//g;
      # Now all internal links (not starting with http)
      # change [some link]
      # to:  some link
      # change [?q=node/71 GBrowse] into GBrowse
      $converted_text =~ s/\[(?!http)([^\s]*) ([^\]]+)\]/$2/g;
      # append node to new wiki page text
      $wiki_text .= $converted_text."\n";
  }

  # Everything needed has been copied out of the tree; release it explicitly
  # so that trees do not pile up across iterations.
  $tree->delete();

  # Percent-encode the title before putting it into the query string,
  # otherwise spaces, '&', '#', '+' or non-ASCII characters corrupt the URL.
  my $escaped_title = $title;
  # Character strings are turned into UTF-8 bytes first; byte strings are
  # left alone so they are not encoded twice.
  utf8::encode($escaped_title) if utf8::is_utf8($escaped_title);
  $escaped_title =~ s/([^A-Za-z0-9\-_.~])/sprintf('%%%02X', ord($1))/ge;

  # now simply find or create the page
  # and paste wiki text into edit box,
  # submit the form, and there's your new page!
  my $url = $wiki_url."?title=".$escaped_title."&action=edit";
  eval {
     warn $url;
     $mech->get($url);
     die "could not open edit page $url\n" unless $mech->success();
     # NOTE(review): assumes the edit form is the first form on the page --
     # confirm against the wiki's skin.
     $mech->submit_form(
          form_number => 1,
          fields      => { wpTextbox1 => $wiki_text },
      );
     die "could not submit edit form for $url\n" unless $mech->success();
  };
  if ($@) {
      print "An error occured : ".$@."\n";
  }
  else {
     print "converted $title\n";
  }

} </perl>


Category:Wiki Tools