logo       

[MediaWiki-CVS] SVN: [54087] trunk/mwdumper: msg#01465

mediawiki-cvs

Subject: [MediaWiki-CVS] SVN: [54087] trunk/mwdumper

http://www.mediawiki.org/wiki/Special:Code/MediaWiki/54087

Revision: 54087
Author: daniel
Date: 2009-07-31 10:39:07 +0000 (Fri, 31 Jul 2009)

Log Message:
-----------
support for rev_deleted; bump pom.xml to version 1.16

Modified Paths:
--------------
trunk/mwdumper/pom.xml
trunk/mwdumper/src/org/mediawiki/importer/Contributor.java
trunk/mwdumper/src/org/mediawiki/importer/SqlWriter14.java
trunk/mwdumper/src/org/mediawiki/importer/SqlWriter15.java
trunk/mwdumper/src/org/mediawiki/importer/XmlDumpReader.java
trunk/mwdumper/src/org/mediawiki/importer/XmlDumpWriter.java
trunk/mwdumper/src/org/mediawiki/importer/XmlWriter.java

Modified: trunk/mwdumper/pom.xml
===================================================================
--- trunk/mwdumper/pom.xml 2009-07-31 10:21:02 UTC (rev 54086)
+++ trunk/mwdumper/pom.xml 2009-07-31 10:39:07 UTC (rev 54087)
@@ -5,7 +5,7 @@
<name>mwdumper</name>
<groupId>org.wikimedia</groupId>
<artifactId>mwdumper</artifactId>
- <version>1.11</version>
+ <version>1.16</version>
<packaging>jar</packaging>
<url>http://www.mediawiki.org/wiki/MWDumper</url>

@@ -78,6 +78,23 @@
</includes>
</configuration>
</plugin>
+ <plugin>
+ <groupId>org.apache.maven.plugins</groupId>
+ <artifactId>maven-jar-plugin</artifactId>
+ <version>2.2</version>
+ <configuration>
+ <archive>
+ <manifest>
+ <mainClass>org.mediawiki.dumper.Dumper</mainClass>
+ <packageName>org.mediawiki</packageName>
+ </manifest>
+ <manifestEntries>
+ <mode>development</mode>
+ <url>${pom.url}</url>
+ </manifestEntries>
+ </archive>
+ </configuration>
+ </plugin>
</plugins>

<resources> <!-- include all non-java files as resources -->

Modified: trunk/mwdumper/src/org/mediawiki/importer/Contributor.java
===================================================================
--- trunk/mwdumper/src/org/mediawiki/importer/Contributor.java 2009-07-31 10:21:02 UTC (rev 54086)
+++ trunk/mwdumper/src/org/mediawiki/importer/Contributor.java 2009-07-31 10:39:07 UTC (rev 54087)
@@ -28,10 +28,10 @@
public class Contributor {
public String Username;
public int Id;
-
- public boolean isAnon() {
- // Fixme; dumps w/o id numbers...
- return (Id == 0);
+ public boolean isIP = false;
+
+ public Contributor() {
+ this(null, 0);
}

public Contributor(String username, int id) {
@@ -39,8 +39,4 @@
Id = id;
}

- public Contributor(String ip) {
- Username = ip;
- Id = 0;
- }
}

Modified: trunk/mwdumper/src/org/mediawiki/importer/SqlWriter14.java
===================================================================
--- trunk/mwdumper/src/org/mediawiki/importer/SqlWriter14.java 2009-07-31 10:21:02 UTC (rev 54086)
+++ trunk/mwdumper/src/org/mediawiki/importer/SqlWriter14.java 2009-07-31 10:39:07 UTC (rev 54087)
@@ -63,10 +63,10 @@
{"old_id", new Integer(revision.Id)},
{"old_namespace", page.Title.Namespace},
{"old_title", titleFormat(page.Title.Text)},
- {"old_text", revision.Text},
- {"old_comment", revision.Comment},
- {"old_user", new Integer(revision.Contributor.Id)},
- {"old_user_text", revision.Contributor.Username},
+ {"old_text", revision.Text == null ? "" : revision.Text},
+ {"old_comment", revision.Comment == null ? "" : revision.Comment},
+ {"old_user", revision.Contributor.Username == null ? ZERO : new Integer(revision.Contributor.Id)},
+ {"old_user_text", revision.Contributor.Username == null ? "" : revision.Contributor.Username},
{"old_timestamp", timestampFormat(revision.Timestamp)},
{"old_minor_edit", revision.Minor ? ONE : ZERO},
{"old_flags", "utf-8"},
@@ -78,10 +78,10 @@
{"cur_id", new Integer(page.Id)},
{"cur_namespace", page.Title.Namespace},
{"cur_title", titleFormat(page.Title.Text)},
- {"cur_text", revision.Text},
- {"cur_comment", revision.Comment},
- {"cur_user", new Integer(revision.Contributor.Id)},
- {"cur_user_text", revision.Contributor.Username},
+ {"cur_text", revision.Text == null ? "" : revision.Text},
+ {"cur_comment", revision.Comment == null ? "" : revision.Comment},
+ {"cur_user", revision.Contributor.Username == null ? ZERO : new Integer(revision.Contributor.Id)},
+ {"cur_user_text", revision.Contributor.Username == null ? "" : revision.Contributor.Username},
{"cur_timestamp", timestampFormat(revision.Timestamp)},
{"cur_restrictions", page.Restrictions},
{"cur_counter", ZERO},

Modified: trunk/mwdumper/src/org/mediawiki/importer/SqlWriter15.java
===================================================================
--- trunk/mwdumper/src/org/mediawiki/importer/SqlWriter15.java 2009-07-31 10:21:02 UTC (rev 54086)
+++ trunk/mwdumper/src/org/mediawiki/importer/SqlWriter15.java 2009-07-31 10:39:07 UTC (rev 54087)
@@ -59,22 +59,32 @@
lastRevision = null;
}

+ static final int DELETED_TEXT = 1;
+ static final int DELETED_COMMENT = 2;
+ static final int DELETED_USER = 4;
+ static final int DELETED_RESTRICTED = 8;
+
public void writeRevision(Revision revision) throws IOException {
bufferInsertRow(traits.getTextTable(), new Object[][] {
{"old_id", new Integer(revision.Id)},
- {"old_text", revision.Text},
+ {"old_text", revision.Text == null ? "" : revision.Text},
{"old_flags", "utf-8"}});
+
+ int rev_deleted = 0;
+ if (revision.Contributor.Username==null) rev_deleted |= DELETED_USER;
+ if (revision.Comment==null) rev_deleted |= DELETED_COMMENT;
+ if (revision.Text==null) rev_deleted |= DELETED_TEXT;

bufferInsertRow("revision", new Object[][] {
{"rev_id", new Integer(revision.Id)},
{"rev_page", new Integer(currentPage.Id)},
{"rev_text_id", new Integer(revision.Id)},
- {"rev_comment", revision.Comment},
- {"rev_user", new Integer(revision.Contributor.Id)},
- {"rev_user_text", revision.Contributor.Username},
+ {"rev_comment", revision.Comment == null ? "" : revision.Comment},
+ {"rev_user", revision.Contributor.Username == null ? ZERO : new Integer(revision.Contributor.Id)},
+ {"rev_user_text", revision.Contributor.Username == null ? "" : revision.Contributor.Username},
{"rev_timestamp", timestampFormat(revision.Timestamp)},
{"rev_minor_edit", revision.Minor ? ONE : ZERO},
- {"rev_deleted", ZERO}});
+ {"rev_deleted", rev_deleted==0 ? ZERO : new Integer(rev_deleted) }});

lastRevision = revision;
}

Modified: trunk/mwdumper/src/org/mediawiki/importer/XmlDumpReader.java
===================================================================
--- trunk/mwdumper/src/org/mediawiki/importer/XmlDumpReader.java 2009-07-31 10:21:02 UTC (rev 54086)
+++ trunk/mwdumper/src/org/mediawiki/importer/XmlDumpReader.java 2009-07-31 10:39:07 UTC (rev 54087)
@@ -47,6 +47,8 @@

private char[] buffer;
private int len;
+ private boolean hasContent = false;
+ private boolean deleted = false;

Siteinfo siteinfo;
Page page;
@@ -69,6 +71,7 @@
this.writer = writer;
buffer = new char[4096];
len = 0;
+ hasContent = false;
}

/**
@@ -150,8 +153,15 @@
// if and when character data arrives -- at that point we
// have a length.
len = 0;
+ hasContent = false;
+
if (abortFlag)
throw new SAXException("XmlDumpReader set abort flag.");
+
+ // check for deleted="deleted", and set deleted flag for the current element.
+ String d = attributes.getValue("deleted");
+ deleted = (d!=null && d.equals("deleted"));
+
try {
qName = (String)startElements.get(qName);
if (qName == null)
@@ -181,6 +191,7 @@
}
System.arraycopy(ch, start, buffer, len, length);
len += length;
+ hasContent = true;
}

public void endElement(String uri, String localname, String qName) throws SAXException {
@@ -243,6 +254,11 @@
writer.writeSiteinfo(siteinfo);
}

+ private String bufferContentsOrNull() {
+ if (!hasContent) return null;
+ else return bufferContents();
+ }
+
private String bufferContents() {
return len == 0 ? "" : new String(buffer, 0, len);
}
@@ -298,7 +314,7 @@

void readId() {
int id = Integer.parseInt(bufferContents());
- if (contrib != null)
+ if (contrib != null)
contrib.Id = id;
else if (rev != null)
rev.Id = id;
@@ -333,7 +349,8 @@
}

void readComment() {
- rev.Comment = bufferContents();
+ rev.Comment = bufferContentsOrNull();
+ if (rev.Comment==null && !deleted) rev.Comment = ""; //NOTE: null means deleted/supressed
}

void readMinor() {
@@ -341,29 +358,30 @@
}

void readText() {
- rev.Text = bufferContents();
+ rev.Text = bufferContentsOrNull();
+ if (rev.Text==null && !deleted) rev.Text = ""; //NOTE: null means deleted/supressed
}

// -----------
void openContributor() {
- contrib = null;
+ //XXX: record deleted flag?! as it is, any empty <contributor> tag counts as "deleted"
+ contrib = new Contributor();
}

void closeContributor() {
- if (contrib == null)
- throw new IllegalArgumentException("Invalid contributor");
-
+ //NOTE: if the contributor was supressed, nither username nor id have been set in the Contributor object
rev.Contributor = contrib;
contrib = null;
}


void readUsername() {
- contrib = new Contributor(bufferContents());
+ contrib.Username = bufferContentsOrNull();
}

void readIp() {
- contrib = new Contributor(bufferContents());
+ contrib.Username = bufferContents();
+ contrib.isIP = true;
}

private static final TimeZone utc = TimeZone.getTimeZone("UTC");

Modified: trunk/mwdumper/src/org/mediawiki/importer/XmlDumpWriter.java
===================================================================
--- trunk/mwdumper/src/org/mediawiki/importer/XmlDumpWriter.java 2009-07-31 10:21:02 UTC (rev 54086)
+++ trunk/mwdumper/src/org/mediawiki/importer/XmlDumpWriter.java 2009-07-31 10:39:07 UTC (rev 54087)
@@ -118,14 +118,22 @@
writer.emptyElement("minor");
}

- if (rev.Comment != null && rev.Comment.length() != 0)
+ if (rev.Comment == null) {
+ writer.emptyElement("comment", deletedAttrib);
+ }
+ else if (rev.Comment.length() != 0) {
writer.textElement("comment", rev.Comment);
+ }

- writer.textElement("text", rev.Text, new String[][] {
- {"xml:space", "preserve"}});
+ writer.textElement("text", rev.Text,
+ rev.Text==null ? new String[][] {{"xml:space", "preserve"}, {"deleted", "deleted"}}
+ : new String[][] {{"xml:space", "preserve"}}
+ );

writer.closeElement();
}
+
+ static final String[][] deletedAttrib = new String[][] { {"deleted", "deleted"} };

static String formatTimestamp(Calendar ts) {
return dateFormat.format(ts.getTime());
@@ -133,13 +141,19 @@

void writeContributor(Contributor contrib) throws IOException {
XmlWriter writer = this.writer;
- writer.openElement("contributor");
- if (contrib.isAnon()) {
- writer.textElement("ip", contrib.Username);
- } else {
- writer.textElement("username", contrib.Username);
- writer.textElement("id", Integer.toString(contrib.Id));
+
+ if (contrib.Username==null) {
+ writer.emptyElement("contributor", deletedAttrib);
}
- writer.closeElement();
+ else {
+ writer.openElement("contributor");
+ if (contrib.isIP) {
+ writer.textElement("ip", contrib.Username);
+ } else {
+ writer.textElement("username", contrib.Username);
+ writer.textElement("id", Integer.toString(contrib.Id));
+ }
+ writer.closeElement();
+ }
}
}

Modified: trunk/mwdumper/src/org/mediawiki/importer/XmlWriter.java
===================================================================
--- trunk/mwdumper/src/org/mediawiki/importer/XmlWriter.java 2009-07-31 10:21:02 UTC (rev 54086)
+++ trunk/mwdumper/src/org/mediawiki/importer/XmlWriter.java 2009-07-31 10:39:07 UTC (rev 54087)
@@ -106,7 +106,7 @@
}

public void textElement(String element, String text, String[][] attributes) throws IOException {
- if (text.length() == 0) {
+ if (text==null || text.length() == 0) {
emptyElement(element, attributes);
} else {
startElement(element, attributes, ">");



_______________________________________________
MediaWiki-CVS mailing list
MediaWiki-CVS@xxxxxxxxxxxxxxxxxxx
https://lists.wikimedia.org/mailman/listinfo/mediawiki-cvs

<Prev in Thread] Current Thread [Next in Thread>
Google Custom Search

News | Mail Home | sitemap | FAQ | advertise