|
[MediaWiki-CVS] SVN: [54087] trunk/mwdumper: msg#01465 mediawiki-cvs
http://www.mediawiki.org/wiki/Special:Code/MediaWiki/54087 Revision: 54087 Author: daniel Date: 2009-07-31 10:39:07 +0000 (Fri, 31 Jul 2009) Log Message: ----------- support for rev_deleted; bump pom.xml to version 1.16 Modified Paths: -------------- trunk/mwdumper/pom.xml trunk/mwdumper/src/org/mediawiki/importer/Contributor.java trunk/mwdumper/src/org/mediawiki/importer/SqlWriter14.java trunk/mwdumper/src/org/mediawiki/importer/SqlWriter15.java trunk/mwdumper/src/org/mediawiki/importer/XmlDumpReader.java trunk/mwdumper/src/org/mediawiki/importer/XmlDumpWriter.java trunk/mwdumper/src/org/mediawiki/importer/XmlWriter.java Modified: trunk/mwdumper/pom.xml =================================================================== --- trunk/mwdumper/pom.xml 2009-07-31 10:21:02 UTC (rev 54086) +++ trunk/mwdumper/pom.xml 2009-07-31 10:39:07 UTC (rev 54087) @@ -5,7 +5,7 @@ <name>mwdumper</name> <groupId>org.wikimedia</groupId> <artifactId>mwdumper</artifactId> - <version>1.11</version> + <version>1.16</version> <packaging>jar</packaging> <url>http://www.mediawiki.org/wiki/MWDumper</url> @@ -78,6 +78,23 @@ </includes> </configuration> </plugin> + <plugin> + <groupId>org.apache.maven.plugins</groupId> + <artifactId>maven-jar-plugin</artifactId> + <version>2.2</version> + <configuration> + <archive> + <manifest> + <mainClass>org.mediawiki.dumper.Dumper</mainClass> + <packageName>org.mediawiki</packageName> + </manifest> + <manifestEntries> + <mode>development</mode> + <url>${pom.url}</url> + </manifestEntries> + </archive> + </configuration> + </plugin> </plugins> <resources> <!-- include all non-java files as resources --> Modified: trunk/mwdumper/src/org/mediawiki/importer/Contributor.java =================================================================== --- trunk/mwdumper/src/org/mediawiki/importer/Contributor.java 2009-07-31 10:21:02 UTC (rev 54086) +++ trunk/mwdumper/src/org/mediawiki/importer/Contributor.java 2009-07-31 10:39:07 UTC (rev 54087) @@ -28,10 +28,10 @@ public 
class Contributor { public String Username; public int Id; - - public boolean isAnon() { - // Fixme; dumps w/o id numbers... - return (Id == 0); + public boolean isIP = false; + + public Contributor() { + this(null, 0); } public Contributor(String username, int id) { @@ -39,8 +39,4 @@ Id = id; } - public Contributor(String ip) { - Username = ip; - Id = 0; - } } Modified: trunk/mwdumper/src/org/mediawiki/importer/SqlWriter14.java =================================================================== --- trunk/mwdumper/src/org/mediawiki/importer/SqlWriter14.java 2009-07-31 10:21:02 UTC (rev 54086) +++ trunk/mwdumper/src/org/mediawiki/importer/SqlWriter14.java 2009-07-31 10:39:07 UTC (rev 54087) @@ -63,10 +63,10 @@ {"old_id", new Integer(revision.Id)}, {"old_namespace", page.Title.Namespace}, {"old_title", titleFormat(page.Title.Text)}, - {"old_text", revision.Text}, - {"old_comment", revision.Comment}, - {"old_user", new Integer(revision.Contributor.Id)}, - {"old_user_text", revision.Contributor.Username}, + {"old_text", revision.Text == null ? "" : revision.Text}, + {"old_comment", revision.Comment == null ? "" : revision.Comment}, + {"old_user", revision.Contributor.Username == null ? ZERO : new Integer(revision.Contributor.Id)}, + {"old_user_text", revision.Contributor.Username == null ? "" : revision.Contributor.Username}, {"old_timestamp", timestampFormat(revision.Timestamp)}, {"old_minor_edit", revision.Minor ? ONE : ZERO}, {"old_flags", "utf-8"}, @@ -78,10 +78,10 @@ {"cur_id", new Integer(page.Id)}, {"cur_namespace", page.Title.Namespace}, {"cur_title", titleFormat(page.Title.Text)}, - {"cur_text", revision.Text}, - {"cur_comment", revision.Comment}, - {"cur_user", new Integer(revision.Contributor.Id)}, - {"cur_user_text", revision.Contributor.Username}, + {"cur_text", revision.Text == null ? "" : revision.Text}, + {"cur_comment", revision.Comment == null ? "" : revision.Comment}, + {"cur_user", revision.Contributor.Username == null ? 
ZERO : new Integer(revision.Contributor.Id)}, + {"cur_user_text", revision.Contributor.Username == null ? "" : revision.Contributor.Username}, {"cur_timestamp", timestampFormat(revision.Timestamp)}, {"cur_restrictions", page.Restrictions}, {"cur_counter", ZERO}, Modified: trunk/mwdumper/src/org/mediawiki/importer/SqlWriter15.java =================================================================== --- trunk/mwdumper/src/org/mediawiki/importer/SqlWriter15.java 2009-07-31 10:21:02 UTC (rev 54086) +++ trunk/mwdumper/src/org/mediawiki/importer/SqlWriter15.java 2009-07-31 10:39:07 UTC (rev 54087) @@ -59,22 +59,32 @@ lastRevision = null; } + static final int DELETED_TEXT = 1; + static final int DELETED_COMMENT = 2; + static final int DELETED_USER = 4; + static final int DELETED_RESTRICTED = 8; + public void writeRevision(Revision revision) throws IOException { bufferInsertRow(traits.getTextTable(), new Object[][] { {"old_id", new Integer(revision.Id)}, - {"old_text", revision.Text}, + {"old_text", revision.Text == null ? "" : revision.Text}, {"old_flags", "utf-8"}}); + + int rev_deleted = 0; + if (revision.Contributor.Username==null) rev_deleted |= DELETED_USER; + if (revision.Comment==null) rev_deleted |= DELETED_COMMENT; + if (revision.Text==null) rev_deleted |= DELETED_TEXT; bufferInsertRow("revision", new Object[][] { {"rev_id", new Integer(revision.Id)}, {"rev_page", new Integer(currentPage.Id)}, {"rev_text_id", new Integer(revision.Id)}, - {"rev_comment", revision.Comment}, - {"rev_user", new Integer(revision.Contributor.Id)}, - {"rev_user_text", revision.Contributor.Username}, + {"rev_comment", revision.Comment == null ? "" : revision.Comment}, + {"rev_user", revision.Contributor.Username == null ? ZERO : new Integer(revision.Contributor.Id)}, + {"rev_user_text", revision.Contributor.Username == null ? "" : revision.Contributor.Username}, {"rev_timestamp", timestampFormat(revision.Timestamp)}, {"rev_minor_edit", revision.Minor ? 
ONE : ZERO}, - {"rev_deleted", ZERO}}); + {"rev_deleted", rev_deleted==0 ? ZERO : new Integer(rev_deleted) }}); lastRevision = revision; } Modified: trunk/mwdumper/src/org/mediawiki/importer/XmlDumpReader.java =================================================================== --- trunk/mwdumper/src/org/mediawiki/importer/XmlDumpReader.java 2009-07-31 10:21:02 UTC (rev 54086) +++ trunk/mwdumper/src/org/mediawiki/importer/XmlDumpReader.java 2009-07-31 10:39:07 UTC (rev 54087) @@ -47,6 +47,8 @@ private char[] buffer; private int len; + private boolean hasContent = false; + private boolean deleted = false; Siteinfo siteinfo; Page page; @@ -69,6 +71,7 @@ this.writer = writer; buffer = new char[4096]; len = 0; + hasContent = false; } /** @@ -150,8 +153,15 @@ // if and when character data arrives -- at that point we // have a length. len = 0; + hasContent = false; + if (abortFlag) throw new SAXException("XmlDumpReader set abort flag."); + + // check for deleted="deleted", and set deleted flag for the current element. + String d = attributes.getValue("deleted"); + deleted = (d!=null && d.equals("deleted")); + try { qName = (String)startElements.get(qName); if (qName == null) @@ -181,6 +191,7 @@ } System.arraycopy(ch, start, buffer, len, length); len += length; + hasContent = true; } public void endElement(String uri, String localname, String qName) throws SAXException { @@ -243,6 +254,11 @@ writer.writeSiteinfo(siteinfo); } + private String bufferContentsOrNull() { + if (!hasContent) return null; + else return bufferContents(); + } + private String bufferContents() { return len == 0 ? 
"" : new String(buffer, 0, len); } @@ -298,7 +314,7 @@ void readId() { int id = Integer.parseInt(bufferContents()); - if (contrib != null) + if (contrib != null) contrib.Id = id; else if (rev != null) rev.Id = id; @@ -333,7 +349,8 @@ } void readComment() { - rev.Comment = bufferContents(); + rev.Comment = bufferContentsOrNull(); + if (rev.Comment==null && !deleted) rev.Comment = ""; //NOTE: null means deleted/supressed } void readMinor() { @@ -341,29 +358,30 @@ } void readText() { - rev.Text = bufferContents(); + rev.Text = bufferContentsOrNull(); + if (rev.Text==null && !deleted) rev.Text = ""; //NOTE: null means deleted/supressed } // ----------- void openContributor() { - contrib = null; + //XXX: record deleted flag?! as it is, any empty <contributor> tag counts as "deleted" + contrib = new Contributor(); } void closeContributor() { - if (contrib == null) - throw new IllegalArgumentException("Invalid contributor"); - + //NOTE: if the contributor was supressed, nither username nor id have been set in the Contributor object rev.Contributor = contrib; contrib = null; } void readUsername() { - contrib = new Contributor(bufferContents()); + contrib.Username = bufferContentsOrNull(); } void readIp() { - contrib = new Contributor(bufferContents()); + contrib.Username = bufferContents(); + contrib.isIP = true; } private static final TimeZone utc = TimeZone.getTimeZone("UTC"); Modified: trunk/mwdumper/src/org/mediawiki/importer/XmlDumpWriter.java =================================================================== --- trunk/mwdumper/src/org/mediawiki/importer/XmlDumpWriter.java 2009-07-31 10:21:02 UTC (rev 54086) +++ trunk/mwdumper/src/org/mediawiki/importer/XmlDumpWriter.java 2009-07-31 10:39:07 UTC (rev 54087) @@ -118,14 +118,22 @@ writer.emptyElement("minor"); } - if (rev.Comment != null && rev.Comment.length() != 0) + if (rev.Comment == null) { + writer.emptyElement("comment", deletedAttrib); + } + else if (rev.Comment.length() != 0) { writer.textElement("comment", 
rev.Comment); + } - writer.textElement("text", rev.Text, new String[][] { - {"xml:space", "preserve"}}); + writer.textElement("text", rev.Text, + rev.Text==null ? new String[][] {{"xml:space", "preserve"}, {"deleted", "deleted"}} + : new String[][] {{"xml:space", "preserve"}} + ); writer.closeElement(); } + + static final String[][] deletedAttrib = new String[][] { {"deleted", "deleted"} }; static String formatTimestamp(Calendar ts) { return dateFormat.format(ts.getTime()); @@ -133,13 +141,19 @@ void writeContributor(Contributor contrib) throws IOException { XmlWriter writer = this.writer; - writer.openElement("contributor"); - if (contrib.isAnon()) { - writer.textElement("ip", contrib.Username); - } else { - writer.textElement("username", contrib.Username); - writer.textElement("id", Integer.toString(contrib.Id)); + + if (contrib.Username==null) { + writer.emptyElement("contributor", deletedAttrib); } - writer.closeElement(); + else { + writer.openElement("contributor"); + if (contrib.isIP) { + writer.textElement("ip", contrib.Username); + } else { + writer.textElement("username", contrib.Username); + writer.textElement("id", Integer.toString(contrib.Id)); + } + writer.closeElement(); + } } } Modified: trunk/mwdumper/src/org/mediawiki/importer/XmlWriter.java =================================================================== --- trunk/mwdumper/src/org/mediawiki/importer/XmlWriter.java 2009-07-31 10:21:02 UTC (rev 54086) +++ trunk/mwdumper/src/org/mediawiki/importer/XmlWriter.java 2009-07-31 10:39:07 UTC (rev 54087) @@ -106,7 +106,7 @@ } public void textElement(String element, String text, String[][] attributes) throws IOException { - if (text.length() == 0) { + if (text==null || text.length() == 0) { emptyElement(element, attributes); } else { startElement(element, attributes, ">"); _______________________________________________ MediaWiki-CVS mailing list MediaWiki-CVS@xxxxxxxxxxxxxxxxxxx https://lists.wikimedia.org/mailman/listinfo/mediawiki-cvs
|
|
||||||||||||||||||||||||||
|
|
|
| News | Mail Home | sitemap | FAQ | advertise |