|
[MediaWiki-CVS] SVN: [54100] trunk/WikiWord/WikiWordBuilder/src/main/java/d: msg#01478 mediawiki-cvs
http://www.mediawiki.org/wiki/Special:Code/MediaWiki/54100 Revision: 54100 Author: daniel Date: 2009-07-31 15:00:13 +0000 (Fri, 31 Jul 2009) Log Message: ----------- more filters for enwiki Modified Paths: -------------- trunk/WikiWord/WikiWordBuilder/src/main/java/de/brightbyte/wikiword/wikis/WikiConfiguration_enwiki.java Modified: trunk/WikiWord/WikiWordBuilder/src/main/java/de/brightbyte/wikiword/wikis/WikiConfiguration_enwiki.java =================================================================== --- trunk/WikiWord/WikiWordBuilder/src/main/java/de/brightbyte/wikiword/wikis/WikiConfiguration_enwiki.java 2009-07-31 14:30:14 UTC (rev 54099) +++ trunk/WikiWord/WikiWordBuilder/src/main/java/de/brightbyte/wikiword/wikis/WikiConfiguration_enwiki.java 2009-07-31 15:00:13 UTC (rev 54100) @@ -69,9 +69,10 @@ stripClutterManglers.add( new WikiTextAnalyzer.RegularExpressionMangler("\\{\\{\\s*Auto[ _](.+?)\\s*\\|\\s*(.*?)(\\|.*?)?\\s*\\}\\}", "$1 $2", Pattern.CASE_INSENSITIVE)); */ - stripClutterManglers.add( new RegularExpressionMangler(templatePattern("Birth[-_ ]date|BrithDate|Bday|Dob|Age|Birth[-_ ]date[_ ]and[_ ]age|BirthDateAndAge|Bda", 3, true, true), "$2-$3-$4" ) ); - stripClutterManglers.add( new RegularExpressionMangler(templatePattern("Death[-_ ]date[_ ]and[_ ]age|DeathDateAndAge|Dda", 6, true, true), "$2-$3-$4 – $5-$6-$7" ) ); - stripClutterManglers.add( new RegularExpressionMangler(templatePattern("Death[-_ ]date|DeathDate|Dod", 3, true, true), "$2-$3-$4" ) ); + stripClutterManglers.add( new RegularExpressionMangler(templatePattern("Birth[_ ]date|BrithDate|Bday|Dob|Age|Birth[-_ ]date[_ ]and[_ ]age|BirthDateAndAge|Bda", 3, true, true), "$2-$3-$4" ) ); + stripClutterManglers.add( new RegularExpressionMangler(templatePattern("Death[_ ]date[_ ]and[_ ]age|DeathDateAndAge|Dda", 6, true, true), "$2-$3-$4 – $5-$6-$7" ) ); + stripClutterManglers.add( new RegularExpressionMangler(templatePattern("Death[_ ]date|DeathDate|Dod", 3, true, true), "$2-$3-$4" ) ); + 
stripClutterManglers.add( new RegularExpressionMangler(templatePattern("(start|end|birth|death)-date|OldStyleDate", 1, true, true), "$1" ) ); stripClutterManglers.add( new RegularExpressionMangler("^"+templatePatternString("wrapper", 0, true), "{|", Pattern.MULTILINE | Pattern.CASE_INSENSITIVE)); stripClutterManglers.add( new RegularExpressionMangler("^"+templatePatternString("end|col-end", 0, true), "|}", Pattern.MULTILINE | Pattern.CASE_INSENSITIVE)); @@ -84,8 +85,10 @@ stripClutterManglers.add( new RegularExpressionMangler(templatePattern("Okina", 0, false), "\u02BB")); stripClutterManglers.add( new RegularExpressionMangler(templatePattern("0", 0, true), " ")); + stripClutterManglers.add( new RegularExpressionMangler(templatePattern("ndash", 0, true), "–")); stripClutterManglers.add( new RegularExpressionMangler(templatePattern("\u00b7|moddot|dot", 0, false), "\u00b7")); stripClutterManglers.add( new RegularExpressionMangler(templatePattern("spaces", 1, true), " ")); + stripClutterManglers.add( new RegularExpressionMangler(templatePattern("nbsp", 1, true), " ")); stripClutterManglers.add( new RegularExpressionMangler( templatePattern( @@ -94,7 +97,7 @@ "Tnavbar|Navbox([ _]generic)?|redirect|pp-.*?|" + "ambox|wikify|pov|cleanup|globalize|split|current|issue|merge|" + "Coor([ _]\\w+)?|Coord|reflist|precision[-\\w\\d]+|nowrap[ _]begin|" + - "Audio|\\w+[ _]icon|lang-\\w+|Flagicon|" + + "Audio|\\w+[ _]icon|lang-\\w+|Flagicon|Flag|Flagcountry|" + "Main|" + "redirect" //maybe keep that? but we need this for the :'' stripping , 0, true), "")); _______________________________________________ MediaWiki-CVS mailing list MediaWiki-CVS@xxxxxxxxxxxxxxxxxxx https://lists.wikimedia.org/mailman/listinfo/mediawiki-cvs
|
|
||||||||||||||||||||||||||
|
|
|
| News | Mail Home | sitemap | FAQ | advertise |