|
[MediaWiki-CVS] SVN: [54044] trunk/WikiWord/WikiWordBuilder/src/main/java/d: msg#01422mediawiki-cvs
http://www.mediawiki.org/wiki/Special:Code/MediaWiki/54044

Revision: 54044
Author:   daniel
Date:     2009-07-30 20:28:41 +0000 (Thu, 30 Jul 2009)

Log Message:
-----------
optimized regex generated by templatePattern; previous try performed terribly

Modified Paths:
--------------
    trunk/WikiWord/WikiWordBuilder/src/main/java/de/brightbyte/wikiword/analyzer/WikiConfiguration.java
    trunk/WikiWord/WikiWordBuilder/src/main/java/de/brightbyte/wikiword/wikis/WikiConfiguration_enwiki.java

Modified: trunk/WikiWord/WikiWordBuilder/src/main/java/de/brightbyte/wikiword/analyzer/WikiConfiguration.java
===================================================================
--- trunk/WikiWord/WikiWordBuilder/src/main/java/de/brightbyte/wikiword/analyzer/WikiConfiguration.java 2009-07-30 20:20:26 UTC (rev 54043)
+++ trunk/WikiWord/WikiWordBuilder/src/main/java/de/brightbyte/wikiword/analyzer/WikiConfiguration.java 2009-07-30 20:28:41 UTC (rev 54044)
@@ -566,7 +566,11 @@
 	 * @param more if true, the template is allowed to have more parameters, which are hoewever not captured.
 	 */
 	public static Pattern templatePattern(String name, int params, boolean more) {
-		String s = templatePatternString(name, params, more);
+		return templatePattern(name, params, more, false);
+	}
+
+	public static Pattern templatePattern(String name, int params, boolean more, boolean stripNamedParams) {
+		String s = templatePatternString(name, params, more, stripNamedParams);
 		return Pattern.compile(s, Pattern.CASE_INSENSITIVE | Pattern.DOTALL);
 	}
 
@@ -580,13 +584,17 @@
 	 * @param more if true, the template is allowed to have more parameters, which are hoewever not captured.
 	 */
 	public static String templatePatternString(String name, int params, boolean more) {
+		return templatePatternString(name, params, more, false);
+	}
+
+	public static String templatePatternString(String name, int params, boolean more, boolean stripNamedParams) {
 		String s = "\\{\\{\\s*";
-		s+= "("+name+")";
+		s+= "("+name+")\\s*";
 
-		s+= "(?:\\s*\\|[^|=]*?=[^|]*?\\s*)*";
+		if (stripNamedParams) s+= "(?:\\s*\\|[^|={}]*=[^|{}]*\\s*)*";
 
 		for (int i=0; i<params; i++){
-			s+= "\\|([^|=]*?)\\s*";
+			s+= "\\|([^|={}]*)\\s*";
 		}
 
 		if (more) s+= "(\\s*\\|.*?)?";

Modified: trunk/WikiWord/WikiWordBuilder/src/main/java/de/brightbyte/wikiword/wikis/WikiConfiguration_enwiki.java
===================================================================
--- trunk/WikiWord/WikiWordBuilder/src/main/java/de/brightbyte/wikiword/wikis/WikiConfiguration_enwiki.java 2009-07-30 20:20:26 UTC (rev 54043)
+++ trunk/WikiWord/WikiWordBuilder/src/main/java/de/brightbyte/wikiword/wikis/WikiConfiguration_enwiki.java 2009-07-30 20:28:41 UTC (rev 54044)
@@ -69,9 +69,9 @@
 		stripClutterManglers.add( new WikiTextAnalyzer.RegularExpressionMangler("\\{\\{\\s*Auto[ _](.+?)\\s*\\|\\s*(.*?)(\\|.*?)?\\s*\\}\\}", "$1 $2", Pattern.CASE_INSENSITIVE));
 		*/
 
-		stripClutterManglers.add( new RegularExpressionMangler(templatePattern("Birth[-_ ]date|BrithDate|Dob|Age|Birth[_ ]date[_ ]and[_ ]age|BirthDateAndAge|Bda", 3, true), "$2-$3-$4" ) );
-		stripClutterManglers.add( new RegularExpressionMangler(templatePattern("Death[_ ]date[_ ]and[_ ]age|DeathDateAndAge|Dda", 6, true), "$2-$3-$4 – $5-$6-$7" ) );
-		stripClutterManglers.add( new RegularExpressionMangler(templatePattern("Death[-_ ]date|DeathDate|Dod", 3, true), "$2-$3-$4" ) );
+		stripClutterManglers.add( new RegularExpressionMangler(templatePattern("Birth[-_ ]date|BrithDate|Bday|Dob|Age|Birth[-_ ]date[_ ]and[_ ]age|BirthDateAndAge|Bda", 3, true, true), "$2-$3-$4" ) );
+		stripClutterManglers.add( new RegularExpressionMangler(templatePattern("Death[-_ ]date[_ ]and[_ ]age|DeathDateAndAge|Dda", 6, true, true), "$2-$3-$4 – $5-$6-$7" ) );
+		stripClutterManglers.add( new RegularExpressionMangler(templatePattern("Death[-_ ]date|DeathDate|Dod", 3, true, true), "$2-$3-$4" ) );
 		stripClutterManglers.add( new RegularExpressionMangler("^"+templatePatternString("wrapper", 0, true), "{|", Pattern.MULTILINE | Pattern.CASE_INSENSITIVE));
 		stripClutterManglers.add( new RegularExpressionMangler("^"+templatePatternString("end|col-end", 0, true), "|}", Pattern.MULTILINE | Pattern.CASE_INSENSITIVE));
 

_______________________________________________
MediaWiki-CVS mailing list
MediaWiki-CVS@xxxxxxxxxxxxxxxxxxx
https://lists.wikimedia.org/mailman/listinfo/mediawiki-cvs
|
|
||||||||||||||||||||||||||
|
|
|
| News | Mail Home | sitemap | FAQ | advertise |