logo       

[MediaWiki-CVS] SVN: [54044] trunk/WikiWord/WikiWordBuilder/src/main/java/d: msg#01422

mediawiki-cvs

Subject: [MediaWiki-CVS] SVN: [54044] trunk/WikiWord/WikiWordBuilder/src/main/java/de/brightbyte/wikiword

http://www.mediawiki.org/wiki/Special:Code/MediaWiki/54044

Revision: 54044
Author: daniel
Date: 2009-07-30 20:28:41 +0000 (Thu, 30 Jul 2009)

Log Message:
-----------
optimized regex generated by templatePattern; previous try performed terribly

Modified Paths:
--------------

trunk/WikiWord/WikiWordBuilder/src/main/java/de/brightbyte/wikiword/analyzer/WikiConfiguration.java

trunk/WikiWord/WikiWordBuilder/src/main/java/de/brightbyte/wikiword/wikis/WikiConfiguration_enwiki.java

Modified:
trunk/WikiWord/WikiWordBuilder/src/main/java/de/brightbyte/wikiword/analyzer/WikiConfiguration.java
===================================================================
---
trunk/WikiWord/WikiWordBuilder/src/main/java/de/brightbyte/wikiword/analyzer/WikiConfiguration.java
2009-07-30 20:20:26 UTC (rev 54043)
+++
trunk/WikiWord/WikiWordBuilder/src/main/java/de/brightbyte/wikiword/analyzer/WikiConfiguration.java
2009-07-30 20:28:41 UTC (rev 54044)
@@ -566,7 +566,11 @@
* @param more if true, the template is allowed to have more
parameters, which are hoewever not captured.
*/
public static Pattern templatePattern(String name, int params, boolean
more) {
- String s = templatePatternString(name, params, more);
+ return templatePattern(name, params, more, false);
+ }
+
+ public static Pattern templatePattern(String name, int params, boolean
more, boolean stripNamedParams) {
+ String s = templatePatternString(name, params, more,
stripNamedParams);
return Pattern.compile(s, Pattern.CASE_INSENSITIVE |
Pattern.DOTALL);
}

@@ -580,13 +584,17 @@
* @param more if true, the template is allowed to have more
parameters, which are hoewever not captured.
*/
public static String templatePatternString(String name, int params,
boolean more) {
+ return templatePatternString(name, params, more, false);
+ }
+
+ public static String templatePatternString(String name, int params,
boolean more, boolean stripNamedParams) {
String s = "\\{\\{\\s*";
- s+= "("+name+")";
+ s+= "("+name+")\\s*";

- s+= "(?:\\s*\\|[^|=]*?=[^|]*?\\s*)*";
+ if (stripNamedParams) s+= "(?:\\s*\\|[^|={}]*=[^|{}]*\\s*)*";

for (int i=0; i<params; i++){
- s+= "\\|([^|=]*?)\\s*";
+ s+= "\\|([^|={}]*)\\s*";
}

if (more) s+= "(\\s*\\|.*?)?";

Modified:
trunk/WikiWord/WikiWordBuilder/src/main/java/de/brightbyte/wikiword/wikis/WikiConfiguration_enwiki.java
===================================================================
---
trunk/WikiWord/WikiWordBuilder/src/main/java/de/brightbyte/wikiword/wikis/WikiConfiguration_enwiki.java
2009-07-30 20:20:26 UTC (rev 54043)
+++
trunk/WikiWord/WikiWordBuilder/src/main/java/de/brightbyte/wikiword/wikis/WikiConfiguration_enwiki.java
2009-07-30 20:28:41 UTC (rev 54044)
@@ -69,9 +69,9 @@
stripClutterManglers.add( new
WikiTextAnalyzer.RegularExpressionMangler("\\{\\{\\s*Auto[
_](.+?)\\s*\\|\\s*(.*?)(\\|.*?)?\\s*\\}\\}", "$1 $2",
Pattern.CASE_INSENSITIVE));
*/

- stripClutterManglers.add( new
RegularExpressionMangler(templatePattern("Birth[-_
]date|BrithDate|Dob|Age|Birth[_ ]date[_ ]and[_ ]age|BirthDateAndAge|Bda", 3,
true), "$2-$3-$4" ) );
- stripClutterManglers.add( new
RegularExpressionMangler(templatePattern("Death[_ ]date[_ ]and[_
]age|DeathDateAndAge|Dda", 6, true), "$2-$3-$4 &ndash; $5-$6-$7" ) );
- stripClutterManglers.add( new
RegularExpressionMangler(templatePattern("Death[-_ ]date|DeathDate|Dod", 3,
true), "$2-$3-$4" ) );
+ stripClutterManglers.add( new
RegularExpressionMangler(templatePattern("Birth[-_
]date|BrithDate|Bday|Dob|Age|Birth[-_ ]date[_ ]and[_ ]age|BirthDateAndAge|Bda",
3, true, true), "$2-$3-$4" ) );
+ stripClutterManglers.add( new
RegularExpressionMangler(templatePattern("Death[-_ ]date[_ ]and[_
]age|DeathDateAndAge|Dda", 6, true, true), "$2-$3-$4 &ndash; $5-$6-$7" ) );
+ stripClutterManglers.add( new
RegularExpressionMangler(templatePattern("Death[-_ ]date|DeathDate|Dod", 3,
true, true), "$2-$3-$4" ) );

stripClutterManglers.add( new
RegularExpressionMangler("^"+templatePatternString("wrapper", 0, true), "{|",
Pattern.MULTILINE | Pattern.CASE_INSENSITIVE));
stripClutterManglers.add( new
RegularExpressionMangler("^"+templatePatternString("end|col-end", 0, true),
"|}", Pattern.MULTILINE | Pattern.CASE_INSENSITIVE));



_______________________________________________
MediaWiki-CVS mailing list
MediaWiki-CVS@xxxxxxxxxxxxxxxxxxx
https://lists.wikimedia.org/mailman/listinfo/mediawiki-cvs

<Prev in Thread] Current Thread [Next in Thread>
Google Custom Search

News | Mail Home | sitemap | FAQ | advertise