Author: gregor
Date: Wed Aug 31 13:01:34 2005
New Revision: 265566
URL: http://svn.apache.org/viewcvs?rev=265566&view=rev
Log:
committed soc work from robert goene
Added:
lenya/sandbox/search/
lenya/sandbox/search/README-lenya-search
lenya/sandbox/search/src/
lenya/sandbox/search/src/cocoon/
lenya/sandbox/search/src/cocoon/local.blocks.properties
lenya/sandbox/search/src/java/
lenya/sandbox/search/src/java/org/
lenya/sandbox/search/src/java/org/apache/
lenya/sandbox/search/src/java/org/apache/cocoon/
lenya/sandbox/search/src/java/org/apache/cocoon/components/
lenya/sandbox/search/src/java/org/apache/cocoon/components/search/
lenya/sandbox/search/src/java/org/apache/cocoon/components/search/Index.java
lenya/sandbox/search/src/java/org/apache/cocoon/components/search/IndexException.java
lenya/sandbox/search/src/java/org/apache/cocoon/components/search/IndexStructure.java
lenya/sandbox/search/src/java/org/apache/cocoon/components/search/analyzer/
lenya/sandbox/search/src/java/org/apache/cocoon/components/search/analyzer/ConfigurableAnalyzer.java
lenya/sandbox/search/src/java/org/apache/cocoon/components/search/analyzer/ConfigurablePerFieldAnalyzer.java
lenya/sandbox/search/src/java/org/apache/cocoon/components/search/analyzer/ConfigurableStopwordAnalyzer.java
lenya/sandbox/search/src/java/org/apache/cocoon/components/search/components/
lenya/sandbox/search/src/java/org/apache/cocoon/components/search/components/AnalyzerManager.java
lenya/sandbox/search/src/java/org/apache/cocoon/components/search/components/IndexManager.java
lenya/sandbox/search/src/java/org/apache/cocoon/components/search/components/Indexer.java
lenya/sandbox/search/src/java/org/apache/cocoon/components/search/components/Searcher.java
lenya/sandbox/search/src/java/org/apache/cocoon/components/search/components/impl/
lenya/sandbox/search/src/java/org/apache/cocoon/components/search/components/impl/AbstractIndexer.java
lenya/sandbox/search/src/java/org/apache/cocoon/components/search/components/impl/AbstractSearcher.java
lenya/sandbox/search/src/java/org/apache/cocoon/components/search/components/impl/AnalyzerManagerImpl.java
lenya/sandbox/search/src/java/org/apache/cocoon/components/search/components/impl/DefaultIndexerImpl.java
lenya/sandbox/search/src/java/org/apache/cocoon/components/search/components/impl/DefaultSearcherImpl.java
lenya/sandbox/search/src/java/org/apache/cocoon/components/search/components/impl/IndexManagerImpl.java
lenya/sandbox/search/src/java/org/apache/cocoon/components/search/components/impl/ParallelIndexerImpl.java
lenya/sandbox/search/src/java/org/apache/cocoon/components/search/components/impl/ParallelSearcherImpl.java
lenya/sandbox/search/src/java/org/apache/cocoon/components/search/fieldmodel/
lenya/sandbox/search/src/java/org/apache/cocoon/components/search/fieldmodel/DateFieldDefinition.java
lenya/sandbox/search/src/java/org/apache/cocoon/components/search/fieldmodel/FieldDefinition.java
lenya/sandbox/search/src/java/org/apache/cocoon/components/search/fieldmodel/StringFieldDefinition.java
lenya/sandbox/search/src/java/org/apache/cocoon/components/search/lucene2.roles
lenya/sandbox/search/src/java/org/apache/cocoon/components/search/utils/
lenya/sandbox/search/src/java/org/apache/cocoon/components/search/utils/SourceHelper.java
lenya/sandbox/search/src/java/org/apache/cocoon/transformation/
lenya/sandbox/search/src/java/org/apache/cocoon/transformation/LuceneIndexTransformer2.java
lenya/sandbox/search/src/java/org/apache/cocoon/transformation/LuceneIndexTransformerOptimized.java
lenya/sandbox/search/src/java/org/apache/lenya/
lenya/sandbox/search/src/java/org/apache/lenya/lenya.roles
lenya/sandbox/search/src/modules/
lenya/sandbox/search/src/modules/lucene/
lenya/sandbox/search/src/modules/lucene/config/
lenya/sandbox/search/src/modules/lucene/config/analyzer_manager.xconf
lenya/sandbox/search/src/modules/lucene/config/default_indexer.xconf
lenya/sandbox/search/src/modules/lucene/config/index_manager.xconf
lenya/sandbox/search/src/modules/lucene/config/parallel_indexer.xconf
lenya/sandbox/search/src/modules/lucene/config/usecase-delete.xconf
lenya/sandbox/search/src/modules/lucene/config/usecase-externalOpensearch.xconf
lenya/sandbox/search/src/modules/lucene/config/usecase-index.xconf
lenya/sandbox/search/src/modules/lucene/config/usecase-opensearch.xconf
lenya/sandbox/search/src/modules/lucene/config/usecase-search.xconf
lenya/sandbox/search/src/modules/sitetree/
lenya/sandbox/search/src/modules/sitetree/xslt/
lenya/sandbox/search/src/modules/sitetree/xslt/navigation/
lenya/sandbox/search/src/modules/sitetree/xslt/navigation/search.xsl
lenya/sandbox/search/src/webapp/
lenya/sandbox/search/src/webapp/WEB-INF/
lenya/sandbox/search/src/webapp/WEB-INF/analyzer/
lenya/sandbox/search/src/webapp/WEB-INF/analyzer/perfieldconf.xml
lenya/sandbox/search/src/webapp/WEB-INF/analyzer/stopword/
lenya/sandbox/search/src/webapp/WEB-INF/analyzer/stopword/Danish_da.xml
lenya/sandbox/search/src/webapp/WEB-INF/analyzer/stopword/Dutch_nl.xml
lenya/sandbox/search/src/webapp/WEB-INF/analyzer/stopword/English_en.xml
lenya/sandbox/search/src/webapp/WEB-INF/analyzer/stopword/Finnish_fi.xml
lenya/sandbox/search/src/webapp/WEB-INF/analyzer/stopword/French_fr.xml
lenya/sandbox/search/src/webapp/WEB-INF/analyzer/stopword/German_de.xml
lenya/sandbox/search/src/webapp/WEB-INF/analyzer/stopword/Italian_it.xml
lenya/sandbox/search/src/webapp/WEB-INF/analyzer/stopword/Norwegian_no.xml
lenya/sandbox/search/src/webapp/WEB-INF/analyzer/stopword/Polish_pl.xml
lenya/sandbox/search/src/webapp/WEB-INF/analyzer/stopword/Portuguese_pt.xml
lenya/sandbox/search/src/webapp/WEB-INF/analyzer/stopword/Russian_ru.xml
lenya/sandbox/search/src/webapp/WEB-INF/analyzer/stopword/Spanish_es.xml
lenya/sandbox/search/src/webapp/WEB-INF/analyzer/stopword/Swedish_sv.xml
lenya/sandbox/search/src/webapp/WEB-INF/xconf/
lenya/sandbox/search/src/webapp/WEB-INF/xconf/lucene2.xconf
lenya/sandbox/search/src/webapp/lenya/
lenya/sandbox/search/src/webapp/lenya/pubs/
lenya/sandbox/search/src/webapp/lenya/pubs/default/
lenya/sandbox/search/src/webapp/lenya/pubs/default/config/
lenya/sandbox/search/src/webapp/lenya/pubs/default/config/index_manager.xconf
lenya/sandbox/search/src/webapp/lenya/pubs/default/config/index_manager_index.xconf
lenya/sandbox/search/src/webapp/lenya/pubs/default/java/
lenya/sandbox/search/src/webapp/lenya/pubs/default/java/src/
lenya/sandbox/search/src/webapp/lenya/pubs/default/java/src/org/
lenya/sandbox/search/src/webapp/lenya/pubs/default/java/src/org/apache/
lenya/sandbox/search/src/webapp/lenya/pubs/default/java/src/org/apache/lenya/
lenya/sandbox/search/src/webapp/lenya/pubs/default/java/src/org/apache/lenya/defaultpub/
lenya/sandbox/search/src/webapp/lenya/pubs/default/java/src/org/apache/lenya/defaultpub/cms/
lenya/sandbox/search/src/webapp/lenya/pubs/default/java/src/org/apache/lenya/defaultpub/cms/publication/
lenya/sandbox/search/src/webapp/lenya/pubs/default/java/src/org/apache/lenya/defaultpub/cms/publication/templating/
lenya/sandbox/search/src/webapp/lenya/pubs/default/java/src/org/apache/lenya/defaultpub/cms/publication/templating/Instantiator.java
lenya/sandbox/search/src/webapp/lenya/pubs/default/java/src/org/apache/lenya/defaultpub/cms/usecases/
lenya/sandbox/search/src/webapp/lenya/pubs/default/java/src/org/apache/lenya/defaultpub/cms/usecases/Deactivate.java
lenya/sandbox/search/src/webapp/lenya/pubs/default/java/src/org/apache/lenya/defaultpub/cms/usecases/Publish.java
lenya/sandbox/search/src/webapp/lenya/usecases/
lenya/sandbox/search/src/webapp/lenya/usecases/lucene/
lenya/sandbox/search/src/webapp/lenya/usecases/lucene/delete.jx
lenya/sandbox/search/src/webapp/lenya/usecases/lucene/externalOpensearch.jx
lenya/sandbox/search/src/webapp/lenya/usecases/lucene/index.jx
lenya/sandbox/search/src/webapp/lenya/usecases/lucene/opensearch.jx
lenya/sandbox/search/src/webapp/lenya/usecases/lucene/search.jx
lenya/sandbox/search/src/webapp/lenya/usecases/lucene/usecase.xmap
lenya/sandbox/search/src/webapp/lenya/xslt/
lenya/sandbox/search/src/webapp/lenya/xslt/lucene/
lenya/sandbox/search/src/webapp/lenya/xslt/lucene/homepage2index.xsl
lenya/sandbox/search/src/webapp/lenya/xslt/lucene/links2index.xsl
lenya/sandbox/search/src/webapp/lenya/xslt/lucene/opensearch.xml
lenya/sandbox/search/src/webapp/lenya/xslt/lucene/opensearch2html.xsl
lenya/sandbox/search/src/webapp/lenya/xslt/lucene/search2html.xsl
lenya/sandbox/search/src/webapp/lenya/xslt/lucene/search2opensearch.xsl
lenya/sandbox/search/src/webapp/lenya/xslt/lucene/xhtml2index.xsl
Added: lenya/sandbox/search/README-lenya-search
URL:
http://svn.apache.org/viewcvs/lenya/sandbox/search/README-lenya-search?rev=265566&view=auto
==============================================================================
--- lenya/sandbox/search/README-lenya-search (added)
+++ lenya/sandbox/search/README-lenya-search Wed Aug 31 13:01:34 2005
@@ -0,0 +1,134 @@
+8-31-2005
+Robert P.G. Goene
+
+Google Summer of Code lenya-search README
+
+This file gives a short description of the purpose, installation, use
+and issues of the result of the above mentioned project.
+
+P u r p o s e
+
+The project's aims are discussed in the following document:
+
+http://wiki.apache.org/general/RobertGoene/SummerOfCode2005Proposal
+
+The main goal was to enhance the integration of the Lucene index with
+the Lenya CMS. The current version of Lenya contains a crawler that takes
+care of the indexing part. This external way of indexing prevents the user
+from using the metadata as stored in the CMS. Crawling a site has another
+drawback: a user is not able to index a page immediately. It remains a
+batch process that updates the index periodically.
+
+O V E R V I E W
+
+* Lucene 1.1 block enhancement
+
+ The package contains the adjusted patch of the Lucene1.1 block improvement
+ that can be found at:
+
+ http://issues.apache.org/bugzilla/show_bug.cgi?id=32263
+
+ The adjustments only concern the configuration of the components in such
+ a way that it can be used in Lenya publications. The java source has been
+ untouched. The source can be found in src/java/org/apache/cocoon. The
+ config can be found in src/modules/lucene/config and src/webapp/WEB-INF/
+
+* Changed files
+ The following files are changed in comparison with the trunk version of
+ Lenya at the date mentioned above:
+
+ local.blocks.properties The cocoon Lucene block is needed.
+ search.xsl The search navigation component points
+ Publish.java Invokes the index usecase when publishing.
+ Deactivate.java Invokes the delete usecase when deactivating.
+ Instantiator.java Configures the index when a publication is
+ added from Lenya itself.
+ lenya.roles Added the roles needed for the Lucene 1.1 block
+
+I n s t a l l a t i o n
+
+First, copy the files in the root of the current trunk of lenya.
+Second, copy the local.blocks.properties to the cocoon root and build cocoon.
+Third, clean and rebuild lenya.
+
+U s e
+
+* Configure Nutch location
+
+ As Nutch is not included in Lenya, I have provided a sample rss file with some
+ dummy search results. When one wants to use Nutch as an external engine, the
+ generator of the externalopensearch match in the usecases/lucene/usecase.xmap
+ file should be edited.
+
+* Configure index location and structure
+
+ When a publication is added from within Lenya, the index path is set for
you. If
+ the publication is copied without this usecase, the path to the index should
be
+ edited. It can be found in the <yourpub>/config/index_manager_index.xconf
file.
+ This file contains the configuration of the specific index for the current
+ publication. The id and directory should be adjusted.
+
+ When one wants to modify the index structure, fields can be adjusted and added
+ in the same file. The xhtml2index.xsl file can be used to map the xml
elements
+ of the source document to the specified fields.
+
+I s s u e s
+
+The integration of the search facilities can be improved even more, of course.
+Other proposals for the enhancement of lenya in general are included below.
+My aim is to resolve these issues in a short period of time.
+
+* Nutch & Opensearch
+
+ The nutch integration is different than the proposed plan. The reason for
+ this is a reconsideration of the different roles of the Nutch index and
+ the Lenya index. The first one should be used for external documents and
+ should be as independent as possible. For many Lenya users the Lenya index
+ of internal documents will be sufficient.
+
+ Nutch can deliver its search results in the form of the opensearch rss
+ format. This is basically an rss feed with some extra fields that are
+ needed for search results.
+
+ http://opensearch.a9.com/
+
+ Although this seems to become a standard, it is not sufficient. Nutch has
+ added a field for the query terms. Beside this, there is no support for
+ paged search-results, although most search-engines do work with fragmented
+ search-results for large resultsets.
+
+ At this time, opensearch is used for the communication with Nutch. I will
+ form an enhancement proposal for this rss extension, which can be used in
+ Lenya if it seems reasonable to everyone involved. Compatibility with
+ opensearch will be the goal.
+
+* External search engines
+
+ Besides the single external search engine that is supported at this point of
+ time, it should be fairly easy to add additional external search engines that
+ deliver their result in an extended form of rss. One could think of
specialized
+ Nutch indexes, other Lenya Publications or any other search engine that
supports
+ the rss extension mentioned above.
+
+* Nutch usecase
+
+ The proposed Nutch usecase is not implemented at this point of time, because
of
+ the fact that the current Lenya version cannot schedule the periodic
execution
+ of some usecase. This being an essential feature for the Nutch execution, i
have
+ decided to postpone this part of the proposal and evaluate the needs for
such a
+ feature in Lenya.
+
+* External roles files
+
+ The lenya-search project makes use of the enhancement of the
+ LenyaSearchTransformer. This package isn't a part of Cocoon yet, so it
+ had to be integrated with Lenya for this project. For the most part, I
+ managed to modify existing files as little as possible and add
+ files instead. This, of course, simplifies the maintenance of Lenya.
+
+ The one thing I could not give a separate place was the roles file, as
+ used by Cocoon. I had to copy the entries of the enhancement to the
+ lenya.roles document, a potential maintenance problem.
+
+ I will provide a patch to avoid this in a short while, so that experimenting
+ with external code will be easier to manage.
Added: lenya/sandbox/search/src/cocoon/local.blocks.properties
URL:
http://svn.apache.org/viewcvs/lenya/sandbox/search/src/cocoon/local.blocks.properties?rev=265566&view=auto
==============================================================================
--- lenya/sandbox/search/src/cocoon/local.blocks.properties (added)
+++ lenya/sandbox/search/src/cocoon/local.blocks.properties Wed Aug 31 13:01:34
2005
@@ -0,0 +1,176 @@
+
+# Copyright 1999-2004 The Apache Software Foundation
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+#------------------------------------------------------------------------------------
+# Blocks properties version to make sure that local.blocks.properties is kept
in sync
+# NOTE: Do NOT modify this number unless you are a Dev and want to modify this
file
+# NOTE for Devs: Update this number if this file is being updated.
+# Also update the version number within
src/targets/init-build.xml
+
+lenya.blocks.properties.version=220218
+
+#------------------------------------------------------------------------------#
+# Cocoon Blocks
#
+#------------------------------------------------------------------------------#
+
+# Remove blocks from your cocoon distribution by setting the corresponding
+# include property to true or false. The blocks are included by default, i.e.
if
+# no property was set.
+
+# NOTE: Don't modify this file directly but make a copy named
+# 'local.blocks.properties' and modify that. The build system will first load
+# 'local.blocks.properties' and properties are immutable in Ant.
+
+# NOTE: "[dependency]" indicates blocks that are required by other blocks.
+# Disabling batik, for example, will result in a RuntimeException when using
+# fop. Dependencies only needed for the block's samples are marked explicitly.
+# This latter information was introduced only short time ago, so do not expect
+# it to be complete.
+
+# NOTE: (to Cocoon committers): blocks.properties is generated from gump.xml
+# using "build generate-blocks.properties". Any changes to blocks definitions
+# must be made in gump.xml, not here.
+
+
+# Stable blocks
----------------------------------------------------------------
+
+# Stable blocks are those that can be considered ready for production and
+# will contain components and API that will remain stable and where
+# developers are committed to back compatibility. In short, stuff that you
+# can depend on.
+
+#-----[dependency]: "authentication-fw" depends on "session-fw".
+#-----[dependency]: "authentication-fw" is needed by "portal", "portal-fw".
+include.block.authentication-fw=false
+#-----[dependency]: "batik" is needed by "fop", "scratchpad", "tour".
+#include.block.batik=false
+include.block.bsf=false
+#-----[dependency]: "chaperon" depends on "xsp" (for samples).
+#include.block.chaperon=false
+#-----[dependency]: "databases" depends on "xsp".
+#-----[dependency]: "databases" is needed by "hsqldb", "jms", "ojb",
"petstore", "repository", "xmldb".
+#include.block.databases=false
+#-----[dependency]: "fop" depends on "batik".
+#-----[dependency]: "fop" is needed by "tour".
+#include.block.fop=false
+#-----[dependency]: "hsqldb" depends on "databases".
+#-----[dependency]: "hsqldb" is needed by "jms", "ojb", "petstore".
+#include.block.hsqldb=false
+#-----[dependency]: "html" is needed by "portal".
+#include.block.html=false
+include.block.itext=false
+include.block.jfor=false
+include.block.jsp=false
+#-----[dependency]: "linkrewriter" depends on "xsp".
+#include.block.linkrewriter=false
+#-----[dependency]: "lucene" depends on "forms" (for samples), "xsp" (for
samples).
+include.block.lucene=true
+#include.block.naming=false
+include.block.paranoid=false
+include.block.php=false
+include.block.poi=false
+#-----[dependency]: "portal" depends on "authentication-fw", "html",
"session-fw".
+#-----[dependency]: "portal" is needed by "faces".
+include.block.portal=false
+#include.block.profiler=false
+#-----[dependency]: "python" depends on "xsp".
+include.block.python=false
+#-----[dependency]: "session-fw" depends on "xsp".
+#-----[dependency]: "session-fw" is needed by "authentication-fw", "portal",
"portal-fw".
+#include.block.session-fw=false
+#-----[dependency]: "velocity" is needed by "petstore".
+include.block.velocity=false
+include.block.web3=false
+#-----[dependency]: "xmldb" depends on "databases".
+# TODO: Including the xmldb block might cause a conflict with the patched
xmldb libraries lib/xmldb-common-2003-09-02.jar and
lib/xmldb-xupdate-2003-10-14.jar
+include.block.xmldb=false
+#-----[dependency]: "xsp" is needed by "chaperon", "databases", "eventcache",
"forms", "linkrewriter", "lucene", "python", "scratchpad", "session-fw",
"woody".
+#include.block.xsp=false
+
+# Unstable blocks
--------------------------------------------------------------
+
+# Unstable blocks are currently under development and do not guarantee that the
+# contracts they expose (API, xml schema, properties, behavior) will remain
+# constant in time. Developers are not committed to back-compatibility just
yet.
+# This doesn't necessarily mean the blocks implementation is unstable or
+# the code can't be trusted for production, but use with care and watch
+# its development as things might change over time before they are marked
+# stable.
+
+#-----[dependency]: "apples" depends on "forms" (for samples).
+include.block.apples=false
+#-----[dependency]: "asciiart" is needed by "mail".
+include.block.asciiart=false
+#-----[dependency]: "axis" is needed by "scratchpad".
+include.block.axis=false
+#-----[dependency]: "cron" is needed by "scratchpad".
+include.block.cron=true
+#include.block.deli=false
+#-----[dependency]: "eventcache" depends on "jms", "xsp" (for samples).
+#-----[dependency]: "eventcache" is needed by "repository", "scratchpad".
+include.block.eventcache=false
+#-----[dependency]: "faces" depends on "portal", "taglib".
+include.block.faces=false
+#-----[dependency]: "forms" depends on "xsp" (for samples).
+#-----[dependency]: "forms" is needed by "apples", "javaflow", "lucene",
"ojb", "petstore", "tour".
+include.block.forms=true
+#-----[dependency]: "javaflow" depends on "forms", "ojb".
+include.block.javaflow=false
+include.block.jcr=true
+#-----[dependency]: "jms" depends on "databases" (for samples), "hsqldb".
+#-----[dependency]: "jms" is needed by "eventcache", "slide".
+include.block.jms=false
+include.block.linotype=false
+#-----[dependency]: "mail" depends on "asciiart".
+include.block.mail=false
+include.block.midi=false
+#-----[dependency]: "ojb" depends on "databases" (for samples), "forms" (for
samples), "hsqldb" (for samples).
+#-----[dependency]: "ojb" is needed by "javaflow".
+include.block.ojb=false
+#-----[dependency]: "petstore" depends on "databases", "forms", "hsqldb",
"velocity".
+include.block.petstore=false
+#include.block.proxy=false
+include.block.qdox=false
+#-----[dependency]: "repository" depends on "databases", "eventcache".
+#-----[dependency]: "repository" is needed by "scratchpad", "slide", "webdav".
+include.block.repository=false
+#-----[dependency]: "scratchpad" depends on "axis", "batik" (for samples),
"cron", "eventcache", "repository", "xsp".
+include.block.scratchpad=false
+#include.block.serializers=false
+#-----[dependency]: "slide" depends on "jms", "repository".
+include.block.slide=false
+include.block.slop=false
+include.block.stx=false
+#-----[dependency]: "taglib" is needed by "faces".
+include.block.taglib=false
+#-----[dependency]: "tour" depends on "batik", "fop", "forms".
+include.block.tour=false
+#-----[dependency]: "webdav" depends on "repository".
+include.block.webdav=false
+
+# Deprecated blocks
------------------------------------------------------------
+
+# Although some of these blocks may have been stable, they are now deprecated
+# in favour of other blocks and therefore are excluded by default from the
build.
+# For including one of them you have to set the exclude property into comment
in
+# blocks.properties.
+
+include.block.php=false
+#-----[dependency]: "portal-fw" depends on "authentication-fw", "session-fw".
+include.block.portal-fw=false
+include.block.swf=false
+#-----[dependency]: "woody" depends on "xsp" (for samples).
+include.block.woody=false
Added:
lenya/sandbox/search/src/java/org/apache/cocoon/components/search/Index.java
URL:
http://svn.apache.org/viewcvs/lenya/sandbox/search/src/java/org/apache/cocoon/components/search/Index.java?rev=265566&view=auto
==============================================================================
---
lenya/sandbox/search/src/java/org/apache/cocoon/components/search/Index.java
(added)
+++
lenya/sandbox/search/src/java/org/apache/cocoon/components/search/Index.java
Wed Aug 31 13:01:34 2005
@@ -0,0 +1,311 @@
+/**
+ * Copyright 2004 The Apache Software Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.cocoon.components.search;
+
+import java.io.File;
+import java.io.IOException;
+import java.util.Date;
+
+import org.apache.avalon.framework.configuration.ConfigurationException;
+import org.apache.avalon.framework.service.ServiceException;
+import org.apache.avalon.framework.service.ServiceManager;
+import org.apache.cocoon.components.search.components.AnalyzerManager;
+import org.apache.cocoon.components.search.components.Indexer;
+import org.apache.cocoon.components.search.fieldmodel.DateFieldDefinition;
+import org.apache.cocoon.components.search.fieldmodel.FieldDefinition;
+import org.apache.lucene.document.Document;
+import org.apache.lucene.document.Field;
+import org.apache.lucene.index.IndexReader;
+import org.apache.lucene.index.IndexWriter;
+import org.apache.lucene.store.Directory;
+import org.apache.lucene.store.FSDirectory;
+
+/**
+ * Describes one Lucene index: its ID, its field structure, the ID of its
+ * default analyzer, its Lucene directory, and the role name of the
+ * {@link Indexer} component that writes to it.
+ *
+ * The indexer is handed out to one caller at a time: {@link #getIndexer()}
+ * marks it busy and {@link #releaseIndexer(Indexer)} frees it again.
+ *
+ * @author Nicolas Maisonneuve
+ */
+public class Index {
+
+    /**
+     * default analyzer ID
+     */
+    private String defaultAnalyzer;
+
+    /**
+     * Index Structure definition
+     */
+    private IndexStructure structure;
+
+    /**
+     * Index ID
+     */
+    private String id;
+
+    /**
+     * Lucene Directory of the index
+     */
+    private Directory directory;
+
+    /**
+     * Maximum time, in seconds, that {@link #getIndexer()} waits for a busy
+     * indexer to be released.
+     */
+    private int numtries = 5;
+
+    /**
+     * is the indexer working (handed out and not released yet)
+     */
+    private boolean indexer_busy;
+
+    /**
+     * Indexer Role name
+     */
+    private String indexer_role;
+
+    /**
+     * Service manager used to look up and release the indexer and the
+     * analyzer manager.
+     */
+    private ServiceManager manager;
+
+    /**
+     * Create a lucene document that contains only its uid field.
+     *
+     * @param uid
+     *            String the document uid
+     * @return Document a document holding the uid field
+     * @throws IllegalStateException
+     *             if the index structure does not define the uid field
+     *             ({@link Indexer#DOCUMENT_UID_FIELD})
+     */
+    public Document createDocument(String uid) {
+        Document doc = new Document();
+        try {
+            doc.add(createField(Indexer.DOCUMENT_UID_FIELD, uid));
+        } catch (IndexException ex) {
+            // Do not hand back a document that silently lacks its uid field:
+            // report the misconfigured index structure instead.
+            IllegalStateException failure = new IllegalStateException(
+                    "Cannot create a document for uid '" + uid + "': "
+                            + ex.getMessage());
+            failure.initCause(ex);
+            throw failure;
+        }
+        return doc;
+    }
+
+    /**
+     * create a lucene field
+     *
+     * @param fieldname
+     *            String fieldname (must exist in the index structure)
+     * @param value
+     *            String value
+     * @return the lucene field built by the field definition
+     * @throws IndexException
+     *             if no field with this name is defined in the index
+     *             structure
+     */
+    public Field createField(String fieldname, String value)
+            throws IndexException {
+        FieldDefinition f = structure.getFieldDef(fieldname);
+        if (f == null) {
+            throw new IndexException("Field with the name: " + fieldname
+                    + " doesn't exist");
+        }
+        return f.createLField(value);
+    }
+
+    /**
+     * create a lucene field for date value
+     *
+     * @param fieldname
+     *            String fieldname (must exist in the index structure and be
+     *            a date field)
+     * @param value
+     *            Date value
+     * @return the lucene field built by the date field definition
+     * @throws IndexException
+     *             if no field with this name is defined, or if the field is
+     *             not a date field
+     */
+    public Field createField(String fieldname, Date value)
+            throws IndexException {
+        FieldDefinition f = structure.getFieldDef(fieldname);
+        if (f == null) {
+            throw new IndexException("Field with the name: " + fieldname
+                    + " doesn't exist");
+        }
+        // Check the type before casting so that a wrongly typed field is
+        // reported as an IndexException rather than a ClassCastException.
+        if (!(f instanceof DateFieldDefinition)) {
+            throw new IndexException("Field with the name: " + fieldname
+                    + " is not a date field");
+        }
+        return ((DateFieldDefinition) f).createLField(value);
+    }
+
+    /**
+     * get the indexer of the index, waiting up to <code>numtries</code>
+     * seconds if another caller currently holds it. The caller must hand the
+     * indexer back with {@link #releaseIndexer(Indexer)}.
+     *
+     * @throws IndexException
+     *             if the indexer is still busy after the timeout, if the
+     *             waiting thread is interrupted, or if the indexer or the
+     *             analyzer cannot be obtained
+     * @return Indexer
+     */
+    public synchronized Indexer getIndexer() throws IndexException {
+
+        // wait the end of the indexing. wait() (unlike Thread.sleep())
+        // gives up the monitor, so releaseIndexer() can actually run while
+        // this thread is waiting.
+        long deadline = System.currentTimeMillis() + numtries * 1000L;
+        while (indexer_busy) {
+            long remaining = deadline - System.currentTimeMillis();
+            if (remaining <= 0) {
+                break;
+            }
+            try {
+                wait(remaining);
+            } catch (InterruptedException ex) {
+                // keep the interrupt visible to the caller's thread
+                Thread.currentThread().interrupt();
+                throw new IndexException(
+                        "Interrupted while waiting for the indexer", ex);
+            }
+        }
+
+        if (indexer_busy) {
+            throw new IndexException(
+                    "Timeout to access to the indexer (the indexer is indexing)");
+        }
+
+        Indexer indexer = null;
+        AnalyzerManager analyzerM = null;
+        boolean handedOut = false;
+        try {
+            indexer = (Indexer) this.manager.lookup(indexer_role);
+
+            // update maybe the analyzer
+            analyzerM = (AnalyzerManager) this.manager
+                    .lookup(AnalyzerManager.ROLE);
+
+            indexer.setAnalyzer(analyzerM.getAnalyzer(getDefaultAnalyzerID()));
+            indexer.setIndex(directory);
+
+            // only mark the indexer busy once it is really handed out
+            indexer_busy = true;
+            handedOut = true;
+            return indexer;
+        } catch (ServiceException ex1) {
+            throw new IndexException(ex1);
+        } catch (ConfigurationException ex2) {
+            throw new IndexException(ex2);
+        } finally {
+            if (analyzerM != null) {
+                manager.release(analyzerM);
+            }
+            // on failure nobody will call releaseIndexer(), so give the
+            // component back here
+            if (!handedOut && indexer != null) {
+                manager.release(indexer);
+            }
+        }
+    }
+
+    /**
+     * Release the indexer and wake up callers waiting in
+     * {@link #getIndexer()}. Does nothing when <code>indexer</code> is null.
+     *
+     * @param indexer
+     *            the indexer obtained from {@link #getIndexer()}
+     */
+    public synchronized void releaseIndexer(Indexer indexer) {
+        if (indexer != null) {
+            try {
+                this.manager.release(indexer);
+            } finally {
+                indexer_busy = false;
+                notifyAll();
+            }
+        }
+    }
+
+    /**
+     * get the index ID
+     *
+     * @return the index ID
+     */
+    public String getID() {
+        return id;
+    }
+
+    /**
+     * Set the index ID
+     *
+     * @param id
+     *            index ID
+     */
+    public void setID(String id) {
+        this.id = id;
+    }
+
+    /**
+     * get the default Analyzer
+     *
+     * @return the id of the default analyzer
+     */
+    public String getDefaultAnalyzerID() {
+        return defaultAnalyzer;
+    }
+
+    /**
+     * set the default Analyzer
+     *
+     * @param defaultAnalyzerID
+     *            the id of the default Analyzer
+     */
+    public void setDefaultAnalyzerID(String defaultAnalyzerID) {
+        this.defaultAnalyzer = defaultAnalyzerID;
+    }
+
+    /**
+     * Return the index Structure
+     *
+     * @return the index Structure
+     */
+    public IndexStructure getStructure() {
+        return structure;
+    }
+
+    /**
+     * Set the index structure
+     *
+     * @param structure
+     *            IndexStructure
+     */
+    public void setStructure(IndexStructure structure) {
+        this.structure = structure;
+    }
+
+    /**
+     * Set the service manager used to look up the indexer and the analyzer
+     * manager.
+     *
+     * @param manager
+     *            the service manager
+     */
+    public void setManager(ServiceManager manager) {
+        this.manager = manager;
+    }
+
+    /**
+     * get the lucene directory
+     *
+     * @return the lucene directory
+     */
+    public Directory getDirectory() {
+        return directory;
+    }
+
+    /**
+     * Set the lucene Directory. A lock found on the directory is removed,
+     * and an empty index is created if the directory does not contain one.
+     *
+     * @param dir
+     *            lucene Directory
+     * @return true if the directory was locked and has been unlocked, false
+     *         otherwise
+     * @throws IOException
+     */
+    public boolean setDirectory(Directory dir) throws IOException {
+        boolean locked = false;
+        this.directory = dir;
+
+        // if index is locked
+        if (IndexReader.isLocked(directory)) {
+            IndexReader.unlock(directory);
+            locked = true;
+        }
+
+        // create index if the index doesn't exist
+        if (!IndexReader.indexExists(directory)) {
+            (new IndexWriter(directory, null, true)).close();
+        }
+
+        return locked;
+
+    }
+
+    /**
+     * Set the index path directory. The directory is created when the path
+     * does not exist yet.
+     *
+     * @param path
+     *            String file system path of the index
+     * @return true if the directory was locked and has been unlocked, false
+     *         otherwise
+     * @throws IOException
+     */
+    public boolean setDirectory(String path) throws IOException {
+        File fpath = new File(path);
+        Directory dir = FSDirectory.getDirectory(fpath, !fpath.exists());
+        return setDirectory(dir);
+    }
+
+    /**
+     * @param indexer
+     *            The role name of the indexer component to look up.
+     */
+    public void setIndexer(String indexer) {
+        this.indexer_role = indexer;
+    }
+}
Added:
lenya/sandbox/search/src/java/org/apache/cocoon/components/search/IndexException.java
URL:
http://svn.apache.org/viewcvs/lenya/sandbox/search/src/java/org/apache/cocoon/components/search/IndexException.java?rev=265566&view=auto
==============================================================================
---
lenya/sandbox/search/src/java/org/apache/cocoon/components/search/IndexException.java
(added)
+++
lenya/sandbox/search/src/java/org/apache/cocoon/components/search/IndexException.java
Wed Aug 31 13:01:34 2005
@@ -0,0 +1,56 @@
+/**
+ * Copyright 2004 The Apache Software Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ *
+ * You may obtain a copy of the License at
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
+ * either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.cocoon.components.search;
+
+/**
+ * Checked exception raised by the index components.
+ * <p>
+ * {@link #getMessage()} always returns the detail text prefixed with
+ * <code>"message: "</code>.
+ *
+ * @author Nicolas Maisonneuve
+ */
+public class IndexException extends Exception {
+
+    private static final long serialVersionUID = 1L;
+
+    /**
+     * Constructor with a message only. No cause is set, so
+     * <code>initCause</code> may still be called later.
+     *
+     * @param mes
+     *            message
+     */
+    public IndexException(String mes) {
+        this(mes, null);
+    }
+
+    /**
+     * Constructor wrapping another exception. The description of the wrapped
+     * exception (its <code>toString()</code>) becomes the detail text, so
+     * that {@link #getMessage()} says what went wrong instead of being
+     * empty.
+     *
+     * @param ex
+     *            initial exception, may be null (the detail text is then
+     *            empty)
+     */
+    public IndexException(Exception ex) {
+        this(ex == null ? "" : ex.toString(), ex);
+    }
+
+    /**
+     * Constructor
+     *
+     * @param mes
+     *            message
+     * @param ex
+     *            initial exception, may be null
+     */
+    public IndexException(String mes, Exception ex) {
+        super(mes);
+        // initCause only when there is a cause: a later initCause() call
+        // stays possible for exceptions created without one
+        if (ex != null) {
+            initCause(ex);
+        }
+    }
+
+    /**
+     * @return the detail text prefixed with "message: "
+     */
+    public String getMessage() {
+        return "message: " + super.getMessage();
+    }
+
+}
Added:
lenya/sandbox/search/src/java/org/apache/cocoon/components/search/IndexStructure.java
URL:
http://svn.apache.org/viewcvs/lenya/sandbox/search/src/java/org/apache/cocoon/components/search/IndexStructure.java?rev=265566&view=auto
==============================================================================
---
lenya/sandbox/search/src/java/org/apache/cocoon/components/search/IndexStructure.java
(added)
+++
lenya/sandbox/search/src/java/org/apache/cocoon/components/search/IndexStructure.java
Wed Aug 31 13:01:34 2005
@@ -0,0 +1,112 @@
+/**
+ * Copyright 2004 The Apache Software Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.cocoon.components.search;
+
+import java.util.Collection;
+import java.util.HashMap;
+import java.util.Iterator;
+import java.util.Map;
+import java.util.Set;
+
+import org.apache.cocoon.components.search.components.Indexer;
+import org.apache.cocoon.components.search.fieldmodel.FieldDefinition;
+
+/**
+ * Index definition class: contains all the index field definitions, keyed by
+ * field name. A definition for the UID field
+ * (Indexer.DOCUMENT_UID_FIELD) is registered by the constructor.
+ *
+ * @author Nicolas Maisonneuve
+ *
+ */
+public final class IndexStructure {
+
+    /** field name -> FieldDefinition */
+    private final Map fielddefs;
+
+    /**
+     * Create a structure that contains only the UID field definition.
+     */
+    public IndexStructure() {
+        fielddefs = new HashMap();
+
+        // An index always has a UID field
+        FieldDefinition fielddef = FieldDefinition.create(
+                Indexer.DOCUMENT_UID_FIELD, FieldDefinition.KEYWORD);
+        fielddef.setStore(true);
+        this.addFieldDef(fielddef);
+
+    }
+
+    /**
+     * add a fieldDefinition to the indexDefinition
+     *
+     * @param fielddef
+     *            the definition to register under its name
+     * @throws IllegalArgumentException
+     *             if a definition with the same name is already registered
+     */
+    public void addFieldDef(FieldDefinition fielddef) {
+        if (fielddefs.containsKey(fielddef.name())) {
+            throw new IllegalArgumentException(" field with the name "
+                    + fielddef.name() + " is already used");
+        }
+        fielddefs.put(fielddef.name(), fielddef);
+    }
+
+    /**
+     * @return all fieldnames contained in the index (a new array; the order
+     *         is the iteration order of the underlying HashMap)
+     */
+    public final String[] getFieldNames() {
+        Set results = fielddefs.keySet();
+        return (String[]) results.toArray(new String[results.size()]);
+    }
+
+    /**
+     * return all fieldDefinitions
+     *
+     * @return FieldDefinition[] (a new array)
+     */
+    public final FieldDefinition[] getFieldDef() {
+        Collection results = fielddefs.values();
+        return (FieldDefinition[]) results.toArray(new FieldDefinition[results
+                .size()]);
+    }
+
+    /**
+     * Return the fieldDefinition associated to the name
+     *
+     * @param fieldname
+     *            String the name of the fieldDefinition
+     * @return FieldDefinition, or null if no definition has this name
+     */
+    public final FieldDefinition getFieldDef(String fieldname) {
+        return (FieldDefinition) fielddefs.get(fieldname);
+    }
+
+    /**
+     * check if this field exists
+     *
+     * @param name
+     *            the field's name
+     * @return true if a field with this name exists
+     */
+    public final boolean hasField(String name) {
+        // HashMap compares keys with equals(), so interning the name is not
+        // needed; looking it up directly also avoids a NullPointerException
+        // on a null name
+        return fielddefs.containsKey(name);
+    }
+
+    /**
+     * @return a header line followed by one line per field definition
+     */
+    public String toString() {
+        // NOTE(review): the header text is kept as it was; it names
+        // "DocumentFactory" although this class is IndexStructure
+        StringBuffer result = new StringBuffer("DocumentFactory:");
+        Iterator iter = this.fielddefs.values().iterator();
+        while (iter.hasNext()) {
+            FieldDefinition item = (FieldDefinition) iter.next();
+            result.append("\n").append(item.toString());
+        }
+        return result.toString();
+    }
+
+}
Added:
lenya/sandbox/search/src/java/org/apache/cocoon/components/search/analyzer/ConfigurableAnalyzer.java
URL:
http://svn.apache.org/viewcvs/lenya/sandbox/search/src/java/org/apache/cocoon/components/search/analyzer/ConfigurableAnalyzer.java?rev=265566&view=auto
==============================================================================
---
lenya/sandbox/search/src/java/org/apache/cocoon/components/search/analyzer/ConfigurableAnalyzer.java
(added)
+++
lenya/sandbox/search/src/java/org/apache/cocoon/components/search/analyzer/ConfigurableAnalyzer.java
Wed Aug 31 13:01:34 2005
@@ -0,0 +1,154 @@
+/**
+ * Copyright 2004 The Apache Software Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.cocoon.components.search.analyzer;
+
+import java.io.Reader;
+
+import org.apache.avalon.framework.configuration.Configuration;
+import org.apache.avalon.framework.configuration.ConfigurationException;
+import org.apache.avalon.framework.logger.LogEnabled;
+import org.apache.avalon.framework.logger.Logger;
+import org.apache.cocoon.components.search.components.AnalyzerManager;
+import org.apache.cocoon.components.search.utils.SourceHelper;
+import org.apache.excalibur.source.Source;
+import org.apache.lucene.analysis.Analyzer;
+import org.apache.lucene.analysis.TokenStream;
+
+/**
+ * Analyzer wrapper that allows a lucene analyzer to be configured with an
+ * XML file. Subclasses build the wrapped analyzer in
+ * {@link #configure(Configuration)}; token streams are delegated to it.
+ *
+ * @author Nicolas Maisonneuve
+ */
+public abstract class ConfigurableAnalyzer extends Analyzer implements
+        LogEnabled {
+
+    /** the wrapped lucene analyzer */
+    protected Analyzer analyzer;
+
+    /** the logger received through {@link #enableLogging(Logger)} */
+    protected Logger logger;
+
+    /** the analyzer manager component */
+    protected AnalyzerManager analyzerM;
+
+    /**
+     * whether the config file has to be checked (to update the analyzer if
+     * the config file changes)
+     */
+    private boolean checkConfigFile = false;
+
+    /** the configuration file source */
+    private Source configFile;
+
+    /**
+     * Configure this analyzer. Called by {@link #setConfigFile(Source)} and
+     * by {@link #reconfigure()}.
+     *
+     * @param configuration
+     *            the configuration built from the config file
+     * @throws ConfigurationException
+     */
+    protected abstract void configure(Configuration configuration)
+            throws ConfigurationException;
+
+    /*
+     * (non-Javadoc)
+     *
+     * @see org.apache.lucene.analysis.Analyzer#tokenStream(java.lang.String,
+     *      java.io.Reader)
+     */
+    public final TokenStream tokenStream(String fieldName, Reader reader) {
+        // pure delegation to the wrapped analyzer
+        return this.analyzer.tokenStream(fieldName, reader);
+    }
+
+    /*
+     * (non-Javadoc)
+     *
+     * @see org.apache.avalon.framework.logger.LogEnabled#enableLogging(
+     *      org.apache.avalon.framework.logger.Logger)
+     */
+    public void enableLogging(Logger log) {
+        this.logger = log;
+    }
+
+    /**
+     * Enable the check of the config file (to update the analyzer if the
+     * config file changes) when the analyzer is requested from the analyzer
+     * manager.
+     *
+     * @see org.apache.cocoon.components.search.components.AnalyzerManager#getAnalyzer(String)
+     * @param check
+     *            true to enable the check
+     */
+    public void setEnableCheckFile(boolean check) {
+        this.checkConfigFile = check;
+    }
+
+    /**
+     * Is the checkFile property enabled?
+     *
+     * @return the value last given to {@link #setEnableCheckFile(boolean)},
+     *         false by default
+     */
+    public boolean enableCheckFile() {
+        return this.checkConfigFile;
+    }
+
+    /**
+     * Reconfigure the analyzer if the config file has changed.
+     *
+     * @throws ConfigurationException
+     * @return boolean true if the analyzer is reconfigured (= file has
+     *         changed), else false
+     */
+    public boolean reconfigure() throws ConfigurationException {
+        // source still valid: nothing to do
+        if (SourceHelper.checkSourceValidity(configFile)) {
+            return false;
+        }
+
+        logger.info("reconfiguration of " + this.getClass().getName()
+                + " (the source " + configFile.getURI()
+                + " has changed...) ");
+        configure(SourceHelper.build(configFile));
+        return true;
+    }
+
+    /**
+     * Set the configuration file, register it with SourceHelper and
+     * configure the analyzer from it.
+     *
+     * @param source
+     *            Source configuration file
+     * @throws ConfigurationException
+     */
+    public void setConfigFile(Source source) throws ConfigurationException {
+        this.configFile = source;
+        SourceHelper.registerSource(configFile);
+        Configuration conf = SourceHelper.build(configFile);
+        configure(conf);
+    }
+
+    /**
+     * Set the analyzerManager
+     *
+     * @param analyzerM
+     *            AnalyzerManager
+     */
+    public void setAnalyerManager(AnalyzerManager analyzerM) {
+        this.analyzerM = analyzerM;
+    }
+
+}
Added:
lenya/sandbox/search/src/java/org/apache/cocoon/components/search/analyzer/ConfigurablePerFieldAnalyzer.java
URL:
http://svn.apache.org/viewcvs/lenya/sandbox/search/src/java/org/apache/cocoon/components/search/analyzer/ConfigurablePerFieldAnalyzer.java?rev=265566&view=auto
==============================================================================
---
lenya/sandbox/search/src/java/org/apache/cocoon/components/search/analyzer/ConfigurablePerFieldAnalyzer.java
(added)
+++
lenya/sandbox/search/src/java/org/apache/cocoon/components/search/analyzer/ConfigurablePerFieldAnalyzer.java
Wed Aug 31 13:01:34 2005
@@ -0,0 +1,103 @@
+/**
+ * Copyright 2004 The Apache Software Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.cocoon.components.search.analyzer;
+
+import org.apache.avalon.framework.configuration.Configuration;
+import org.apache.avalon.framework.configuration.ConfigurationException;
+import org.apache.lucene.analysis.Analyzer;
+import org.apache.lucene.analysis.PerFieldAnalyzerWrapper;
+
+/**
+ * Configurable PerFieldAnalyzerWrapper. Allows one analyzer per field for
+ * indexing a document (useful for multilanguage documents).
+ *
+ * (see the org.apache.lucene.analysis.PerFieldAnalyzerWrapper class)
+ *
+ * A config file for this analyzer is:
+ *
+ * <pre>
+ * &lt;!-- a field that is not listed in a "field" tag is analyzed with
+ *      the defaultAnalyzer --&gt;
+ * &lt;config defaultAnalyzer="analyzerEN"&gt;
+ *   &lt;fields&gt;
+ *     &lt;!-- the field "summary" is analyzed with "analyzerEN" --&gt;
+ *     &lt;field name="summary" analyzer="analyzerEN"/&gt;
+ *     &lt;field name="desc_fr" analyzer="analyzerFR"/&gt;
+ *     &lt;field name="desc_en" analyzer="analyzerEN"/&gt;
+ *     &lt;field name="desc_de" analyzer="analyzerDE"/&gt;
+ *   &lt;/fields&gt;
+ * &lt;/config&gt;
+ * </pre>
+ *
+ * @author Nicolas Maisonneuve
+ */
+public class ConfigurablePerFieldAnalyzer extends ConfigurableAnalyzer {
+
+    public static final String CONFIG_DEFAULTANALYZER_ATTRIBUTE = "defaultAnalyzer";
+
+    public static final String FIELDS_ELEMENT = "fields";
+
+    public static final String FIELD_ELEMENT = "field";
+
+    public static final String FIELD_NAME_ATTRIBUTE = "name";
+
+    public static final String FIELD_ANALYZERID_ATTRIBUTE = "analyzer";
+
+    /**
+     * Build a PerFieldAnalyzerWrapper from the configuration and install it
+     * as the wrapped analyzer. The wrapped analyzer is only replaced once
+     * the whole configuration has been read successfully.
+     *
+     * @param configuration
+     *            element carrying the default analyzer attribute and the
+     *            "fields" child
+     * @throws ConfigurationException
+     *             if the default analyzer attribute is missing, if a field
+     *             element lacks its name or analyzer attribute, or if an
+     *             analyzer id is unknown to the analyzer manager
+     * @see org.apache.cocoon.components.search.analyzer.ConfigurableAnalyzer#configure(org.apache.avalon.framework.configuration.Configuration)
+     */
+    public void configure(Configuration configuration)
+            throws ConfigurationException {
+
+        String defaultId = configuration
+                .getAttribute(CONFIG_DEFAULTANALYZER_ATTRIBUTE);
+        PerFieldAnalyzerWrapper wrapper = new PerFieldAnalyzerWrapper(
+                lookupAnalyzer(defaultId));
+
+        Configuration[] conffield = configuration.getChild(FIELDS_ELEMENT)
+                .getChildren(FIELD_ELEMENT);
+
+        for (int i = 0; i < conffield.length; i++) {
+            String fieldname = requireAttribute(conffield[i],
+                    FIELD_NAME_ATTRIBUTE);
+            String analyzerid = requireAttribute(conffield[i],
+                    FIELD_ANALYZERID_ATTRIBUTE);
+            wrapper.addAnalyzer(fieldname, lookupAnalyzer(analyzerid));
+        }
+        this.analyzer = wrapper;
+    }
+
+    /**
+     * Read a mandatory attribute of a field element.
+     *
+     * @throws ConfigurationException
+     *             if the attribute is missing or empty
+     */
+    private static String requireAttribute(Configuration field,
+            String attribute) throws ConfigurationException {
+        // the two-argument getter returns the default instead of throwing,
+        // so a missing attribute reaches the explicit check below
+        String value = field.getAttribute(attribute, null);
+        if (value == null || value.equals("")) {
+            throw new ConfigurationException("element " + FIELD_ELEMENT
+                    + " must have the " + attribute + " attribute");
+        }
+        return value;
+    }
+
+    /**
+     * Fetch an analyzer from the analyzer manager.
+     *
+     * @throws ConfigurationException
+     *             if the manager returns no analyzer for this id
+     */
+    private Analyzer lookupAnalyzer(String analyzerid)
+            throws ConfigurationException {
+        Analyzer found = analyzerM.getAnalyzer(analyzerid);
+        if (found == null) {
+            throw new ConfigurationException("analyzer " + analyzerid
+                    + " doesn't exist");
+        }
+        return found;
+    }
+
+}
Added:
lenya/sandbox/search/src/java/org/apache/cocoon/components/search/analyzer/ConfigurableStopwordAnalyzer.java
URL:
http://svn.apache.org/viewcvs/lenya/sandbox/search/src/java/org/apache/cocoon/components/search/analyzer/ConfigurableStopwordAnalyzer.java?rev=265566&view=auto
==============================================================================
---
lenya/sandbox/search/src/java/org/apache/cocoon/components/search/analyzer/ConfigurableStopwordAnalyzer.java
(added)
+++
lenya/sandbox/search/src/java/org/apache/cocoon/components/search/analyzer/ConfigurableStopwordAnalyzer.java
Wed Aug 31 13:01:34 2005
@@ -0,0 +1,71 @@
+/**
+ * Copyright 2004 The Apache Software Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.cocoon.components.search.analyzer;
+
+import org.apache.avalon.framework.configuration.Configuration;
+import org.apache.avalon.framework.configuration.ConfigurationException;
+import org.apache.lucene.analysis.standard.StandardAnalyzer;
+
+/**
+ * Configurable Stopword Analyzer: a StandardAnalyzer built with the stop
+ * words listed in the config file.
+ *
+ * Config file:
+ *
+ * <pre>
+ * &lt;stopWords&gt;
+ *   &lt;stopword&gt;a&lt;/stopword&gt;
+ *   &lt;stopword&gt;the&lt;/stopword&gt;
+ *   &lt;stopword&gt;but&lt;/stopword&gt;
+ * &lt;/stopWords&gt;
+ * </pre>
+ *
+ * NOTE(review): children are selected with the name "stopword" exactly as
+ * written in STOP_WORD_ELEMENT; confirm that config files in use do not
+ * spell the element "stopWord".
+ *
+ * @author Nicolas Maisonneuve
+ */
+public class ConfigurableStopwordAnalyzer extends ConfigurableAnalyzer {
+
+    /** The element containing a stop word. */
+    private static final String STOP_WORD_ELEMENT = "stopword";
+
+    /**
+     * Configures the analyzer: reads the stop words, logs how many were
+     * found and wraps a StandardAnalyzer built with them.
+     */
+    public void configure(Configuration configuration)
+            throws ConfigurationException {
+        final String[] stopWords = stopTableBuilder(configuration);
+        logger.info("stop words number: " + stopWords.length);
+        this.analyzer = new StandardAnalyzer(stopWords);
+    }
+
+    /**
+     * Build the stop word table.
+     *
+     * @param conf
+     *            Configuration element whose children are the stop word
+     *            elements
+     * @throws ConfigurationException
+     * @return String[] array with all excluded words, one per stop word
+     *         element (empty if there is none)
+     */
+    public static String[] stopTableBuilder(Configuration conf)
+            throws ConfigurationException {
+
+        Configuration[] stopElements = conf.getChildren(STOP_WORD_ELEMENT);
+        if (stopElements == null) {
+            return new String[0];
+        }
+
+        String[] stopWords = new String[stopElements.length];
+        int index = 0;
+        while (index < stopElements.length) {
+            stopWords[index] = stopElements[index].getValue();
+            index++;
+        }
+        return stopWords;
+    }
+
+}
Added:
lenya/sandbox/search/src/java/org/apache/cocoon/components/search/components/AnalyzerManager.java
URL:
http://svn.apache.org/viewcvs/lenya/sandbox/search/src/java/org/apache/cocoon/components/search/components/AnalyzerManager.java?rev=265566&view=auto
==============================================================================
---
lenya/sandbox/search/src/java/org/apache/cocoon/components/search/components/AnalyzerManager.java
(added)
+++
lenya/sandbox/search/src/java/org/apache/cocoon/components/search/components/AnalyzerManager.java
Wed Aug 31 13:01:34 2005
@@ -0,0 +1,75 @@
+/**
+ * Copyright 2004 The Apache Software Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.cocoon.components.search.components;
+
+import org.apache.avalon.framework.configuration.ConfigurationException;
+import org.apache.lucene.analysis.Analyzer;
+
+/**
+ * Analyzer Manager Component: a registry of lucene analyzers addressed by
+ * id.
+ *
+ * @author Maisonneuve Nicolas
+ */
+
+public interface AnalyzerManager {
+
+    /** The role name of this component: the name of this interface. */
+    String ROLE = AnalyzerManager.class.getName();
+
+    /**
+     * Return the analyzer with this id.
+     *
+     * @param id
+     *            analyzer ID
+     * @return the analyzer
+     * @throws ConfigurationException
+     * @see org.apache.lucene.analysis.Analyzer
+     */
+    Analyzer getAnalyzer(String id) throws ConfigurationException;
+
+    /**
+     * Does this analyzer exist?
+     *
+     * @param id
+     *            String the analyzer id
+     * @return boolean true if an analyzer with this id exists
+     */
+    boolean exist(String id);
+
+    /**
+     * Return all analyzer IDs
+     *
+     * @return an array with the ids of all analyzers
+     */
+    String[] getAnalyzersID();
+
+    /**
+     * Add a lucene analyzer
+     *
+     * @param id
+     *            the id of the analyzer
+     * @param analyzer
+     *            the analyzer to add
+     */
+    void put(String id, Analyzer analyzer);
+
+    /**
+     * Remove an analyzer
+     *
+     * @param id
+     *            the analyzer ID
+     */
+    void remove(String id);
+}
Added:
lenya/sandbox/search/src/java/org/apache/cocoon/components/search/components/IndexManager.java
URL:
http://svn.apache.org/viewcvs/lenya/sandbox/search/src/java/org/apache/cocoon/components/search/components/IndexManager.java?rev=265566&view=auto
==============================================================================
---
lenya/sandbox/search/src/java/org/apache/cocoon/components/search/components/IndexManager.java
(added)
+++
lenya/sandbox/search/src/java/org/apache/cocoon/components/search/components/IndexManager.java
Wed Aug 31 13:01:34 2005
@@ -0,0 +1,69 @@
+/**
+ * Copyright 2004 The Apache Software Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.cocoon.components.search.components;
+
+import org.apache.cocoon.components.search.Index;
+import org.apache.cocoon.components.search.IndexException;
+
+/**
+ * Index Manager: allows to register indexes and to access a specific index
+ * by its id.
+ *
+ * @author Maisonneuve Nicolas
+ */
+public interface IndexManager {
+
+    /** The role name of this component: the name of this interface. */
+    String ROLE = IndexManager.class.getName();
+
+    /**
+     * Return all indexes
+     *
+     * @return Array of indexes
+     * @throws IndexException
+     */
+    Index[] getIndex() throws IndexException;
+
+    /**
+     * Return the index with the id
+     *
+     * @param id
+     *            the index ID
+     * @return the index, null if not found
+     * @throws IndexException
+     */
+    Index getIndex(String id) throws IndexException;
+
+    /**
+     * Add an index to the index manager
+     *
+     * @param index
+     *            the index to add
+     */
+    void addIndex(Index index);
+
+    /**
+     * Remove an index
+     *
+     * @param id
+     *            ID of the index
+     */
+    void remove(String id);
+
+    /**
+     * Check if the index exists
+     *
+     * @param id
+     *            ID of the index
+     * @return true if the index exists
+     */
+    boolean contains(String id);
+}
Added:
lenya/sandbox/search/src/java/org/apache/cocoon/components/search/components/Indexer.java
URL:
http://svn.apache.org/viewcvs/lenya/sandbox/search/src/java/org/apache/cocoon/components/search/components/Indexer.java?rev=265566&view=auto
==============================================================================
---
lenya/sandbox/search/src/java/org/apache/cocoon/components/search/components/Indexer.java
(added)
+++
lenya/sandbox/search/src/java/org/apache/cocoon/components/search/components/Indexer.java
Wed Aug 31 13:01:34 2005
@@ -0,0 +1,100 @@
+/**
+ * Copyright 2004 The Apache Software Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.cocoon.components.search.components;
+
+import org.apache.cocoon.components.search.IndexException;
+import org.apache.lucene.analysis.Analyzer;
+import org.apache.lucene.document.Document;
+import org.apache.lucene.store.Directory;
+
+/**
+ * Indexer component: adds, updates and deletes lucene documents in an index
+ * directory.
+ */
+public interface Indexer {
+
+    /** The role name of this component: the name of this interface. */
+    String ROLE = Indexer.class.getName();
+
+    /**
+     * All lucene documents must have a unique identifier field
+     */
+    String DOCUMENT_UID_FIELD = "uid";
+
+    /**
+     * Index a document: the document is updated, or added if
+     * {@link #clearIndex()} was called before.
+     *
+     * @param doc
+     *            Document
+     * @throws IndexException
+     */
+    void index(Document doc) throws IndexException;
+
+    /**
+     * Delete a document
+     *
+     * @param uid
+     *            the uid of the document
+     * @return int the number of deleted documents
+     * @throws IndexException
+     */
+    int del(String uid) throws IndexException;
+
+    /**
+     * Optimize the index
+     *
+     * @throws IndexException
+     */
+    void optimize() throws IndexException;
+
+    /**
+     * Set a lucene analyzer
+     *
+     * @param analyzer
+     *            the analyzer
+     */
+    void setAnalyzer(Analyzer analyzer);
+
+    /**
+     * Get the lucene analyzer
+     *
+     * @return the analyzer
+     */
+    Analyzer getAnalyzer();
+
+    /**
+     * Set a merge factor value + set minMergeDocs=2*mergeFactor (see lucene
+     * docs)
+     *
+     * @param value
+     *            the new merge factor
+     */
+    void setMergeFactor(int value);
+
+    /**
+     * Get the merge factor
+     *
+     * @return the mergeFactor
+     */
+    int getMergeFactor();
+
+    /**
+     * Clear the index
+     *
+     * @throws IndexException
+     */
+    void clearIndex() throws IndexException;
+
+    /**
+     * Set the index directory
+     *
+     * @param directory
+     *            the index directory
+     * @throws IndexException
+     */
+    void setIndex(Directory directory) throws IndexException;
+
+    /**
+     * Get the index directory
+     *
+     * @return the index directory
+     * @throws IndexException
+     */
+    Directory getIndex() throws IndexException;
+}
Added:
lenya/sandbox/search/src/java/org/apache/cocoon/components/search/components/Searcher.java
URL:
http://svn.apache.org/viewcvs/lenya/sandbox/search/src/java/org/apache/cocoon/components/search/components/Searcher.java?rev=265566&view=auto
==============================================================================
---
lenya/sandbox/search/src/java/org/apache/cocoon/components/search/components/Searcher.java
(added)
+++
lenya/sandbox/search/src/java/org/apache/cocoon/components/search/components/Searcher.java
Wed Aug 31 13:01:34 2005
@@ -0,0 +1,59 @@
+package org.apache.cocoon.components.search.components;
+
+import org.apache.cocoon.ProcessingException;
+import org.apache.lucene.search.Hits;
+import org.apache.lucene.search.Query;
+import org.apache.lucene.store.Directory;
+
+/**
+ * This Searcher component allows to:
+ * <br/> - search in several indexes
+ * <br/> - sort hits with a specified field
+ *
+ * @author Nicolas Maisonneuve
+ */
+public interface Searcher {
+
+    /**
+     * The ROLE name of this avalon component.
+     * <p>
+     * Its value is the fully qualified name of this interface, i.e.
+     * <code>org.apache.cocoon.components.search.components.Searcher</code>.
+     * </p>
+     */
+    String ROLE = Searcher.class.getName();
+
+    /**
+     * Add a lucene directory; several directories can be added.
+     * <p>
+     * The directory specifies the directory used for looking up the
+     * index. It defines the physical place of the index.
+     * </p>
+     *
+     * @param directory
+     *            the directory to add
+     */
+    void addDirectory(Directory directory);
+
+    /**
+     * Sort the hits on a field.
+     *
+     * @param field
+     *            the index field
+     * @param reverse
+     *            reverse order or not
+     */
+    void setSortField(String field, boolean reverse);
+
+    /**
+     * Search using a Lucene Query object, returning zero or more hits.
+     *
+     * @param query
+     *            a lucene query
+     * @return Hits zero or more hits matching the query
+     * @exception ProcessingException
+     *                thrown due to processing errors while looking up the
+     *                index directory or generating the hits
+     */
+    Hits search(Query query) throws ProcessingException;
+}
Added:
lenya/sandbox/search/src/java/org/apache/cocoon/components/search/components/impl/AbstractIndexer.java
URL:
http://svn.apache.org/viewcvs/lenya/sandbox/search/src/java/org/apache/cocoon/components/search/components/impl/AbstractIndexer.java?rev=265566&view=auto
==============================================================================
---
lenya/sandbox/search/src/java/org/apache/cocoon/components/search/components/impl/AbstractIndexer.java
(added)
+++
lenya/sandbox/search/src/java/org/apache/cocoon/components/search/components/impl/AbstractIndexer.java
Wed Aug 31 13:01:34 2005
@@ -0,0 +1,393 @@
+/**
+ * Copyright 2004 The Apache Software Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.cocoon.components.search.components.impl;
+
+import java.io.IOException;
+
+import org.apache.avalon.excalibur.pool.Recyclable;
+import org.apache.avalon.framework.logger.AbstractLogEnabled;
+import org.apache.cocoon.components.search.IndexException;
+import org.apache.cocoon.components.search.components.Indexer;
+import org.apache.lucene.analysis.Analyzer;
+import org.apache.lucene.document.Document;
+import org.apache.lucene.index.IndexReader;
+import org.apache.lucene.index.IndexWriter;
+import org.apache.lucene.index.Term;
+import org.apache.lucene.store.Directory;
+
+/**
+ * Abstract Indexer
+ *
+ * @author Nicolas Maisonneuve
+ */
+public abstract class AbstractIndexer extends AbstractLogEnabled implements
+ Indexer, Recyclable {
+
+ /**
+ * the lucene Analyzer (see lucene doc); passed to the IndexWriter when it
+ * is opened and when a document is added
+ */
+ protected Analyzer analyzer;
+
+ /**
+ * lucene Directory (see lucene doc) holding the index
+ */
+ protected Directory dir;
+
+ /**
+ * MergeFactor (see lucene doc); release() resets it to 10
+ */
+ protected int mergeFactor;
+
+ /**
+ * clear mode: set to true by clearIndex() and to false by setIndex();
+ * while true, index() calls addDocument instead of updateDocument
+ */
+ protected boolean clear_mode;
+
+ // runtime variables: lucene indexwriter and indexreader
+ // reader used for deletions; null while no reader is open
+ protected IndexReader delete_reader;
+
+ // writer used for additions and optimize(); null while no writer is open
+ protected IndexWriter add_writer;
+
+    /**
+     * Stores the merge factor requested for this indexer.
+     *
+     * @param value the merge factor
+     * @see org.apache.cocoon.components.search.components.Indexer#setMergeFactor(int)
+     */
+    public void setMergeFactor(int value) {
+        this.mergeFactor = value;
+    }
+
+    /**
+     * Returns the merge factor currently stored in this indexer.
+     *
+     * @return the merge factor
+     * @see org.apache.cocoon.components.search.components.Indexer#getMergeFactor()
+     */
+    public int getMergeFactor() {
+        return this.mergeFactor;
+    }
+
+    /**
+     * Returns the lucene directory this indexer works on.
+     *
+     * @return the directory, or null when none has been set
+     * @see org.apache.cocoon.components.search.components.Indexer#getIndex()
+     */
+    public Directory getIndex() {
+        return dir;
+    }
+
+ /*
+ * (non-Javadoc)
+ *
+ * @see
org.apache.cocoon.components.search.components.Indexer#setIndex(org.apache.lucene.store.Directory)
+ */
+ public void setIndex(Directory dir) throws IndexException {
+ if (dir == null) {
+ throw new IllegalArgumentException("set a null directory");
+ }
+ this.dir = dir;
+ clear_mode = false;
+ try {
+ IndexReader reader = IndexReader.open(dir);
+ reader.close();
+
+ } catch (IOException ioe) {
+ // couldn't open the index - so recreate it
+ if (getLogger().isWarnEnabled()) {
+ getLogger().warn("couldn't open the index - so recreate it");
+ }
+ this.clearIndex();
+ }
+ }
+
+    /**
+     * Sets the lucene analyzer used by this indexer.
+     *
+     * @param analyzer
+     *            the analyzer, must not be null
+     * @throws IllegalArgumentException
+     *             if analyzer is null
+     * @see org.apache.cocoon.components.search.components.Indexer#setAnalyzer(org.apache.lucene.analysis.Analyzer)
+     */
+    public void setAnalyzer(Analyzer analyzer) {
+        if (analyzer == null) {
+            throw new IllegalArgumentException("set a null analyzer");
+        }
+        this.analyzer = analyzer;
+
+        if (getLogger().isDebugEnabled()) {
+            final String analyzerClass = analyzer.getClass().getName();
+            getLogger().debug("set the analyzer " + analyzerClass);
+        }
+    }
+
+    /**
+     * Returns the lucene analyzer used by this indexer.
+     *
+     * @return the analyzer, or null when none has been set
+     * @see org.apache.cocoon.components.search.components.Indexer#getAnalyzer()
+     */
+    public Analyzer getAnalyzer() {
+        return this.analyzer;
+    }
+
+ /**
+ * Called by index() when the indexer is not in clear mode.
+ * NOTE(review): presumably replaces an already indexed document having the
+ * same uid - confirm against the concrete subclasses.
+ *
+ * @param doc
+ * the document to be indexed
+ * @throws IndexException
+ */
+ protected abstract void updateDocument(Document doc) throws IndexException;
+
+ /**
+ * Called by index() when the indexer is in clear mode (the index has been
+ * cleared), so the document is only added.
+ *
+ * @param doc
+ * the document to be indexed
+ * @throws IndexException
+ */
+ protected abstract void addDocument(Document doc) throws IndexException;
+
+    /**
+     * Indexes a document: a plain add while in clear mode, an update
+     * otherwise.
+     *
+     * @param doc
+     *            the document to be indexed
+     * @throws IndexException
+     * @see org.apache.cocoon.components.search.components.Indexer#index(org.apache.lucene.document.Document)
+     */
+    public void index(Document doc) throws IndexException {
+        if (!clear_mode) {
+            updateDocument(doc);
+            return;
+        }
+        // As we know the index is empty, we just add the document
+        addDocument(doc);
+    }
+
+    /**
+     * Switches to delete mode and deletes the documents having the given uid.
+     *
+     * @param uid
+     *            the uid of the document to be deleted
+     * @return the number of deleted documents
+     * @throws IndexException
+     * @see org.apache.cocoon.components.search.components.Indexer#del(java.lang.String)
+     */
+    public int del(String uid) throws IndexException {
+        this.switchToDEL_MODE();
+        final int deleted = this.deleteDocument(this.delete_reader, uid);
+        return deleted;
+    }
+
+    /**
+     * Deletes the documents whose uid field matches the given uid.
+     *
+     * @param deleter
+     *            the lucene indexreader used to delete the documents
+     * @param uid
+     *            the uid of the document to be deleted
+     * @return the number of deleted documents
+     * @throws IndexException
+     *             if the reader reports an IOException
+     */
+    final protected int deleteDocument(IndexReader deleter, String uid)
+            throws IndexException {
+        int deleted = 0;
+        final Term uidTerm = new Term(DOCUMENT_UID_FIELD, uid);
+        try {
+            deleted = deleter.delete(uidTerm);
+        } catch (IOException ex) {
+            // releases the resources and rethrows as IndexException
+            handleError("delete document (uid:" + uid + ") error", ex);
+        }
+        if (getLogger().isDebugEnabled()) {
+            getLogger().debug("document deleted (uid:" + uid + ")");
+        }
+        return deleted;
+    }
+
+    /**
+     * Adds a document to the index through the given writer, using the
+     * analyzer of this indexer.
+     *
+     * @param writer
+     *            the lucene indexwriter
+     * @param document
+     *            the document to be indexed
+     * @throws IndexException
+     *             if the writer reports an IOException
+     */
+    final protected void addDocument(IndexWriter writer, Document document)
+            throws IndexException {
+        try {
+            writer.addDocument(document, this.analyzer);
+        } catch (IOException ex) {
+            // releases the resources and rethrows as IndexException
+            final String failure = "add document (uid:"
+                    + document.get(DOCUMENT_UID_FIELD) + ") error";
+            handleError(failure, ex);
+        }
+        if (getLogger().isDebugEnabled()) {
+            final String added = "document added (uid:"
+                    + document.get(DOCUMENT_UID_FIELD) + ")";
+            getLogger().debug(added);
+        }
+    }
+
+    /**
+     * Optimizes the index: switches to add mode without clearing and asks the
+     * writer to optimize.
+     *
+     * @throws IndexException
+     *             if the writer reports an IOException
+     * @see org.apache.cocoon.components.search.components.Indexer#optimize()
+     */
+    public void optimize() throws IndexException {
+        try {
+            // make sure a writer is open, keeping the existing index content
+            switchToADD_MODE(false);
+            this.add_writer.optimize();
+        } catch (IOException ex) {
+            throw new IndexException("optimization error", ex);
+        }
+    }
+
+    /**
+     * Enters clear mode and switches to add mode, requesting a cleared index.
+     *
+     * @throws IndexException
+     * @see org.apache.cocoon.components.search.components.Indexer#clearIndex()
+     */
+    public void clearIndex() throws IndexException {
+        clear_mode = true;
+        switchToADD_MODE(true);
+    }
+
+    /**
+     * Releases the resources: closes the writer and the reader, then resets
+     * the directory, the analyzer and the merge factor to their defaults.
+     * <p>
+     * Each step runs even when the previous one failed, so a failing writer
+     * close neither leaves the reader open nor keeps stale settings. When
+     * both closes fail, the reader failure is the one reported.
+     * </p>
+     *
+     * @throws IndexException
+     *             if closing the writer or the reader fails
+     */
+    protected void release() throws IndexException {
+        try {
+            this.closeWriter();
+        } finally {
+            try {
+                this.closeReader();
+            } finally {
+                // set default value
+                dir = null;
+                analyzer = null;
+                mergeFactor = 10;
+            }
+        }
+    }
+
+ /**
+ * recylcle this object
+ */
+ public void recycle() {
+ try {
+ release();
+ } catch (IndexException ex) {
+ this.getLogger().error("recycle error", ex);
+ }
+ }
+
+    /**
+     * Switch to write mode (close reader, open writer) if it's not already
+     * done.
+     * <p>
+     * When a clear is requested while a writer is already open, that writer
+     * is closed first so that a new one is opened in create mode; otherwise
+     * the request would be ignored and the old index content would survive.
+     * </p>
+     *
+     * @param clear
+     *            clear index
+     * @throws IndexException
+     *             if closing or opening the reader/writer fails
+     */
+    final protected void switchToADD_MODE(boolean clear) throws IndexException {
+        if (clear) {
+            // no-op when no writer is open
+            closeWriter();
+        }
+        if (add_writer == null) {
+            closeReader();
+            openIndexWriter(clear);
+        }
+    }
+
+    /**
+     * Switch to del mode (close writer, open reader); does nothing when a
+     * reader is already open.
+     *
+     * @throws IndexException
+     */
+    final protected void switchToDEL_MODE() throws IndexException {
+        if (this.delete_reader != null) {
+            // already in del mode
+            return;
+        }
+        this.closeWriter();
+        this.openIndexReader();
+    }
+
+    /**
+     * Opens the index writer on the current directory with the current
+     * analyzer, then raises the writer's merge settings when the configured
+     * merge factor is larger than the writer's own.
+     *
+     * @param create
+     *            clear index or not
+     * @throws IndexException
+     *             if the writer cannot be opened
+     */
+    final protected void openIndexWriter(boolean create) throws IndexException {
+        try {
+            this.add_writer = new IndexWriter(this.dir, this.analyzer, create);
+        } catch (IOException e) {
+            throw new IndexException("open writer error", e);
+        }
+
+        // the configured value is only applied when it exceeds the writer's
+        final boolean raiseMergeFactor = this.mergeFactor > this.add_writer.mergeFactor;
+        if (raiseMergeFactor) {
+            this.add_writer.minMergeDocs = this.mergeFactor * 2;
+            this.add_writer.mergeFactor = this.mergeFactor;
+        }
+
+        if (this.getLogger().isDebugEnabled()) {
+            this.getLogger().debug("writer is opened");
+        }
+    }
+
+    /**
+     * Opens the index reader used for deletions on the current directory.
+     *
+     * @throws IndexException
+     *             if the reader cannot be opened
+     */
+    final protected void openIndexReader() throws IndexException {
+        try {
+            delete_reader = IndexReader.open(this.dir);
+        } catch (IOException e) {
+            throw new IndexException("open reader error", e);
+        }
+
+        if (this.getLogger().isDebugEnabled()) {
+            this.getLogger().debug("reader is opened");
+        }
+    }
+
+    /**
+     * Closes the writer when one is open; the writer reference is dropped
+     * even if closing fails.
+     *
+     * @throws IndexException
+     *             if the writer cannot be closed
+     */
+    final protected void closeWriter() throws IndexException {
+        if (this.add_writer == null) {
+            // nothing to close
+            return;
+        }
+        try {
+            this.add_writer.close();
+        } catch (IOException ex) {
+            throw new IndexException("close writer error", ex);
+        } finally {
+            this.add_writer = null;
+        }
+        if (this.getLogger().isDebugEnabled()) {
+            this.getLogger().debug("writer is closed");
+        }
+    }
+
+    /**
+     * Closes the reader when one is open.
+     * <p>
+     * The reader reference is dropped before closing: the error path releases
+     * all resources, which calls this method again, and must not find (and
+     * re-close) the reader that has just failed.
+     * </p>
+     *
+     * @throws IndexException
+     *             if the reader cannot be closed
+     */
+    final protected void closeReader() throws IndexException {
+        if (this.delete_reader != null) {
+            final IndexReader closing = this.delete_reader;
+            this.delete_reader = null;
+            try {
+                closing.close();
+            } catch (IOException ex) {
+                // releases the remaining resources and rethrows
+                handleError("close reader error", ex);
+            }
+            if (getLogger().isDebugEnabled()) {
+                getLogger().debug("reader is closed");
+            }
+        }
+    }
+
+    /**
+     * Handle error: releases the resources (writer, reader, settings) and
+     * always throws an IndexException wrapping the given cause.
+     * <p>
+     * A failure of the release itself is logged rather than thrown, so the
+     * original error stays the one reported to the caller.
+     * </p>
+     *
+     * @param message
+     *            the message of the thrown exception
+     * @param exception
+     *            the cause
+     * @throws IndexException
+     *             always
+     */
+    private void handleError(String message, Exception exception)
+            throws IndexException {
+        try {
+            release();
+        } catch (IndexException releaseFailure) {
+            if (getLogger().isWarnEnabled()) {
+                getLogger().warn("release error while handling: " + message,
+                        releaseFailure);
+            }
+        }
+        throw new IndexException(message, exception);
+    }
+
+}
Added:
lenya/sandbox/search/src/java/org/apache/cocoon/components/search/components/impl/AbstractSearcher.java
URL:
http://svn.apache.org/viewcvs/lenya/sandbox/search/src/java/org/apache/cocoon/components/search/components/impl/AbstractSearcher.java?rev=265566&view=auto
==============================================================================
---
lenya/sandbox/search/src/java/org/apache/cocoon/components/search/components/impl/AbstractSearcher.java
(added)
+++
lenya/sandbox/search/src/java/org/apache/cocoon/components/search/components/impl/AbstractSearcher.java
Wed Aug 31 13:01:34 2005
@@ -0,0 +1,122 @@
+/**
+ * Copyright 2004 The Apache Software Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.cocoon.components.search.components.impl;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.List;
+
+import org.apache.avalon.excalibur.pool.Recyclable;
+import org.apache.avalon.framework.activity.Disposable;
+import org.apache.avalon.framework.logger.AbstractLogEnabled;
+import org.apache.cocoon.ProcessingException;
+import org.apache.cocoon.components.search.components.Searcher;
+import org.apache.lucene.search.Hits;
+import org.apache.lucene.search.Query;
+import org.apache.lucene.search.Sort;
+import org.apache.lucene.search.SortField;
+import org.apache.lucene.store.Directory;
+
+/**
+ * @author Nicolas Maisonneuve
+ *
+ */
+abstract class AbstractSearcher extends AbstractLogEnabled implements Searcher,
+ Disposable, Recyclable {
+ /**
+ * Lucene directories added through addDirectory(); emptied by recycle()
+ */
+ protected List directories = new ArrayList();
+
+ /**
+ * Lucene SortField used to sort the hits; null means search() does not
+ * pass a Sort
+ */
+ protected SortField sortfield;
+
+ /**
+ * Lucene Searcher used by search() to run the query
+ */
+ protected org.apache.lucene.search.Searcher luceneSearcher;
+
+    /**
+     * Registers one more lucene directory to search in.
+     *
+     * @param directory
+     *            the directory to add
+     * @see org.apache.cocoon.components.search.components.Searcher#addDirectory(org.apache.lucene.store.Directory)
+     */
+    public void addDirectory(Directory directory) {
+        this.directories.add(directory);
+    }
+
+ /*
+ * (non-Javadoc)
+ *
+ * @see
org.apache.cocoon.components.search.components.Searcher#search(org.apache.lucene.search.Query)
+ */
+ public Hits search(Query query) throws ProcessingException {
+ try {
+ getLuceneSearcher();
+
+ if (sortfield==null) {
+ return luceneSearcher.search(query);
+ }
+ else {
+ return luceneSearcher.search(query, new Sort(sortfield));
+ }
+ } catch (IOException e) {
+ throw new ProcessingException(e);
+ }
+
+ }
+
+ /**
+ * Called by search() right before the luceneSearcher field is used.
+ * NOTE(review): implementations presumably create the lucene searcher over
+ * the added directories and store it in luceneSearcher - confirm against
+ * the concrete subclasses.
+ *
+ * @throws IOException
+ */
+ protected abstract void getLuceneSearcher()
+ throws IOException;
+
+    /**
+     * Defines the field the hits are sorted on.
+     *
+     * @param field
+     *            the index field
+     * @param reverse
+     *            reverse order or not
+     * @see org.apache.cocoon.components.search.components.Searcher#setSortField(java.lang.String,
+     *      boolean)
+     */
+    public void setSortField(String field, boolean reverse) {
+        final SortField requested = new SortField(field, reverse);
+        this.sortfield = requested;
+    }
+
+    /**
+     * Disposes this component; the work is the same as for recycling.
+     *
+     * @see org.apache.avalon.framework.activity.Disposable#dispose()
+     */
+    public void dispose() {
+        this.recycle();
+    }
+
+    /**
+     * Recycles this searcher: forgets the directories and the sort field and
+     * closes the lucene searcher, if any.
+     * <p>
+     * The lucene searcher may never have been created (no search was run), so
+     * it is checked for null; its reference is dropped after closing so that
+     * a later recycle() or dispose() does not close it a second time.
+     * </p>
+     *
+     * @see org.apache.avalon.excalibur.pool.Recyclable#recycle()
+     */
+    public void recycle() {
+        directories.clear();
+        sortfield = null;
+        if (luceneSearcher != null) {
+            try {
+                luceneSearcher.close();
+            } catch (IOException ex) {
+                this.getLogger().error("release error", ex);
+            } finally {
+                luceneSearcher = null;
+            }
+        }
+    }
+
+}
Added:
lenya/sandbox/search/src/java/org/apache/cocoon/components/search/components/impl/AnalyzerManagerImpl.java
URL:
http://svn.apache.org/viewcvs/lenya/sandbox/search/src/java/org/apache/cocoon/components/search/components/impl/AnalyzerManagerImpl.java?rev=265566&view=auto
==============================================================================
---
lenya/sandbox/search/src/java/org/apache/cocoon/components/search/components/impl/AnalyzerManagerImpl.java
(added)
+++
lenya/sandbox/search/src/java/org/apache/cocoon/components/search/components/impl/AnalyzerManagerImpl.java
Wed Aug 31 13:01:34 2005
@@ -0,0 +1,229 @@
+/**
+ * Copyright 2004 The Apache Software Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.cocoon.components.search.components.impl;
+
+import java.util.HashMap;
+import java.util.Map;
+
+import org.apache.avalon.framework.configuration.Configurable;
+import org.apache.avalon.framework.configuration.Configuration;
+import org.apache.avalon.framework.configuration.ConfigurationException;
+import org.apache.avalon.framework.logger.AbstractLogEnabled;
+import org.apache.avalon.framework.logger.LogEnabled;
+import org.apache.avalon.framework.service.ServiceException;
+import org.apache.avalon.framework.service.ServiceManager;
+import org.apache.avalon.framework.service.Serviceable;
+import org.apache.avalon.framework.thread.ThreadSafe;
+import org.apache.cocoon.components.search.analyzer.ConfigurableAnalyzer;
+import org.apache.cocoon.components.search.components.AnalyzerManager;
+import org.apache.excalibur.source.Source;
+import org.apache.excalibur.source.SourceResolver;
+import org.apache.lucene.analysis.Analyzer;
+
+/**
+ * Implementation of the Analyzer Component
+ *
+ * @author Maisonneuve Nicolas
+ * @version 1.0
+ */
+public class AnalyzerManagerImpl extends AbstractLogEnabled implements
+ AnalyzerManager, Serviceable, Configurable, ThreadSafe {
+
+ /**
+ * Name of the configuration element declaring an analyzer
+ */
+ public static final String ANALYZER_ELEMENT = "analyzer";
+
+ /**
+ * Name of the attribute holding the id of the analyzer
+ */
+ public static final String ID_ATT = "id";
+
+ /**
+ * Name of the attribute holding the analyzer class name
+ */
+ public static final String CLASSNAME_ATT = "class";
+
+ /**
+ * Name of the attribute holding the file used to configure the analyzer;
+ * only read for analyzers implementing ConfigurableAnalyzer
+ */
+ public static final String CONFIG_ATT = "configfile";
+
+ /**
+ * Name of the attribute telling whether the analyzer is automatically
+ * updated when the config file changes (defaults to false)
+ */
+ public static final String CONFIGCHECK_ATT = "checkupdate";
+
+ /**
+ * Map of all the analyzers (ID, analyzer instance)
+ */
+ private Map analyzers = new HashMap();
+
+ // service manager received in service(); used to look up the SourceResolver
+ private ServiceManager manager;
+
+    /**
+     * Tells whether an analyzer is registered under the given id.
+     *
+     * @param id
+     *            the analyzer id
+     * @return true if an analyzer with this id is registered
+     */
+    public boolean exist(String id) {
+        final boolean registered = analyzers.containsKey(id);
+        return registered;
+    }
+
+ public void configure(Configuration configuration)
+ throws ConfigurationException {
+ Analyzer analyzer;
+ String key;
+ Source conffile = null;
+ boolean checkconfigfile = false;
+ SourceResolver resolver;
+
+ Configuration[] confAnalyzer = configuration
+ .getChildren(ANALYZER_ELEMENT);
+ if (confAnalyzer.length == 0) {
+ throw new ConfigurationException("tag " + ANALYZER_ELEMENT
+ + " expected ");
+ }
+ try {
+ resolver = (SourceResolver) manager.lookup(SourceResolver.ROLE);
+ } catch (ServiceException e) {
+ throw new ConfigurationException(" source resolver error", e);
+ }
+
+ for (int i = 0; i < confAnalyzer.length; i++) {
+
+ // KEY
+ key = confAnalyzer[i].getAttribute(ID_ATT);
+ if (key == null) {
+ throw new ConfigurationException("element " + ANALYZER_ELEMENT
+ + " must have a " + ID_ATT + " attribute");
+ }
+
+ // CLASS
+ String classname = confAnalyzer[i].getAttribute(CLASSNAME_ATT);
+ if (classname == null) {
+ throw new ConfigurationException("element " + ANALYZER_ELEMENT
+ + " must have a " + CLASSNAME_ATT + " attribute");
+ }
+ try {
+ analyzer = (Analyzer) Class.forName(classname).newInstance();
+ } catch (ClassNotFoundException ex) {
+ throw new ConfigurationException("analyzer class not found "
+ + classname, ex);
+ } catch (Exception ex) {
+ throw new ConfigurationException("instanciation of " + key
+ + " error", ex);
+ }
+
+ if (analyzer instanceof LogEnabled) {
+ this.setupLogger(analyzer);
+ }
+
+ if (analyzer instanceof ConfigurableAnalyzer) {
+ ConfigurableAnalyzer confanalyzer = ((ConfigurableAnalyzer)
analyzer);
+
+ // CONFIGFILE
+ String conffilename = confAnalyzer[i].getAttribute(CONFIG_ATT);
+
+ if (conffilename == null || conffilename.equals("")) {
+ throw new ConfigurationException("the analyzer " + key
+ + " must have a " + CONFIG_ATT + " attribute");
+ }
+
+ try {
+ conffile = resolver.resolveURI(conffilename);
+ } catch (Exception ex1) {
+ throw new ConfigurationException(
+ "Config file source error", ex1);
+ }
+
+ // CHECKUPDATE
+ checkconfigfile = confAnalyzer[i].getAttributeAsBoolean(
+ CONFIGCHECK_ATT, false);
+
+ confanalyzer.setAnalyerManager(this);
+ confanalyzer.setConfigFile(conffile);
+ confanalyzer.setEnableCheckFile(checkconfigfile);
+ }
+ this.put(key, analyzer);
+ }
+
+ manager.release(resolver);
+ getLogger().info("AnalyzerManager configured.");
+ System.out.println("Search Engine - AnalyzerManager configured.");
+
+ }
+
+    /**
+     * Registers an analyzer under the given id and logs the registration.
+     *
+     * @param id
+     *            the analyzer id
+     * @param analyzer
+     *            the analyzer
+     * @see org.apache.cocoon.components.search.components.AnalyzerManager#put(java.lang.String,
+     *      org.apache.lucene.analysis.Analyzer)
+     */
+    public void put(String id, Analyzer analyzer) {
+        analyzers.put(id, analyzer);
+        final String analyzerClass = analyzer.getClass().getName();
+        getLogger().info(
+                "add analyzer id: " + id + " with class " + analyzerClass);
+    }
+
+    /**
+     * Unregisters the analyzer stored under the given id.
+     *
+     * @param id
+     *            the analyzer id
+     * @see org.apache.cocoon.components.search.components.AnalyzerManager#remove(java.lang.String)
+     */
+    public void remove(String id) {
+        analyzers.remove(id);
+        if (getLogger().isDebugEnabled()) {
+            getLogger().debug("remove analyzer id: " + id);
+        }
+    }
+
+    /**
+     * Returns the ids of all registered analyzers.
+     *
+     * @return the ids, as a new array
+     * @see org.apache.cocoon.components.search.components.AnalyzerManager#getAnalyzersID()
+     */
+    public String[] getAnalyzersID() {
+        final String[] ids = new String[this.analyzers.size()];
+        return (String[]) this.analyzers.keySet().toArray(ids);
+    }
+
+    /**
+     * Returns the analyzer registered under the given id. A configurable
+     * analyzer with file checking enabled is reconfigured first.
+     *
+     * @param id
+     *            the analyzer id
+     * @return the analyzer
+     * @throws ConfigurationException
+     *             if no analyzer is registered under this id
+     * @see org.apache.cocoon.components.search.components.AnalyzerManager#getAnalyzer(java.lang.String)
+     */
+    public Analyzer getAnalyzer(String id) throws ConfigurationException {
+        final Analyzer found = (Analyzer) analyzers.get(id);
+        if (found == null) {
+            throw new ConfigurationException("analyzer " + id
+                    + " doesn't exist");
+        }
+        if (!(found instanceof ConfigurableAnalyzer)) {
+            return found;
+        }
+        final ConfigurableAnalyzer configurable = (ConfigurableAnalyzer) found;
+        if (configurable.enableCheckFile()) {
+            configurable.reconfigure();
+        }
+        return found;
+    }
+
+    /**
+     * Keeps the service manager; configure() uses it to look up the source
+     * resolver.
+     *
+     * @param manager
+     *            the service manager
+     * @see org.apache.avalon.framework.service.Serviceable#service(org.apache.avalon.framework.service.ServiceManager)
+     */
+    public void service(ServiceManager manager) throws ServiceException {
+        final ServiceManager received = manager;
+        this.manager = received;
+    }
+
+}
|