Apache Jackrabbit JCA 2.7.5 .docx 和 .xlsx 索引
Apache Jackrabbit JCA 2.7.5 .docx and .xlsx indexing
我正在使用 Apache Jackrabbit JCA 2.7.5,问题是 .docx 和 .xlsx 文件没有被索引。
我的步骤:
- 在 glassfish
上将 Jackrabbit JCA 部署为 resource adapter
- 为
resource adapter
创建一个 Connector Connection Pool
表示 ConfigFile=path/to/the/repository.xml
和 HomeDir=path/to/the //miss the repository.xml
- 为连接器池(jndi)创建一个
Connector Resources
- 创建网络应用程序
创建 class 以从连接器资源(下面的代码)获取会话
import java.io.Serializable;
import java.net.MalformedURLException;
import javax.annotation.Resource;
import javax.ejb.Stateless;
import javax.jcr.LoginException;
import javax.jcr.Repository;
import javax.jcr.RepositoryException;
import javax.jcr.Session;
import javax.jcr.SimpleCredentials;
import javax.naming.InitialContext;
import javax.naming.NamingException;
/**
 * Stateless bean that looks up the JCR repository bound under the JNDI name
 * {@code jndi/jca} and opens sessions on it.
 */
@Stateless
public class OcmRepository implements Serializable {

    /** Repository resolved by the most recent call to {@link #getSession(String, String)}. */
    public Repository repository;

    /** Session opened by the most recent call to {@link #getSession(String, String)}. */
    public Session session;

    public OcmRepository() {
    }

    /**
     * Resolves the repository from JNDI and logs in with the given credentials.
     * Both the repository and the new session are also stored in the public fields.
     *
     * @param log user id used for the login
     * @param mdp password of that user
     * @return the newly opened session
     * @throws LoginException if the repository rejects the credentials
     * @throws RepositoryException if the login fails for another repository reason
     * @throws NamingException if the JNDI lookup of {@code jndi/jca} fails
     * @throws MalformedURLException declared by the signature; not raised by the visible code
     */
    public Session getSession(String log, String mdp)
            throws LoginException, RepositoryException, NamingException, MalformedURLException {
        final Object bound = new InitialContext().lookup("jndi/jca");
        this.repository = (Repository) bound;

        final SimpleCredentials credentials = new SimpleCredentials(log, mdp.toCharArray());
        // null is passed as the second login argument, exactly as the original did.
        this.session = this.repository.login(credentials, null);
        return this.session;
    }
}
创建自定义文件类型
import javax.jcr.PropertyType;
import javax.jcr.Session;
import javax.jcr.nodetype.NodeType;
import javax.jcr.nodetype.NodeTypeManager;
import javax.jcr.nodetype.NodeTypeTemplate;
import javax.jcr.nodetype.PropertyDefinitionTemplate;
/**
*
* @author nathan
*/
public class FileType {
public static void RegisterFileType(Session session) throws Exception {
NodeTypeManager nodeTypeManager = session.getWorkspace().getNodeTypeManager();
NodeTypeTemplate nodeType = nodeTypeManager.createNodeTypeTemplate();
nodeType.setName("FileType");
String[] str = {"nt:resource"};
nodeType.setDeclaredSuperTypeNames(str);
nodeType.setMixin(false);
nodeType.setQueryable(true);
PropertyDefinitionTemplate path = nodeTypeManager.createPropertyDefinitionTemplate();
path.setName("jcr:path");
path.setRequiredType(PropertyType.PATH);
path.setQueryOrderable(false);
path.setFullTextSearchable(false);
nodeType.getPropertyDefinitionTemplates().add(path);
PropertyDefinitionTemplate nom = nodeTypeManager.createPropertyDefinitionTemplate();
nom.setName("jcr:nom");
nom.setRequiredType(PropertyType.STRING);
nom.setQueryOrderable(true);
nom.setFullTextSearchable(true);
nodeType.getPropertyDefinitionTemplates().add(nom);
PropertyDefinitionTemplate description = nodeTypeManager.createPropertyDefinitionTemplate();
description.setName("jcr:description");
description.setRequiredType(PropertyType.STRING);
description.setQueryOrderable(true);
description.setFullTextSearchable(true);
nodeType.getPropertyDefinitionTemplates().add(description);
PropertyDefinitionTemplate motsCles = nodeTypeManager.createPropertyDefinitionTemplate();
motsCles.setName("jcr:motsCles");
motsCles.setRequiredType(PropertyType.STRING);
motsCles.setQueryOrderable(true);
motsCles.setFullTextSearchable(true);
nodeType.getPropertyDefinitionTemplates().add(motsCles);
PropertyDefinitionTemplate size = nodeTypeManager.createPropertyDefinitionTemplate();
size.setName("jcr:size");
size.setRequiredType(PropertyType.STRING);
size.setQueryOrderable(true);
size.setFullTextSearchable(false);
nodeType.getPropertyDefinitionTemplates().add(size);
PropertyDefinitionTemplate users = nodeTypeManager.createPropertyDefinitionTemplate();
users.setName("jcr:users");
users.setRequiredType(PropertyType.STRING);
users.setQueryOrderable(true);
users.setFullTextSearchable(false);
nodeType.getPropertyDefinitionTemplates().add(users);
PropertyDefinitionTemplate groupe = nodeTypeManager.createPropertyDefinitionTemplate();
groupe.setName("jcr:groupe");
groupe.setRequiredType(PropertyType.STRING);
groupe.setQueryOrderable(true);
groupe.setFullTextSearchable(false);
nodeType.getPropertyDefinitionTemplates().add(groupe);
NodeType newnodetype = nodeTypeManager.registerNodeType(nodeType, true);
session.save();
}
}
为持久化创建抽象类(abstract class)
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
import javax.jcr.Session;
import org.apache.jackrabbit.ocm.query.Filter;
import org.apache.jackrabbit.ocm.query.impl.FilterImpl;
import org.apache.jackrabbit.ocm.query.impl.QueryImpl;
import org.apache.jackrabbit.ocm.query.Query;
import org.apache.jackrabbit.ocm.query.QueryManager;
import org.apache.jackrabbit.ocm.manager.ObjectContentManager;
import org.apache.jackrabbit.ocm.manager.impl.ObjectContentManagerImpl;
import org.apache.jackrabbit.ocm.mapper.Mapper;
import org.apache.jackrabbit.ocm.mapper.impl.annotation.AnnotationMapperImpl;
import org.apache.jackrabbit.ocm.reflection.ReflectionUtils;
/**
 * Base class for beans backed by Jackrabbit OCM. It wraps an {@link ObjectContentManager}
 * and offers generic persistence and query helpers for one mapped entity class.
 *
 * @param <T> entity type handled by the concrete bean
 * @author nathan
 */
public abstract class AbstractBean<T> {

    private final Class<T> entityClass;
    private ObjectContentManager ocm;
    private Mapper mapper;

    /**
     * Creates a bean without an ObjectContentManager. One has to be supplied through
     * {@link #setOcm(ObjectContentManager)} before any persistence or query method is used.
     *
     * @param entityClass mapped entity class handled by this bean
     */
    public AbstractBean(Class<T> entityClass) {
        this.entityClass = entityClass;
    }

    /**
     * Creates a bean whose ObjectContentManager is built from the given session and the
     * default annotation mapper.
     *
     * @param entityClass mapped entity class handled by this bean
     * @param session javax.jcr.Session attached to the bean
     */
    public AbstractBean(Class<T> entityClass, Session session) {
        this.entityClass = entityClass;
        ocm = new ObjectContentManagerImpl(session, this.getDefaultMapper());
    }

    /**
     * @return ObjectContentManager of the bean (may be null if none was set)
     */
    public ObjectContentManager getOcm() throws Exception {
        return ocm;
    }

    /**
     * Creates a new ObjectContentManager for the given session and mapper. The new instance
     * is only returned to the caller; it is not stored in this bean.
     *
     * @param session javax.jcr.Session the manager works on
     * @param map mapper used to map entities between the application and the repository
     * @return the newly created ObjectContentManager
     */
    public ObjectContentManager getOcm(Session session, Mapper map) throws Exception {
        return new ObjectContentManagerImpl(session, map);
    }

    public void setOcm(ObjectContentManager ocm) {
        this.ocm = ocm;
    }

    /** Builds the annotation mapper for the four entity classes of the application. */
    private Mapper getDefaultMapper() {
        ReflectionUtils.setClassLoader(com.ged.ocm.entity.Groupe.class.getClassLoader());
        // Raw Class list: this is the element type the original passes to AnnotationMapperImpl.
        List<Class> classes = new ArrayList<Class>();
        classes.add(com.ged.ocm.entity.Fichier.class);
        classes.add(com.ged.ocm.entity.Dossier.class);
        classes.add(com.ged.ocm.entity.Groupe.class);
        classes.add(com.ged.ocm.entity.SimpleNode.class);
        return new AnnotationMapperImpl(classes);
    }

    /** @return the mapper stored by {@link #setMapper(Mapper)}; it is not used to build the ocm */
    public Mapper getMapper() {
        return mapper;
    }

    public void setMapper(Mapper mapper) {
        this.mapper = mapper;
    }

    /** Makes OCM's ReflectionUtils use the class loader of the given class. */
    public void setLoader(Class classe) {
        ReflectionUtils.setClassLoader(classe.getClassLoader());
    }

    /** Inserts the entity and saves. */
    public void create(T entity) {
        ocm.insert(entity);
        ocm.save();
    }

    /** Updates the entity and saves. */
    public void edit(T entity) {
        ocm.update(entity);
        ocm.save();
    }

    /** Removes the entity and saves. */
    public void remove(T entity) {
        ocm.remove(entity);
        ocm.save();
    }

    /** Refreshes the content manager (passing {@code true}) and then saves. */
    public void refresh() {
        ocm.refresh(true);
        ocm.save();
    }

    /** Copies the object at {@code orgPath} to {@code destPath} and saves. */
    public void copy(String orgPath, String destPath) {
        ocm.copy(orgPath, destPath);
        ocm.save();
    }

    /** Moves the object at {@code orgPath} to {@code destPath} and saves. */
    public void move(String orgPath, String destPath) {
        ocm.move(orgPath, destPath);
        ocm.save();
    }

    /** Removes the object stored at the given path and saves. */
    public void removeByPath(String path) {
        ocm.remove(path);
        ocm.save();
    }

    /**
     * Removes every entity whose attributes equal all given values, then saves.
     *
     * @param filters attribute name to expected value
     */
    public void removeAllByEqual(Map<String, String> filters) {
        QueryManager queryManager = ocm.getQueryManager();
        Filter filter = newFilter(queryManager);
        applyEqualTo(filter, filters);
        ocm.remove(queryManager.createQuery(filter));
        ocm.save();
    }

    /**
     * Same as {@link #removeAllByEqual(Map)} but with the filter scope set to {@code nodePath}.
     */
    public void removeAllByEqual(String nodePath, Map<String, String> filters) {
        QueryManager queryManager = ocm.getQueryManager();
        Filter filter = newFilter(queryManager, nodePath);
        applyEqualTo(filter, filters);
        ocm.remove(queryManager.createQuery(filter));
        ocm.save();
    }

    public boolean isPathExist(String path) {
        return ocm.objectExists(path);
    }

    /**
     * Loads the object stored at the given path.
     *
     * @return the object, or null when loading it fails for any reason
     */
    @SuppressWarnings("unchecked")
    public T findByPath(String path) {
        try {
            return (T) ocm.getObject(path);
        } catch (Exception e) {
            // Deliberate best effort kept from the original: any failure is reported as "not found".
            return null;
        }
    }

    /**
     * Finds the first entity whose attributes equal all given values.
     *
     * @return the first match, or null when nothing matches
     */
    public T findOneByEqual(Map<String, String> filters) {
        QueryManager queryManager = ocm.getQueryManager();
        Filter filter = newFilter(queryManager);
        applyEqualTo(filter, filters);
        return firstOrNull(list(queryManager.createQuery(filter)));
    }

    /** Finds all entities equal on every given attribute, using the scope {@code "//"}. */
    public List<T> findAllByEqual(Map<String, String> filters) {
        QueryManager queryManager = ocm.getQueryManager();
        Filter filter = newFilter(queryManager, "//");
        applyEqualTo(filter, filters);
        return list(queryManager.createQuery(filter));
    }

    /** Finds all entities matching every given LIKE pattern, using the scope {@code "//"}. */
    public List<T> findAllByLike(Map<String, String> filters) {
        QueryManager queryManager = ocm.getQueryManager();
        Filter filter = newFilter(queryManager, "//");
        applyLike(filter, filters);
        return list(queryManager.createQuery(filter));
    }

    /** Same as {@link #findAllByLike(Map)} but with a caller-supplied scope. */
    public List<T> findAllByLikeScoped(String scope, Map<String, String> filters) {
        QueryManager queryManager = ocm.getQueryManager();
        Filter filter = newFilter(queryManager, scope);
        applyLike(filter, filters);
        return list(queryManager.createQuery(filter));
    }

    /**
     * Finds all entities through {@code Filter.addOrFilter(attr, val)}, using the scope
     * {@code "//"}.
     */
    public List<T> findAllByOrLike(String attr, String[] val) {
        QueryManager queryManager = ocm.getQueryManager();
        Filter filter = newFilter(queryManager, "//");
        filter.addOrFilter(attr, val);
        return list(queryManager.createQuery(filter));
    }

    /**
     * Scoped variant of {@link #findOneByEqual(Map)}.
     *
     * @return the first match, or null when nothing matches (this overload used to throw
     *         IndexOutOfBoundsException on an empty result, unlike its sibling)
     */
    public T findOneByEqual(String nodePath, Map<String, String> filters) {
        QueryManager queryManager = ocm.getQueryManager();
        Filter filter = newFilter(queryManager, nodePath);
        applyEqualTo(filter, filters);
        return firstOrNull(list(queryManager.createQuery(filter)));
    }

    /** Scoped variant of {@link #findAllByEqual(Map)}. */
    public List<T> findAllByEqual(String nodePath, Map<String, String> filters) {
        QueryManager queryManager = ocm.getQueryManager();
        Filter filter = newFilter(queryManager, nodePath);
        applyEqualTo(filter, filters);
        return list(queryManager.createQuery(filter));
    }

    /**
     * Runs the given statement as a JCR-SQL2 query.
     *
     * @param query complete JCR-SQL2 statement; it is executed as given
     */
    @SuppressWarnings("unchecked")
    public List<T> findAllByString(String query) {
        return (List<T>) ocm.getObjects(query, javax.jcr.query.Query.JCR_SQL2);
    }

    /** Finds all entities of this bean's class within the scope {@code nodePath}. */
    public List<T> findAllByParentPath(String nodePath) {
        QueryManager queryManager = ocm.getQueryManager();
        return list(queryManager.createQuery(newFilter(queryManager, nodePath)));
    }

    /** Same as {@link #findAllByParentPath(String)}, ordered ascending by {@code ordering}. */
    public List<T> findAllByParentPathOrder(String nodePath, String ordering) {
        QueryManager queryManager = ocm.getQueryManager();
        Query query = queryManager.createQuery(newFilter(queryManager, nodePath));
        query.addOrderByAscending(ordering);
        return list(query);
    }

    /** Counts the entities of this bean's class within the scope {@code nodePath}. */
    public int coutChild(String nodePath) {
        QueryManager queryManager = ocm.getQueryManager();
        return list(queryManager.createQuery(newFilter(queryManager, nodePath))).size();
    }

    public boolean ifExistByPath(String path) {
        return ocm.objectExists(path);
    }

    /**
     * Derives the parent path by splitting on {@code "/"} and re-joining every segment except
     * the first (the text before the first slash) and the last.
     * Example: {@code "/a/b/c"} gives {@code "/a/b"}; {@code "/a"} gives {@code ""}.
     */
    public String getParentPath(String path) {
        String[] segments = path.split("/");
        StringBuilder parent = new StringBuilder();
        for (int i = 1; i < segments.length - 1; i++) {
            parent.append('/').append(segments[i]);
        }
        return parent.toString();
    }

    /** Creates an unscoped filter for this bean's entity class. */
    private Filter newFilter(QueryManager queryManager) {
        return queryManager.createFilter(entityClass);
    }

    /** Creates a filter for this bean's entity class with the given scope. */
    private Filter newFilter(QueryManager queryManager, String scope) {
        Filter filter = newFilter(queryManager);
        filter.setScope(scope);
        return filter;
    }

    /** Adds one equality criterion per map entry. */
    private static void applyEqualTo(Filter filter, Map<String, String> filters) {
        for (Map.Entry<String, String> criterion : filters.entrySet()) {
            filter.addEqualTo(criterion.getKey(), criterion.getValue());
        }
    }

    /** Adds one LIKE criterion per map entry. */
    private static void applyLike(Filter filter, Map<String, String> filters) {
        for (Map.Entry<String, String> criterion : filters.entrySet()) {
            filter.addLike(criterion.getKey(), criterion.getValue());
        }
    }

    /** Executes the query and returns the result cast to a list of entities. */
    @SuppressWarnings("unchecked")
    private List<T> list(Query query) {
        return (List<T>) ocm.getObjects(query);
    }

    /** Returns the first element, or null for a null or empty list. */
    private T firstOrNull(List<T> results) {
        if (results == null || results.isEmpty()) {
            return null;
        }
        return results.get(0);
    }
}
创建 bean
import javax.ejb.Stateless;
import com.ged.ocm.entity.Fichier;
import java.io.InputStream;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
import javax.jcr.Node;
import javax.jcr.NodeIterator;
import javax.jcr.Session;
import javax.jcr.Workspace;
import javax.jcr.query.QueryResult;
import javax.jcr.query.qom.FullTextSearch;
import javax.jcr.query.qom.StaticOperand;
import org.apache.jackrabbit.ocm.query.Filter;
import org.apache.jackrabbit.ocm.query.Query;
import org.apache.jackrabbit.ocm.query.QueryManager;
/**
 * Bean for {@code Fichier} entities; adds full-text search on top of {@code AbstractBean}.
 */
@Stateless
public class FichierBean extends AbstractBean<Fichier> {

    public FichierBean() {
        super(Fichier.class);
    }

    public FichierBean(Session session) {
        super(Fichier.class, session);
    }

    /**
     * Full-text search over all {@code FileType} nodes using a JCR-SQL2 CONTAINS clause.
     *
     * @param motCles keywords; wrapped in {@code *} wildcards and escaped for use inside the
     *        SQL2 string literal
     * @return the matching files
     * @throws Exception if the content manager cannot be obtained or the query fails
     */
    @SuppressWarnings("unchecked")
    public List<Fichier> findAllByContains(String motCles) throws Exception {
        String requette = "SELECT * FROM FileType AS Res WHERE CONTAINS (Res.*, '*"
                + escapeSql2Literal(motCles) + "*')";
        return (List<Fichier>) this.getOcm().getObjects(requette, javax.jcr.query.Query.JCR_SQL2);
    }

    /**
     * Same search as {@link #findAllByContains(String)}, ordered by {@code Res.nom}, keeping
     * only the files whose path starts with {@code path}. The path filter is applied in memory
     * after the query has run.
     *
     * @param path path prefix a file must have to be returned
     * @param motCles keywords; wrapped in {@code *} wildcards and escaped for use inside the
     *        SQL2 string literal
     * @return the matching files below {@code path}
     * @throws Exception if the content manager cannot be obtained or the query fails
     */
    @SuppressWarnings("unchecked")
    public List<Fichier> findAllByContains(String path, String motCles) throws Exception {
        String requette = "SELECT * FROM FileType AS Res WHERE CONTAINS (Res.*, '*"
                + escapeSql2Literal(motCles) + "*') ORDER BY Res.nom";
        List<Fichier> tmp =
                (List<Fichier>) this.getOcm().getObjects(requette, javax.jcr.query.Query.JCR_SQL2);
        List<Fichier> results = new ArrayList<Fichier>();
        for (Fichier fichier : tmp) {
            if (fichier.getPath().startsWith(path)) {
                results.add(fichier);
            }
        }
        return results;
    }

    /**
     * Full-text search through the OCM filter API ({@code addContains} on scope {@code "."}).
     *
     * @param motCles keywords; wrapped in {@code *} wildcards
     * @return the matching files
     * @throws Exception if the content manager cannot be obtained or the query fails
     */
    @SuppressWarnings("unchecked")
    public List<Fichier> fulltextByOCM(String motCles) throws Exception {
        QueryManager queryManager = this.getOcm().getQueryManager();
        Filter filter = queryManager.createFilter(com.ged.ocm.entity.Fichier.class);
        // NOTE(review): motCles is handed to addContains unescaped; confirm how OCM quotes it.
        filter.addContains(".", "*" + motCles + "*");
        Query query = queryManager.createQuery(filter);
        return (List<Fichier>) this.getOcm().getObjects(query);
    }

    /**
     * Escapes a value for embedding in a single-quoted JCR-SQL2 string literal by doubling
     * every single quote, so user keywords cannot terminate the literal early.
     * A null value becomes the text {@code "null"}, as plain string concatenation would give.
     */
    private static String escapeSql2Literal(String value) {
        return String.valueOf(value).replace("'", "''");
    }
}
我的配置文件:
repository.xml
<?xml version="1.0"?>
<!DOCTYPE Repository PUBLIC "-//The Apache Software Foundation//DTD Jackrabbit 1.6//EN"
"http://jackrabbit.apache.org/dtd/repository-1.6.dtd">
<Repository>
<!--
<FileSystem class="org.apache.jackrabbit.core.fs.local.LocalFileSystem">
<param name="path" value="${rep.home}/repository"/>
</FileSystem>
-->
<FileSystem class="org.apache.jackrabbit.core.fs.db.DbFileSystem">
<param name="driver" value="com.mysql.jdbc.jdbc2.optional.MysqlDataSource"/>
<param name="url" value="jdbc:mysql://:3306/db_ged_mysql" />
<param name="user" value="root" />
<param name="password" value="root" />
<param name="schema" value="mysql"/>
<param name="schemaObjectPrefix" value="J_R_FS_"/>
</FileSystem>
<!--
security configuration
-->
<Security appName="Jackrabbit">
<AccessManager class="org.apache.jackrabbit.core.security.SimpleAccessManager" />
<LoginModule class="org.apache.jackrabbit.core.security.SimpleLoginModule">
<param name="anonymousId" value="anonymous" />
</LoginModule>
</Security>
<!--
location of workspaces root directory and name of default workspace
-->
<Workspaces rootPath="${rep.home}/workspaces" defaultWorkspace="default"/>
<!--
workspace configuration template:
used to create the initial workspace if there's no workspace yet
-->
<Workspace name="${wsp.name}">
<PersistenceManager class="org.apache.jackrabbit.core.state.db.SimpleDbPersistenceManager">
<param name="driver" value="com.mysql.jdbc.jdbc2.optional.MysqlDataSource"/>
<param name="url" value="jdbc:mysql://:3306/db_ged_mysql" />
<param name="user" value="root" />
<param name="password" value="root" />
<param name="schema" value="mysql" />
<param name="schemaObjectPrefix" value="J_PM_${wsp.name}_" />
<param name="externalBLOBs" value="false" />
</PersistenceManager>
<FileSystem class="org.apache.jackrabbit.core.fs.db.DbFileSystem">
<param name="driver" value="com.mysql.jdbc.jdbc2.optional.MysqlDataSource"/>
<param name="url" value="jdbc:mysql://:3306/db_ged_mysql" />
<param name="user" value="root" />
<param name="password" value="root" />
<param name="schema" value="mysql"/>
<param name="schemaObjectPrefix" value="J_FS_${wsp.name}_"/>
</FileSystem>
<!--
Search index and the file system it uses.
class: FQN of class implementing the QueryHandler interface
-->
<SearchIndex class="org.apache.jackrabbit.core.query.lucene.SearchIndex">
<param name="path" value="${rep.home}/workspaces/${wsp.name}/index"/>
<param name="tikaConfigPath" value="${rep.home}/tika-config.xml"/>
<param name="useCompoundFile" value="true"/>
<param name="minMergeDocs" value="100"/>
<param name="volatileIdleTime" value="3"/>
<param name="maxMergeDocs" value="2147483647"/>
<param name="mergeFactor" value="10"/>
<param name="maxFieldLength" value="10000"/>
<param name="bufferSize" value="10"/>
<param name="cacheSize" value="1000"/>
<param name="forceConsistencyCheck" value="false"/>
<param name="enableConsistencyCheck" value="false"/>
<param name="autoRepair" value="true"/>
<param name="analyzer" value="org.apache.lucene.analysis.standard.StandardAnalyzer"/>
<param name="queryClass" value="org.apache.jackrabbit.core.query.QueryImpl"/>
<param name="respectDocumentOrder" value="true"/>
<param name="resultFetchSize" value="2147483647"/>
<param name="extractorPoolSize" value="0"/>
<param name="extractorTimeout" value="100"/>
<param name="extractorBackLogSize" value="100"/>
<param name="supportHighlighting" value="true"/>
<param name="excerptProviderClass" value="org.apache.jackrabbit.core.query.lucene.DefaultXMLExcerpt"/>
</SearchIndex>
</Workspace>
<!--
Configures the versioning
-->
<Versioning rootPath="${rep.home}/version">
<FileSystem class="org.apache.jackrabbit.core.fs.db.DbFileSystem">
<param name="driver" value="com.mysql.jdbc.jdbc2.optional.MysqlDataSource"/>
<param name="url" value="jdbc:mysql://:3306/db_ged_mysql" />
<param name="user" value="root" />
<param name="password" value="root" />
<param name="schema" value="mysql"/>
<param name="schemaObjectPrefix" value="J_V_FS_"/>
</FileSystem>
<PersistenceManager class="org.apache.jackrabbit.core.state.db.SimpleDbPersistenceManager">
<param name="driver" value="com.mysql.jdbc.jdbc2.optional.MysqlDataSource"/>
<param name="url" value="jdbc:mysql://:3306/db_ged_mysql" />
<param name="user" value="root" />
<param name="password" value="root" />
<param name="schema" value="mysql" />
<param name="schemaObjectPrefix" value="J_V_PM_" />
<param name="externalBLOBs" value="false" />
</PersistenceManager>
</Versioning>
<!--
Search index for content that is shared repository wide
(/jcr:system tree, contains mainly versions)
<SearchIndex class="org.apache.jackrabbit.core.query.lucene.SearchIndex">
<param name="path" value="${rep.home}/repository/index"/>
<param name="extractorPoolSize" value="2"/>
<param name="supportHighlighting" value="true"/>
</SearchIndex>
-->
<!--
Cluster configuration with system variables.
-->
<RepositoryLockMechanism class="org.apache.jackrabbit.core.util.CooperativeFileLock" />
</Repository>
tika-config.xml
<?xml version="1.0" encoding="UTF-8"?>
<properties>
<mimeTypeRepository resource="/org/apache/tika/mime/tika-mimetypes.xml" magic="false"/>
<parsers>
<parser name="parse-dcxml" class="org.apache.tika.parser.xml.DcXMLParser">
<mime>application/xml</mime>
<mime>image/svg+xml</mime>
</parser>
<parser name="parse-office" class="org.apache.tika.parser.microsoft.OfficeParser">
<mime>application/x-tika-msoffice</mime>
<mime>application/msword</mime>
<mime>application/vnd.ms-excel</mime>
<mime>application/vnd.ms-excel.sheet.binary.macroenabled.12</mime>
<mime>application/vnd.ms-powerpoint</mime>
<mime>application/vnd.visio</mime>
<mime>application/vnd.ms-outlook</mime>
</parser>
<parser name="parse-ooxml" class="org.apache.tika.parser.microsoft.ooxml.OOXMLParser">
<mime>application/x-tika-ooxml</mime>
<mime>application/vnd.openxmlformats-package.core-properties+xml</mime>
<mime>application/vnd.openxmlformats-officedocument.spreadsheetml.sheet</mime>
<mime>application/vnd.openxmlformats-officedocument.spreadsheetml.template</mime>
<mime>application/vnd.ms-excel.sheet.macroenabled.12</mime>
<mime>application/vnd.ms-excel.template.macroenabled.12</mime>
<mime>application/vnd.ms-excel.addin.macroenabled.12</mime>
<mime>application/vnd.openxmlformats-officedocument.presentationml.presentation</mime>
<mime>application/vnd.openxmlformats-officedocument.presentationml.template</mime>
<mime>application/vnd.openxmlformats-officedocument.presentationml.slideshow</mime>
<mime>application/vnd.ms-powerpoint.presentation.macroenabled.12</mime>
<mime>application/vnd.ms-powerpoint.slideshow.macroenabled.12</mime>
<mime>application/vnd.ms-powerpoint.addin.macroenabled.12</mime>
<mime>application/vnd.openxmlformats-officedocument.wordprocessingml.document</mime>
<mime>application/vnd.openxmlformats-officedocument.wordprocessingml.template</mime>
<mime>application/vnd.ms-word.document.macroenabled.12</mime>
<mime>application/vnd.ms-word.template.macroenabled.12</mime>
</parser>
<parser name="parse-html" class="org.apache.tika.parser.html.HtmlParser">
<mime>text/html</mime>
<mime>application/xhtml+xml</mime>
<mime>application/x-asp</mime>
</parser>
<parser name="parse-rtf" class="org.apache.tika.parser.rtf.RTFParser">
<mime>application/rtf</mime>
</parser>
<parser name="parse-pdf" class="org.apache.tika.parser.pdf.PDFParser">
<mime>application/pdf</mime>
</parser>
<parser name="parse-txt" class="org.apache.tika.parser.txt.TXTParser">
<mime>text/plain</mime>
</parser>
<parser name="parse-openoffice" class="org.apache.tika.parser.opendocument.OpenOfficeParser">
<mime>application/vnd.sun.xml.writer</mime>
<mime>application/vnd.oasis.opendocument.text</mime>
<mime>application/vnd.oasis.opendocument.graphics</mime>
<mime>application/vnd.oasis.opendocument.presentation</mime>
<mime>application/vnd.oasis.opendocument.spreadsheet</mime>
<mime>application/vnd.oasis.opendocument.chart</mime>
<mime>application/vnd.oasis.opendocument.image</mime>
<mime>application/vnd.oasis.opendocument.formula</mime>
<mime>application/vnd.oasis.opendocument.text-master</mime>
<mime>application/vnd.oasis.opendocument.text-web</mime>
<mime>application/vnd.oasis.opendocument.text-template</mime>
<mime>application/vnd.oasis.opendocument.graphics-template</mime>
<mime>application/vnd.oasis.opendocument.presentation-template</mime>
<mime>application/vnd.oasis.opendocument.spreadsheet-template</mime>
<mime>application/vnd.oasis.opendocument.chart-template</mime>
<mime>application/vnd.oasis.opendocument.image-template</mime>
<mime>application/vnd.oasis.opendocument.formula-template</mime>
<mime>application/x-vnd.oasis.opendocument.text</mime>
<mime>application/x-vnd.oasis.opendocument.graphics</mime>
<mime>application/x-vnd.oasis.opendocument.presentation</mime>
<mime>application/x-vnd.oasis.opendocument.spreadsheet</mime>
<mime>application/x-vnd.oasis.opendocument.chart</mime>
<mime>application/x-vnd.oasis.opendocument.image</mime>
<mime>application/x-vnd.oasis.opendocument.formula</mime>
<mime>application/x-vnd.oasis.opendocument.text-master</mime>
<mime>application/x-vnd.oasis.opendocument.text-web</mime>
<mime>application/x-vnd.oasis.opendocument.text-template</mime>
<mime>application/x-vnd.oasis.opendocument.graphics-template</mime>
<mime>application/x-vnd.oasis.opendocument.presentation-template</mime>
<mime>application/x-vnd.oasis.opendocument.spreadsheet-template</mime>
<mime>application/x-vnd.oasis.opendocument.chart-template</mime>
<mime>application/x-vnd.oasis.opendocument.image-template</mime>
<mime>application/x-vnd.oasis.opendocument.formula-template</mime>
</parser>
<parser name="parse-image" class="org.apache.tika.parser.image.ImageParser">
<mime>image/bmp</mime>
<mime>image/gif</mime>
<mime>image/jpeg</mime>
<mime>image/png</mime>
<mime>image/tiff</mime>
<mime>image/vnd.wap.wbmp</mime>
<mime>image/x-icon</mime>
<mime>image/x-psd</mime>
<mime>image/x-xcf</mime>
</parser>
<parser name="parse-class" class="org.apache.tika.parser.asm.ClassParser">
<mime>application/x-tika-java-class</mime>
</parser>
<parser name="parse-mp3" class="org.apache.tika.parser.mp3.Mp3Parser">
<mime>audio/mpeg</mime>
</parser>
<parser name="parse-midi" class="org.apache.tika.parser.audio.MidiParser">
<mime>application/x-midi</mime>
<mime>audio/midi</mime>
</parser>
<parser name="parse-audio" class="org.apache.tika.parser.audio.AudioParser">
<mime>audio/basic</mime>
<mime>audio/x-wav</mime>
<mime>audio/x-aiff</mime>
</parser>
</parsers>
</properties>
除了调用函数 public List<Fichier> findAllByContains(String path,String motCles)
对 .docx 和 .xlsx 文档进行全文搜索之外,bean 的所有查询都有效。对 .pdf、.txt、.xml、.xls、.doc 等的全文搜索完美无缺。
On the same line, I have observed commons-compress-1.5.jar is required
by Tika parser in case of OOXML types of documents (i.e. office 2007
documents).
Now, I am able to index & search most of types of documents (office
2007 - docx, pptx, xlsx , office 2003 - doc, ppt, xls, PDF) using
below 2 steps:
(1) Updated repository.xml & added
Further details can be found at https://issues.apache.org/jira/browse/JCR-3287
(2) Added
commons-compress-1.5.jar classpath while running
jackrabbit-standalone-2.6.2.jar
解决方案主要针对 jackrabbit-jca-2.7.5.rar!
该 RAR 的 JAR 依赖存在错误,因此我进行了这些更改:
- 添加 apache-mime4j-0.6.jar
- 添加 apache-mime4j-core-0.7.jar
- 添加 commons-compress-1.5.jar
在部署之前,将这些 JAR 添加到 jackrabbit-jca-2.7.5.rar 中!
并且 .docx、.xlsx、... 的索引编制成功!
感谢@Ashok Felix
我正在使用 Apache Jackrabbit JCA 2.7.5,问题是 .docx 和 .xlsx 文件没有被索引。
我的步骤:
- 在 glassfish 上将 Jackrabbit JCA 部署为 resource adapter
- 为
resource adapter
创建一个Connector Connection Pool
表示ConfigFile=path/to/the/repository.xml
和HomeDir=path/to/the //miss the repository.xml
- 为连接器池(jndi)创建一个
Connector Resources
- 创建网络应用程序
创建 class 以从连接器资源(下面的代码)获取会话
import java.io.Serializable;
import java.net.MalformedURLException;
import javax.annotation.Resource;
import javax.ejb.Stateless;
import javax.jcr.LoginException;
import javax.jcr.Repository;
import javax.jcr.RepositoryException;
import javax.jcr.Session;
import javax.jcr.SimpleCredentials;
import javax.naming.InitialContext;
import javax.naming.NamingException;

/**
 * Stateless bean that looks up the JCR repository bound under the JNDI name
 * {@code jndi/jca} and opens sessions on it.
 */
@Stateless
public class OcmRepository implements Serializable {

    /** Repository resolved by the most recent call to {@link #getSession(String, String)}. */
    public Repository repository;

    /** Session opened by the most recent call to {@link #getSession(String, String)}. */
    public Session session;

    public OcmRepository() {
    }

    /**
     * Resolves the repository from JNDI and logs in with the given credentials.
     * Both the repository and the new session are also stored in the public fields.
     *
     * @param log user id used for the login
     * @param mdp password of that user
     * @return the newly opened session
     * @throws LoginException if the repository rejects the credentials
     * @throws RepositoryException if the login fails for another repository reason
     * @throws NamingException if the JNDI lookup of {@code jndi/jca} fails
     * @throws MalformedURLException declared by the signature; not raised by the visible code
     */
    public Session getSession(String log, String mdp)
            throws LoginException, RepositoryException, NamingException, MalformedURLException {
        final Object bound = new InitialContext().lookup("jndi/jca");
        this.repository = (Repository) bound;

        final SimpleCredentials credentials = new SimpleCredentials(log, mdp.toCharArray());
        // null is passed as the second login argument, exactly as the original did.
        this.session = this.repository.login(credentials, null);
        return this.session;
    }
}
创建自定义文件类型
import javax.jcr.PropertyType; import javax.jcr.Session; import javax.jcr.nodetype.NodeType; import javax.jcr.nodetype.NodeTypeManager; import javax.jcr.nodetype.NodeTypeTemplate; import javax.jcr.nodetype.PropertyDefinitionTemplate; /** * * @author nathan */ public class FileType { public static void RegisterFileType(Session session) throws Exception { NodeTypeManager nodeTypeManager = session.getWorkspace().getNodeTypeManager(); NodeTypeTemplate nodeType = nodeTypeManager.createNodeTypeTemplate(); nodeType.setName("FileType"); String[] str = {"nt:resource"}; nodeType.setDeclaredSuperTypeNames(str); nodeType.setMixin(false); nodeType.setQueryable(true); PropertyDefinitionTemplate path = nodeTypeManager.createPropertyDefinitionTemplate(); path.setName("jcr:path"); path.setRequiredType(PropertyType.PATH); path.setQueryOrderable(false); path.setFullTextSearchable(false); nodeType.getPropertyDefinitionTemplates().add(path); PropertyDefinitionTemplate nom = nodeTypeManager.createPropertyDefinitionTemplate(); nom.setName("jcr:nom"); nom.setRequiredType(PropertyType.STRING); nom.setQueryOrderable(true); nom.setFullTextSearchable(true); nodeType.getPropertyDefinitionTemplates().add(nom); PropertyDefinitionTemplate description = nodeTypeManager.createPropertyDefinitionTemplate(); description.setName("jcr:description"); description.setRequiredType(PropertyType.STRING); description.setQueryOrderable(true); description.setFullTextSearchable(true); nodeType.getPropertyDefinitionTemplates().add(description); PropertyDefinitionTemplate motsCles = nodeTypeManager.createPropertyDefinitionTemplate(); motsCles.setName("jcr:motsCles"); motsCles.setRequiredType(PropertyType.STRING); motsCles.setQueryOrderable(true); motsCles.setFullTextSearchable(true); nodeType.getPropertyDefinitionTemplates().add(motsCles); PropertyDefinitionTemplate size = nodeTypeManager.createPropertyDefinitionTemplate(); size.setName("jcr:size"); size.setRequiredType(PropertyType.STRING); 
size.setQueryOrderable(true); size.setFullTextSearchable(false); nodeType.getPropertyDefinitionTemplates().add(size); PropertyDefinitionTemplate users = nodeTypeManager.createPropertyDefinitionTemplate(); users.setName("jcr:users"); users.setRequiredType(PropertyType.STRING); users.setQueryOrderable(true); users.setFullTextSearchable(false); nodeType.getPropertyDefinitionTemplates().add(users); PropertyDefinitionTemplate groupe = nodeTypeManager.createPropertyDefinitionTemplate(); groupe.setName("jcr:groupe"); groupe.setRequiredType(PropertyType.STRING); groupe.setQueryOrderable(true); groupe.setFullTextSearchable(false); nodeType.getPropertyDefinitionTemplates().add(groupe); NodeType newnodetype = nodeTypeManager.registerNodeType(nodeType, true); session.save(); } }
为持久化创建抽象类(abstract class)
import java.util.ArrayList; import java.util.List; import java.util.Map; import javax.jcr.Session; import org.apache.jackrabbit.ocm.query.Filter; import org.apache.jackrabbit.ocm.query.impl.FilterImpl; import org.apache.jackrabbit.ocm.query.impl.QueryImpl; import org.apache.jackrabbit.ocm.query.Query; import org.apache.jackrabbit.ocm.query.QueryManager; import org.apache.jackrabbit.ocm.manager.ObjectContentManager; import org.apache.jackrabbit.ocm.manager.impl.ObjectContentManagerImpl; import org.apache.jackrabbit.ocm.mapper.Mapper; import org.apache.jackrabbit.ocm.mapper.impl.annotation.AnnotationMapperImpl; import org.apache.jackrabbit.ocm.reflection.ReflectionUtils; /** * * @author nathan */ public abstract class AbstractBean<T> { private Class<T> entityClass; private ObjectContentManager ocm; private Mapper mapper; public AbstractBean(Class<T> entityClass){ this.entityClass = entityClass; } /** * Construct the Bean according to the extended class * This will be also construct the ObjectContentManager nammed ocm with the default Mapper * @param session javax.jcr.Session attached to the Bean * @return The mapping class found for the desired java bean class */ public AbstractBean(Class<T> entityClass,Session session){ this.entityClass = entityClass; ocm = new ObjectContentManagerImpl(session, this.getDefaultMapper()); } /** * @return ObjectContentManager of the Bean */ public ObjectContentManager getOcm() throws Exception{ return ocm; } /** * Construct the Bean according to the extended class * This will be also construct the ObjectContentManager nammed ocm with the param Mapper given * @param session from "javax.jcr.Session" attached to the Bean * @param map from "org.apache.jackrabbit.ocm.mapper.Mapper" which * is the use to map entity between apllication and The repository * @return ObjectContentManager of the Bean */ public ObjectContentManager getOcm(Session session, Mapper map) throws Exception{ return new ObjectContentManagerImpl(session, map); } public 
void setOcm(ObjectContentManager ocm) { this.ocm = ocm; } private Mapper getDefaultMapper(){ ReflectionUtils.setClassLoader(com.ged.ocm.entity.Groupe.class.getClassLoader()); List<Class> classes = new ArrayList<Class>(); classes.add(com.ged.ocm.entity.Fichier.class); classes.add(com.ged.ocm.entity.Dossier.class); classes.add(com.ged.ocm.entity.Groupe.class); classes.add(com.ged.ocm.entity.SimpleNode.class); return new AnnotationMapperImpl(classes); } public Mapper getMapper() { return mapper; } public void setMapper(Mapper mapper) { this.mapper = mapper; } public void setLoader(Class classe){ ReflectionUtils.setClassLoader(classe.getClassLoader()); } public void create(T entity) { ocm.insert(entity); ocm.save(); } public void edit(T entity) { ocm.update(entity); ocm.save(); } public void remove(T entity) { ocm.remove(entity); ocm.save(); } public void refresh(){ ocm.refresh(true); ocm.save(); } public void copy(String orgPath, String destPath){ ocm.copy(orgPath, destPath); ocm.save(); } public void move(String orgPath, String destPath){ ocm.move(orgPath, destPath); ocm.save(); } public void removeByPath(String path) { ocm.remove(path); ocm.save(); } public void removeAllByEqual(Map<String,String> filters){ QueryManager queryManager = ocm.getQueryManager(); Filter filter; filter = queryManager.createFilter(entityClass); for (String key : filters.keySet())filter.addEqualTo(key, filters.get(key)); Query query = queryManager.createQuery(filter); ocm.remove(query); ocm.save(); } public void removeAllByEqual(String nodePath,Map<String,String> filters){ QueryManager queryManager = ocm.getQueryManager(); Filter filter; filter = queryManager.createFilter(entityClass); filter.setScope(nodePath); for (String key : filters.keySet())filter.addEqualTo(key, filters.get(key)); Query query = queryManager.createQuery(filter); ocm.remove(query); ocm.save(); } public boolean isPathExist(String path){ return ocm.objectExists(path); } public T findByPath(String path) { try { return 
(T)ocm.getObject(path); } catch (Exception e) { return null; } } public T findOneByEqual(Map<String,String> filters){ QueryManager queryManager = ocm.getQueryManager(); Filter filter; filter = queryManager.createFilter(entityClass); for (String key : filters.keySet())filter.addEqualTo(key, filters.get(key)); Query query = queryManager.createQuery(filter); List<T> results = (List<T>) ocm.getObjects(query); T result = null; try { result = results.get(0); } catch (Exception e) { } return result; } public List<T> findAllByEqual(Map<String,String> filters){ QueryManager queryManager = ocm.getQueryManager(); Filter filter; filter = queryManager.createFilter(entityClass); filter.setScope("//"); for (String key : filters.keySet())filter.addEqualTo(key, filters.get(key)); Query query = queryManager.createQuery(filter); List<T> results = (List<T>) ocm.getObjects(query); return results; } public List<T> findAllByLike(Map<String,String> filters){ QueryManager queryManager = ocm.getQueryManager(); Filter filter; filter = queryManager.createFilter(entityClass); filter.setScope("//"); for (String key : filters.keySet())filter.addLike(key, filters.get(key)); Query query = queryManager.createQuery(filter); List<T> results = (List<T>) ocm.getObjects(query); return results; } public List<T> findAllByLikeScoped(String scope,Map<String,String> filters){ QueryManager queryManager = ocm.getQueryManager(); Filter filter; filter = queryManager.createFilter(entityClass); filter.setScope(scope); for (String key : filters.keySet())filter.addLike(key, filters.get(key)); Query query = queryManager.createQuery(filter); List<T> results = (List<T>) ocm.getObjects(query); return results; } public List<T> findAllByOrLike(String attr,String[] val){ QueryManager queryManager = ocm.getQueryManager(); Filter filter; filter = queryManager.createFilter(entityClass); filter.setScope("//"); filter.addOrFilter(attr, val); Query query = queryManager.createQuery(filter); List<T> results = (List<T>) 
ocm.getObjects(query); return results; } public T findOneByEqual(String nodePath, Map<String,String> filters){ QueryManager queryManager = ocm.getQueryManager(); Filter filter; filter = queryManager.createFilter(entityClass); filter.setScope(nodePath); for (String key : filters.keySet())filter.addEqualTo(key, filters.get(key)); Query query = queryManager.createQuery(filter); List<T> results = (List<T>) ocm.getObjects(query); T result = results.get(0); return result; } public List<T> findAllByEqual(String nodePath, Map<String,String> filters){ QueryManager queryManager = ocm.getQueryManager(); Filter filter; filter = queryManager.createFilter(entityClass); filter.setScope(nodePath); for (String key : filters.keySet())filter.addEqualTo(key, filters.get(key)); Query query = queryManager.createQuery(filter); List<T> results = (List<T>) ocm.getObjects(query); return results; } public List<T> findAllByString(String query){ List<T> results = (List<T>) ocm.getObjects(query,javax.jcr.query.Query.JCR_SQL2); return results; } public List<T> findAllByParentPath(String nodePath){ QueryManager queryManager = ocm.getQueryManager(); Filter filter; filter = queryManager.createFilter(entityClass); filter.setScope(nodePath); Query query = queryManager.createQuery(filter); List<T> results = (List<T>) ocm.getObjects(query); return results; } public List<T> findAllByParentPathOrder(String nodePath, String ordering){ QueryManager queryManager = ocm.getQueryManager(); Filter filter; filter = queryManager.createFilter(entityClass); filter.setScope(nodePath); Query query = queryManager.createQuery(filter); // query.addOrderByDescending(ordering); query.addOrderByAscending(ordering); List<T> results = (List<T>) ocm.getObjects(query); return results; } public int coutChild(String nodePath){ QueryManager queryManager = ocm.getQueryManager(); Filter filter; filter = queryManager.createFilter(entityClass); filter.setScope(nodePath); Query query = queryManager.createQuery(filter); List<T> results 
= (List<T>) ocm.getObjects(query); return results.size(); } public boolean ifExistByPath(String path){ return ocm.objectExists(path); } public String getParentPath(String path){ String parent=""; String[] tmp=path.split("/"); for (int i = 1; i < (tmp.length-1); i++) { parent+="/"+tmp[i]; } return parent; } }
创建 bean
import javax.ejb.Stateless; import com.ged.ocm.entity.Fichier; import java.io.InputStream; import java.util.ArrayList; import java.util.List; import java.util.Map; import javax.jcr.Node; import javax.jcr.NodeIterator; import javax.jcr.Session; import javax.jcr.Workspace; import javax.jcr.query.QueryResult; import javax.jcr.query.qom.FullTextSearch; import javax.jcr.query.qom.StaticOperand; import org.apache.jackrabbit.ocm.query.Filter; import org.apache.jackrabbit.ocm.query.Query; import org.apache.jackrabbit.ocm.query.QueryManager; @Stateless public class FichierBean extends AbstractBean<Fichier>{ public FichierBean() { super(Fichier.class); } public FichierBean(Session session) { super(Fichier.class,session); } public List<Fichier> findAllByContains(String motCles) throws Exception { String requette = "SELECT * FROM FileType AS Res WHERE CONTAINS (Res.*, '*"+motCles+"*')"; List<Fichier> results = (List<Fichier>) this.getOcm().getObjects(requette, javax.jcr.query.Query.JCR_SQL2); return results; } public List<Fichier> findAllByContains(String path,String motCles) throws Exception { String requette = "SELECT * FROM FileType AS Res WHERE CONTAINS (Res.*, '*"+motCles+"*') ORDER BY Res.nom"; List<Fichier> tmp = (List<Fichier>) this.getOcm().getObjects(requette, javax.jcr.query.Query.JCR_SQL2); List<Fichier> results = new ArrayList<Fichier>(); for (Fichier fichier : tmp) { if(fichier.getPath().startsWith(path))results.add(fichier); } return results; } public List<Fichier> fulltextByOCM(String motCles) throws Exception { QueryManager queryManager = this.getOcm().getQueryManager(); Filter filter; filter = queryManager.createFilter(com.ged.ocm.entity.Fichier.class); filter.addContains(".", "*"+motCles+"*"); Query query = queryManager.createQuery(filter); List<Fichier> results = (List<Fichier>) this.getOcm().getObjects(query); return results; } }
resource adapter
我的配置文件:
repository.xml
<?xml version="1.0"?>
<!DOCTYPE Repository PUBLIC "-//The Apache Software Foundation//DTD Jackrabbit 1.6//EN"
    "http://jackrabbit.apache.org/dtd/repository-1.6.dtd">
<Repository>
    <!-- Local file system variant, kept commented out; the database-backed
         FileSystem below is the active one. (The opening comment marker was
         missing, which left a stray closing marker and two FileSystem elements.)
    <FileSystem class="org.apache.jackrabbit.core.fs.local.LocalFileSystem">
        <param name="path" value="${rep.home}/repository"/>
    </FileSystem>
    -->
    <FileSystem class="org.apache.jackrabbit.core.fs.db.DbFileSystem">
        <param name="driver" value="com.mysql.jdbc.jdbc2.optional.MysqlDataSource"/>
        <param name="url" value="jdbc:mysql://:3306/db_ged_mysql" />
        <param name="user" value="root" />
        <param name="password" value="root" />
        <param name="schema" value="mysql"/>
        <param name="schemaObjectPrefix" value="J_R_FS_"/>
    </FileSystem>

    <!-- security configuration -->
    <Security appName="Jackrabbit">
        <AccessManager class="org.apache.jackrabbit.core.security.SimpleAccessManager" />
        <LoginModule class="org.apache.jackrabbit.core.security.SimpleLoginModule">
            <param name="anonymousId" value="anonymous" />
        </LoginModule>
    </Security>

    <!-- location of workspaces root directory and name of default workspace -->
    <Workspaces rootPath="${rep.home}/workspaces" defaultWorkspace="default"/>

    <!-- workspace configuration template: used to create the initial workspace if there's no workspace yet -->
    <Workspace name="${wsp.name}">
        <PersistenceManager class="org.apache.jackrabbit.core.state.db.SimpleDbPersistenceManager">
            <param name="driver" value="com.mysql.jdbc.jdbc2.optional.MysqlDataSource"/>
            <param name="url" value="jdbc:mysql://:3306/db_ged_mysql" />
            <param name="user" value="root" />
            <param name="password" value="root" />
            <param name="schema" value="mysql" />
            <param name="schemaObjectPrefix" value="J_PM_${wsp.name}_" />
            <param name="externalBLOBs" value="false" />
        </PersistenceManager>
        <FileSystem class="org.apache.jackrabbit.core.fs.db.DbFileSystem">
            <param name="driver" value="com.mysql.jdbc.jdbc2.optional.MysqlDataSource"/>
            <param name="url" value="jdbc:mysql://:3306/db_ged_mysql" />
            <param name="user" value="root" />
            <param name="password" value="root" />
            <param name="schema" value="mysql"/>
            <param name="schemaObjectPrefix" value="J_FS_${wsp.name}_"/>
        </FileSystem>
        <!-- Search index and the file system it uses. class: FQN of class implementing the QueryHandler interface -->
        <SearchIndex class="org.apache.jackrabbit.core.query.lucene.SearchIndex">
            <param name="path" value="${rep.home}/workspaces/${wsp.name}/index"/>
            <param name="tikaConfigPath" value="${rep.home}/tika-config.xml"/>
            <param name="useCompoundFile" value="true"/>
            <param name="minMergeDocs" value="100"/>
            <param name="volatileIdleTime" value="3"/>
            <param name="maxMergeDocs" value="2147483647"/>
            <param name="mergeFactor" value="10"/>
            <param name="maxFieldLength" value="10000"/>
            <param name="bufferSize" value="10"/>
            <param name="cacheSize" value="1000"/>
            <param name="forceConsistencyCheck" value="false"/>
            <param name="enableConsistencyCheck" value="false"/>
            <param name="autoRepair" value="true"/>
            <param name="analyzer" value="org.apache.lucene.analysis.standard.StandardAnalyzer"/>
            <param name="queryClass" value="org.apache.jackrabbit.core.query.QueryImpl"/>
            <param name="respectDocumentOrder" value="true"/>
            <param name="resultFetchSize" value="2147483647"/>
            <param name="extractorPoolSize" value="0"/>
            <param name="extractorTimeout" value="100"/>
            <param name="extractorBackLogSize" value="100"/>
            <param name="supportHighlighting" value="true"/>
            <param name="excerptProviderClass" value="org.apache.jackrabbit.core.query.lucene.DefaultXMLExcerpt"/>
        </SearchIndex>
    </Workspace>

    <!-- Configures the versioning -->
    <Versioning rootPath="${rep.home}/version">
        <FileSystem class="org.apache.jackrabbit.core.fs.db.DbFileSystem">
            <param name="driver" value="com.mysql.jdbc.jdbc2.optional.MysqlDataSource"/>
            <param name="url" value="jdbc:mysql://:3306/db_ged_mysql" />
            <param name="user" value="root" />
            <param name="password" value="root" />
            <param name="schema" value="mysql"/>
            <param name="schemaObjectPrefix" value="J_V_FS_"/>
        </FileSystem>
        <PersistenceManager class="org.apache.jackrabbit.core.state.db.SimpleDbPersistenceManager">
            <param name="driver" value="com.mysql.jdbc.jdbc2.optional.MysqlDataSource"/>
            <param name="url" value="jdbc:mysql://:3306/db_ged_mysql" />
            <param name="user" value="root" />
            <param name="password" value="root" />
            <param name="schema" value="mysql" />
            <param name="schemaObjectPrefix" value="J_V_PM_" />
            <param name="externalBLOBs" value="false" />
        </PersistenceManager>
    </Versioning>

    <!-- Search index for content that is shared repository wide (/jcr:system tree, contains mainly versions)
    <SearchIndex class="org.apache.jackrabbit.core.query.lucene.SearchIndex">
        <param name="path" value="${rep.home}/repository/index"/>
        <param name="extractorPoolSize" value="2"/>
        <param name="supportHighlighting" value="true"/>
    </SearchIndex>
    -->

    <!-- Cluster configuration with system variables. -->
    <RepositoryLockMechanism class="org.apache.jackrabbit.core.util.CooperativeFileLock" />
</Repository>
tika-config.xml
<?xml version="1.0" encoding="UTF-8"?>
<!-- Tika configuration: maps each parser class to the MIME types it handles. -->
<properties>
    <mimeTypeRepository resource="/org/apache/tika/mime/tika-mimetypes.xml" magic="false"/>
    <parsers>
        <parser name="parse-dcxml" class="org.apache.tika.parser.xml.DcXMLParser">
            <mime>application/xml</mime>
            <mime>image/svg+xml</mime>
        </parser>
        <parser name="parse-office" class="org.apache.tika.parser.microsoft.OfficeParser">
            <mime>application/x-tika-msoffice</mime>
            <mime>application/msword</mime>
            <mime>application/vnd.ms-excel</mime>
            <mime>application/vnd.ms-excel.sheet.binary.macroenabled.12</mime>
            <mime>application/vnd.ms-powerpoint</mime>
            <mime>application/vnd.visio</mime>
            <mime>application/vnd.ms-outlook</mime>
        </parser>
        <parser name="parse-ooxml" class="org.apache.tika.parser.microsoft.ooxml.OOXMLParser">
            <mime>application/x-tika-ooxml</mime>
            <mime>application/vnd.openxmlformats-package.core-properties+xml</mime>
            <mime>application/vnd.openxmlformats-officedocument.spreadsheetml.sheet</mime>
            <mime>application/vnd.openxmlformats-officedocument.spreadsheetml.template</mime>
            <mime>application/vnd.ms-excel.sheet.macroenabled.12</mime>
            <mime>application/vnd.ms-excel.template.macroenabled.12</mime>
            <mime>application/vnd.ms-excel.addin.macroenabled.12</mime>
            <mime>application/vnd.openxmlformats-officedocument.presentationml.presentation</mime>
            <mime>application/vnd.openxmlformats-officedocument.presentationml.template</mime>
            <mime>application/vnd.openxmlformats-officedocument.presentationml.slideshow</mime>
            <mime>application/vnd.ms-powerpoint.presentation.macroenabled.12</mime>
            <mime>application/vnd.ms-powerpoint.slideshow.macroenabled.12</mime>
            <mime>application/vnd.ms-powerpoint.addin.macroenabled.12</mime>
            <mime>application/vnd.openxmlformats-officedocument.wordprocessingml.document</mime>
            <mime>application/vnd.openxmlformats-officedocument.wordprocessingml.template</mime>
            <mime>application/vnd.ms-word.document.macroenabled.12</mime>
            <mime>application/vnd.ms-word.template.macroenabled.12</mime>
        </parser>
        <parser name="parse-html" class="org.apache.tika.parser.html.HtmlParser">
            <mime>text/html</mime>
            <mime>application/xhtml+xml</mime>
            <mime>application/x-asp</mime>
        </parser>
        <parser name="parse-rtf" class="org.apache.tika.parser.rtf.RTFParser">
            <mime>application/rtf</mime>
        </parser>
        <parser name="parse-pdf" class="org.apache.tika.parser.pdf.PDFParser">
            <mime>application/pdf</mime>
        </parser>
        <parser name="parse-txt" class="org.apache.tika.parser.txt.TXTParser">
            <mime>text/plain</mime>
        </parser>
        <parser name="parse-openoffice" class="org.apache.tika.parser.opendocument.OpenOfficeParser">
            <mime>application/vnd.sun.xml.writer</mime>
            <mime>application/vnd.oasis.opendocument.text</mime>
            <mime>application/vnd.oasis.opendocument.graphics</mime>
            <mime>application/vnd.oasis.opendocument.presentation</mime>
            <mime>application/vnd.oasis.opendocument.spreadsheet</mime>
            <mime>application/vnd.oasis.opendocument.chart</mime>
            <mime>application/vnd.oasis.opendocument.image</mime>
            <mime>application/vnd.oasis.opendocument.formula</mime>
            <mime>application/vnd.oasis.opendocument.text-master</mime>
            <mime>application/vnd.oasis.opendocument.text-web</mime>
            <mime>application/vnd.oasis.opendocument.text-template</mime>
            <mime>application/vnd.oasis.opendocument.graphics-template</mime>
            <mime>application/vnd.oasis.opendocument.presentation-template</mime>
            <mime>application/vnd.oasis.opendocument.spreadsheet-template</mime>
            <mime>application/vnd.oasis.opendocument.chart-template</mime>
            <mime>application/vnd.oasis.opendocument.image-template</mime>
            <mime>application/vnd.oasis.opendocument.formula-template</mime>
            <mime>application/x-vnd.oasis.opendocument.text</mime>
            <mime>application/x-vnd.oasis.opendocument.graphics</mime>
            <mime>application/x-vnd.oasis.opendocument.presentation</mime>
            <mime>application/x-vnd.oasis.opendocument.spreadsheet</mime>
            <mime>application/x-vnd.oasis.opendocument.chart</mime>
            <mime>application/x-vnd.oasis.opendocument.image</mime>
            <mime>application/x-vnd.oasis.opendocument.formula</mime>
            <mime>application/x-vnd.oasis.opendocument.text-master</mime>
            <mime>application/x-vnd.oasis.opendocument.text-web</mime>
            <mime>application/x-vnd.oasis.opendocument.text-template</mime>
            <mime>application/x-vnd.oasis.opendocument.graphics-template</mime>
            <mime>application/x-vnd.oasis.opendocument.presentation-template</mime>
            <mime>application/x-vnd.oasis.opendocument.spreadsheet-template</mime>
            <mime>application/x-vnd.oasis.opendocument.chart-template</mime>
            <mime>application/x-vnd.oasis.opendocument.image-template</mime>
            <mime>application/x-vnd.oasis.opendocument.formula-template</mime>
        </parser>
        <parser name="parse-image" class="org.apache.tika.parser.image.ImageParser">
            <mime>image/bmp</mime>
            <mime>image/gif</mime>
            <mime>image/jpeg</mime>
            <mime>image/png</mime>
            <mime>image/tiff</mime>
            <mime>image/vnd.wap.wbmp</mime>
            <mime>image/x-icon</mime>
            <mime>image/x-psd</mime>
            <mime>image/x-xcf</mime>
        </parser>
        <parser name="parse-class" class="org.apache.tika.parser.asm.ClassParser">
            <mime>application/x-tika-java-class</mime>
        </parser>
        <parser name="parse-mp3" class="org.apache.tika.parser.mp3.Mp3Parser">
            <mime>audio/mpeg</mime>
        </parser>
        <parser name="parse-midi" class="org.apache.tika.parser.audio.MidiParser">
            <mime>application/x-midi</mime>
            <mime>audio/midi</mime>
        </parser>
        <parser name="parse-audio" class="org.apache.tika.parser.audio.AudioParser">
            <mime>audio/basic</mime>
            <mime>audio/x-wav</mime>
            <mime>audio/x-aiff</mime>
        </parser>
    </parsers>
</properties>
除了我调用函数 public List<Fichier> findAllByContains(String path,String motCles)
对 .docx 和 .xlsx 文档进行全文搜索时,bean 的所有查询都有效。对 .pdf、.txt、.xml、.xls、.doc 等的全文搜索完美无缺。
On the same line, I have observed commons-compress-1.5.jar is required by Tika parser in case of OOXML types of documents (i.e. office 2007 documents).
Now, I am able to index & search most of types of documents (office 2007 - docx, pptx, xlsx , office 2003 - doc, ppt, xls, PDF) using below 2 steps:
(1) Updated repository.xml & added Further details can be found at https://issues.apache.org/jira/browse/JCR-3287
(2) Added commons-compress-1.5.jar classpath while running jackrabbit-standalone-2.6.2.jar
该解决方案主要针对 jackrabbit-jca-2.7.5.rar!
它的 JAR 依赖存在错误,因此我进行了这些更改:
- 添加 apache-mime4j-0.6.jar
- 添加 apache-mime4j-core-0.7.jar
- 添加 commons-compress-1.5.jar
在部署之前,将这些 JAR 添加到 jackrabbit-jca-2.7.5.rar 中!
并且 .docx、.xlsx、... 的索引编制成功!
感谢@Ashok Felix