Solr 数值溢出
Solr Numeric Overflow
我在 Solr 中使用 DataImportHandler 时遇到了问题:我正在连接 Oracle 10g 数据库,需要导入 1.6 亿条记录,但当 Solr 导入到大约 6000 万条时,它抛出了异常并中断了导入:
java.sql.SQLException: Overflow Numérico
at oracle.jdbc.driver.NumberCommonAccessor.throwOverflow(NumberCommonAccessor.java:4381)
at oracle.jdbc.driver.NumberCommonAccessor.getBigDecimal(NumberCommonAccessor.java:2509)
at oracle.jdbc.driver.NumberCommonAccessor.getObject(NumberCommonAccessor.java:3247)
at oracle.jdbc.driver.T4CNumberAccessor.getObject(T4CNumberAccessor.java:313)
at oracle.jdbc.driver.OracleResultSetImpl.getObject(OracleResultSetImpl.java:1108)
at oracle.jdbc.driver.OracleResultSet.getObject(OracleResultSet.java:462)
at org.apache.solr.handler.dataimport.JdbcDataSource$ResultSetIterator.getARow(JdbcDataSource.java:320)
at org.apache.solr.handler.dataimport.JdbcDataSource$ResultSetIterator.access0(JdbcDataSource.java:258)
at org.apache.solr.handler.dataimport.JdbcDataSource$ResultSetIterator.next(JdbcDataSource.java:298)
at org.apache.solr.handler.dataimport.JdbcDataSource$ResultSetIterator.next(JdbcDataSource.java:290)
at org.apache.solr.handler.dataimport.EntityProcessorBase.getNext(EntityProcessorBase.java:133)
at org.apache.solr.handler.dataimport.SqlEntityProcessor.nextRow(SqlEntityProcessor.java:75)
at org.apache.solr.handler.dataimport.EntityProcessorWrapper.nextRow(EntityProcessorWrapper.java:243)
at org.apache.solr.handler.dataimport.DocBuilder.buildDocument(DocBuilder.java:476)
at org.apache.solr.handler.dataimport.DocBuilder.buildDocument(DocBuilder.java:415)
at org.apache.solr.handler.dataimport.DocBuilder.doFullDump(DocBuilder.java:330)
at org.apache.solr.handler.dataimport.DocBuilder.execute(DocBuilder.java:232)
at org.apache.solr.handler.dataimport.DataImporter.doFullImport(DataImporter.java:416)
at org.apache.solr.handler.dataimport.DataImporter.runCmd(DataImporter.java:480)
at org.apache.solr.handler.dataimport.DataImporter.run(DataImporter.java:461)
配置如下:
data-config.xml
<?xml version="1.0" encoding="UTF-8"?>
<!--
  DataImportHandler configuration: reads rows from OBJECT_VALUE over JDBC and,
  for each row, looks up two extra columns in OBJECT_VARIANT.
-->
<dataConfig>
  <!-- JDBC connection shared by both entities below (url/user/password are redacted placeholders). -->
  <dataSource
      name="DataSourceProduction"
      type="JdbcDataSource"
      driver="oracle.jdbc.driver.OracleDriver"
      url="xxxxxxxxxxxxxxxxxxxx"
      user="xxxxx"
      password="xxxxx"
      batchSize="100"
  />
  <document>
    <!-- Root entity: one Solr document per row returned by the OBJECT_VALUE query. -->
    <entity
        dataSource="DataSourceProduction"
        name="maestroObjectValue"
        pk="id"
        query="SELECT VAL.ID, VAL.OBJECT_VARIANT_ID, VAL.CHARACTERISTIC_ID, VAL.CHARACTERISTIC_VALUE_ID, VAL.VALUE_STRING, VAL.VALUE_NUMBER, VAL.TYPE, VAL.CD_ITEM_EEM, VAL.SEQUENCE, VAL.VALUE_FORMATTED FROM OBJECT_VALUE VAL">
      <field column="ID" name="ID" />
      <field column="OBJECT_VARIANT_ID" name="OBJECT_VARIANT_ID" />
      <field column="CHARACTERISTIC_ID" name="CHARACTERISTIC_ID" />
      <field column="CHARACTERISTIC_VALUE_ID" name="CHARACTERISTIC_VALUE_ID" />
      <field column="VALUE_STRING" name="VALUE_STRING" />
      <field column="VALUE_NUMBER" name="VALUE_NUMBER" />
      <field column="TYPE" name="TYPE" />
      <field column="CD_ITEM_EEM" name="CD_ITEM_EEM" />
      <field column="SEQUENCE" name="SEQUENCE" />
      <field column="VALUE_FORMATTED" name="VALUE_FORMATTED" />
      <!--
        Child entity: its query is parameterized with the parent row's
        OBJECT_VARIANT_ID, so it is issued once per parent row.
        NOTE(review): the placeholder is wrapped in single quotes, i.e. compared
        as a string literal; presumably OV.ID is numeric and Oracle converts
        implicitly - confirm against the table definition.
      -->
      <entity
          dataSource="DataSourceProduction"
          name="maestroObjectVariant"
          pk="id"
          query="SELECT OV.OBJECT_HEADER_ID, OV.OBJECT_TYPE_ID FROM OBJECT_VARIANT OV WHERE OV.ID = '${maestroObjectValue.OBJECT_VARIANT_ID}'">
        <field column="OBJECT_HEADER_ID" name="OBJECT_HEADER_ID" />
        <field column="OBJECT_TYPE_ID" name="OBJECT_TYPE_ID" />
      </entity>
    </entity>
  </document>
</dataConfig>
schema.xml
<?xml version="1.0" encoding="UTF-8" ?>
<!-- Solr schema "objectValue": field and field type declarations for the imported documents. -->
<schema name="objectValue" version="1.5">
  <field name="_version_" type="long" indexed="true" stored="true"/>
  <!-- Unique key of each document (see uniqueKey below). -->
  <field name="ID" type="string" indexed="true" stored="true" required="true" multiValued="false" />
  <!-- catchall field, containing all other searchable text fields (implemented
       via copyField further on in this schema).
       NOTE(review): no copyField directive appears in this excerpt. -->
  <field name="text" type="text_general" indexed="true" stored="false" multiValued="true"/>
  <field name="OBJECT_VARIANT_ID" type="long" indexed="true" stored="true" multiValued="false"/>
  <field name="CHARACTERISTIC_ID" type="long" indexed="true" stored="true" multiValued="false"/>
  <field name="CHARACTERISTIC_VALUE_ID" type="long" indexed="true" stored="true" multiValued="false"/>
  <field name="VALUE_STRING" type="string" indexed="true" stored="true" multiValued="false"/>
  <field name="VALUE_NUMBER" type="double" indexed="true" stored="true" multiValued="false"/>
  <field name="TYPE" type="int" indexed="true" stored="true" multiValued="false"/>
  <field name="CD_ITEM_EEM" type="string" indexed="true" stored="true" multiValued="false"/>
  <field name="SEQUENCE" type="int" indexed="true" stored="true" multiValued="false"/>
  <field name="VALUE_FORMATTED" type="string" indexed="true" stored="true" multiValued="false"/>
  <field name="OBJECT_HEADER_ID" type="long" indexed="true" stored="true" multiValued="false"/>
  <field name="OBJECT_TYPE_ID" type="long" indexed="true" stored="true" multiValued="false"/>
  <uniqueKey>ID</uniqueKey>
  <!-- Field types referenced by the field declarations above. -->
  <fieldType name="string" class="solr.StrField" />
  <fieldType name="int" class="solr.TrieIntField" />
  <fieldType name="long" class="solr.TrieLongField" />
  <fieldType name="double" class="solr.TrieDoubleField" />
  <fieldType name="text_general" class="solr.TextField" />
</schema>
尝试在您的 <entity> 标签中添加 onError="skip" 属性。
SOLR DIH 文档:Schema for the data config
onError : (abort|skip|continue) . The default value is 'abort' .
'skip' skips the current document. 'continue' continues as if the
error did not happen.
我在 Solr 中使用 DataImportHandler 时遇到了问题:我正在连接 Oracle 10g 数据库,需要导入 1.6 亿条记录,但当 Solr 导入到大约 6000 万条时,它抛出了异常并中断了导入:
java.sql.SQLException: Overflow Numérico
at oracle.jdbc.driver.NumberCommonAccessor.throwOverflow(NumberCommonAccessor.java:4381)
at oracle.jdbc.driver.NumberCommonAccessor.getBigDecimal(NumberCommonAccessor.java:2509)
at oracle.jdbc.driver.NumberCommonAccessor.getObject(NumberCommonAccessor.java:3247)
at oracle.jdbc.driver.T4CNumberAccessor.getObject(T4CNumberAccessor.java:313)
at oracle.jdbc.driver.OracleResultSetImpl.getObject(OracleResultSetImpl.java:1108)
at oracle.jdbc.driver.OracleResultSet.getObject(OracleResultSet.java:462)
at org.apache.solr.handler.dataimport.JdbcDataSource$ResultSetIterator.getARow(JdbcDataSource.java:320)
at org.apache.solr.handler.dataimport.JdbcDataSource$ResultSetIterator.access0(JdbcDataSource.java:258)
at org.apache.solr.handler.dataimport.JdbcDataSource$ResultSetIterator.next(JdbcDataSource.java:298)
at org.apache.solr.handler.dataimport.JdbcDataSource$ResultSetIterator.next(JdbcDataSource.java:290)
at org.apache.solr.handler.dataimport.EntityProcessorBase.getNext(EntityProcessorBase.java:133)
at org.apache.solr.handler.dataimport.SqlEntityProcessor.nextRow(SqlEntityProcessor.java:75)
at org.apache.solr.handler.dataimport.EntityProcessorWrapper.nextRow(EntityProcessorWrapper.java:243)
at org.apache.solr.handler.dataimport.DocBuilder.buildDocument(DocBuilder.java:476)
at org.apache.solr.handler.dataimport.DocBuilder.buildDocument(DocBuilder.java:415)
at org.apache.solr.handler.dataimport.DocBuilder.doFullDump(DocBuilder.java:330)
at org.apache.solr.handler.dataimport.DocBuilder.execute(DocBuilder.java:232)
at org.apache.solr.handler.dataimport.DataImporter.doFullImport(DataImporter.java:416)
at org.apache.solr.handler.dataimport.DataImporter.runCmd(DataImporter.java:480)
at org.apache.solr.handler.dataimport.DataImporter.run(DataImporter.java:461)
配置如下:
data-config.xml
<?xml version="1.0" encoding="UTF-8"?>
<!--
  DataImportHandler configuration: reads rows from OBJECT_VALUE over JDBC and,
  for each row, looks up two extra columns in OBJECT_VARIANT.
-->
<dataConfig>
  <!-- JDBC connection shared by both entities below (url/user/password are redacted placeholders). -->
  <dataSource
      name="DataSourceProduction"
      type="JdbcDataSource"
      driver="oracle.jdbc.driver.OracleDriver"
      url="xxxxxxxxxxxxxxxxxxxx"
      user="xxxxx"
      password="xxxxx"
      batchSize="100"
  />
  <document>
    <!-- Root entity: one Solr document per row returned by the OBJECT_VALUE query. -->
    <entity
        dataSource="DataSourceProduction"
        name="maestroObjectValue"
        pk="id"
        query="SELECT VAL.ID, VAL.OBJECT_VARIANT_ID, VAL.CHARACTERISTIC_ID, VAL.CHARACTERISTIC_VALUE_ID, VAL.VALUE_STRING, VAL.VALUE_NUMBER, VAL.TYPE, VAL.CD_ITEM_EEM, VAL.SEQUENCE, VAL.VALUE_FORMATTED FROM OBJECT_VALUE VAL">
      <field column="ID" name="ID" />
      <field column="OBJECT_VARIANT_ID" name="OBJECT_VARIANT_ID" />
      <field column="CHARACTERISTIC_ID" name="CHARACTERISTIC_ID" />
      <field column="CHARACTERISTIC_VALUE_ID" name="CHARACTERISTIC_VALUE_ID" />
      <field column="VALUE_STRING" name="VALUE_STRING" />
      <field column="VALUE_NUMBER" name="VALUE_NUMBER" />
      <field column="TYPE" name="TYPE" />
      <field column="CD_ITEM_EEM" name="CD_ITEM_EEM" />
      <field column="SEQUENCE" name="SEQUENCE" />
      <field column="VALUE_FORMATTED" name="VALUE_FORMATTED" />
      <!--
        Child entity: its query is parameterized with the parent row's
        OBJECT_VARIANT_ID, so it is issued once per parent row.
        NOTE(review): the placeholder is wrapped in single quotes, i.e. compared
        as a string literal; presumably OV.ID is numeric and Oracle converts
        implicitly - confirm against the table definition.
      -->
      <entity
          dataSource="DataSourceProduction"
          name="maestroObjectVariant"
          pk="id"
          query="SELECT OV.OBJECT_HEADER_ID, OV.OBJECT_TYPE_ID FROM OBJECT_VARIANT OV WHERE OV.ID = '${maestroObjectValue.OBJECT_VARIANT_ID}'">
        <field column="OBJECT_HEADER_ID" name="OBJECT_HEADER_ID" />
        <field column="OBJECT_TYPE_ID" name="OBJECT_TYPE_ID" />
      </entity>
    </entity>
  </document>
</dataConfig>
schema.xml
<?xml version="1.0" encoding="UTF-8" ?>
<!-- Solr schema "objectValue": field and field type declarations for the imported documents. -->
<schema name="objectValue" version="1.5">
  <field name="_version_" type="long" indexed="true" stored="true"/>
  <!-- Unique key of each document (see uniqueKey below). -->
  <field name="ID" type="string" indexed="true" stored="true" required="true" multiValued="false" />
  <!-- catchall field, containing all other searchable text fields (implemented
       via copyField further on in this schema).
       NOTE(review): no copyField directive appears in this excerpt. -->
  <field name="text" type="text_general" indexed="true" stored="false" multiValued="true"/>
  <field name="OBJECT_VARIANT_ID" type="long" indexed="true" stored="true" multiValued="false"/>
  <field name="CHARACTERISTIC_ID" type="long" indexed="true" stored="true" multiValued="false"/>
  <field name="CHARACTERISTIC_VALUE_ID" type="long" indexed="true" stored="true" multiValued="false"/>
  <field name="VALUE_STRING" type="string" indexed="true" stored="true" multiValued="false"/>
  <field name="VALUE_NUMBER" type="double" indexed="true" stored="true" multiValued="false"/>
  <field name="TYPE" type="int" indexed="true" stored="true" multiValued="false"/>
  <field name="CD_ITEM_EEM" type="string" indexed="true" stored="true" multiValued="false"/>
  <field name="SEQUENCE" type="int" indexed="true" stored="true" multiValued="false"/>
  <field name="VALUE_FORMATTED" type="string" indexed="true" stored="true" multiValued="false"/>
  <field name="OBJECT_HEADER_ID" type="long" indexed="true" stored="true" multiValued="false"/>
  <field name="OBJECT_TYPE_ID" type="long" indexed="true" stored="true" multiValued="false"/>
  <uniqueKey>ID</uniqueKey>
  <!-- Field types referenced by the field declarations above. -->
  <fieldType name="string" class="solr.StrField" />
  <fieldType name="int" class="solr.TrieIntField" />
  <fieldType name="long" class="solr.TrieLongField" />
  <fieldType name="double" class="solr.TrieDoubleField" />
  <fieldType name="text_general" class="solr.TextField" />
</schema>
尝试在您的 <entity> 标签中添加 onError="skip" 属性。
SOLR DIH 文档:Schema for the data config
onError : (abort|skip|continue) . The default value is 'abort' . 'skip' skips the current document. 'continue' continues as if the error did not happen.