<?xml version="1.0" encoding="UTF-8" ?>
<!-- solr.xml-lostCommitIssue -->

<!--
     For more details about configurations options that may appear in
     this file, see http://wiki.apache.org/solr/SolrConfigXml.
-->
<config>
    <!-- Controls what version of Lucene various components of Solr
         adhere to.  Generally, you want to use the latest version to
         get all bug fixes and improvements. It is highly recommended
         that you fully re-index after changing this setting as it can
         affect both how text is indexed and queried.

         NOTE(review): presumably this matches the Solr release that is
         actually deployed; confirm whenever Solr is upgraded.
    -->
    <luceneMatchVersion>8.4.1</luceneMatchVersion>


    <!-- A 'dir' option by itself adds any files found in the directory
         to the classpath, this is useful for including all jars in a
         directory.

         When a 'regex' is specified in addition to a 'dir', only the
         files in that directory which completely match the regex
         (anchored on both ends) will be included.

         If a 'dir' option (with or without a regex) is used and nothing
         is found that matches, a warning will be logged.

         The examples below can be used to load some solr-contribs along
         with their external dependencies.

         Every path is resolved below ${solr.install.dir}, which falls back
         to ../../../.. when that property is not set.  Each contrib is
         loaded as a pair: its dependency jars from contrib/<name>/lib and
         the matching solr-<name> jar from dist/.
      -->
    <!-- extraction contrib (solr-cell) -->
    <lib dir="${solr.install.dir:../../../..}/contrib/extraction/lib" regex=".*\.jar"/>
    <lib dir="${solr.install.dir:../../../..}/dist/" regex="solr-cell-\d.*\.jar"/>

    <!-- clustering contrib -->
    <lib dir="${solr.install.dir:../../../..}/contrib/clustering/lib/" regex=".*\.jar"/>
    <lib dir="${solr.install.dir:../../../..}/dist/" regex="solr-clustering-\d.*\.jar"/>

    <!-- langid contrib -->
    <lib dir="${solr.install.dir:../../../..}/contrib/langid/lib/" regex=".*\.jar"/>
    <lib dir="${solr.install.dir:../../../..}/dist/" regex="solr-langid-\d.*\.jar"/>

    <!-- velocity contrib -->
    <lib dir="${solr.install.dir:../../../..}/contrib/velocity/lib" regex=".*\.jar"/>
    <lib dir="${solr.install.dir:../../../..}/dist/" regex="solr-velocity-\d.*\.jar"/>

    <!-- Data Directory

         Used to specify an alternate directory to hold all index data
         other than the default ./data under the Solr home.  If
         replication is in use, this should match the replication
         configuration.

         The value comes from the solr.data.dir property; its fallback
         (the part after the ':') is empty, so the default location is
         used when the property is not set.
      -->
    <dataDir>${solr.data.dir:}</dataDir>


    <!-- The DirectoryFactory to use for indexes.

         solr.StandardDirectoryFactory is filesystem
         based and tries to pick the best implementation for the current
         JVM and platform.  solr.NRTCachingDirectoryFactory, the default,
         wraps solr.StandardDirectoryFactory and caches small files in memory
         for better NRT performance.

         One can force a particular implementation via solr.MMapDirectoryFactory,
         solr.NIOFSDirectoryFactory, or solr.SimpleFSDirectoryFactory.

         solr.RAMDirectoryFactory is memory based and not persistent.
      -->
    <directoryFactory name="DirectoryFactory"
                      class="${solr.directoryFactory:solr.NRTCachingDirectoryFactory}"/>

    <!-- The CodecFactory for defining the format of the inverted index.
         The default implementation is SchemaCodecFactory, which is the official Lucene
         index format, but hooks into the schema to provide per-field customization of
         the postings lists and per-document values in the fieldType element
         (postingsFormat/docValuesFormat). Note that most of the alternative implementations
         are experimental, so if you choose to customize the index format, it's a good
         idea to convert back to the official format e.g. via IndexWriter.addIndexes(IndexReader)
         before upgrading to a newer version to avoid unnecessary reindexing.
         A "compressionMode" string element can be added to <codecFactory> to choose
         between the existing compression modes in the default codec: "BEST_SPEED" (default)
         or "BEST_COMPRESSION".

         No "compressionMode" is set below, so the default mode applies.
    -->
    <codecFactory class="solr.SchemaCodecFactory"/>

    <!-- Schema factory.

         ClassicIndexSchemaFactory is the setting that disables the dynamic
         schema REST APIs, and it is the factory active in this configuration.
      -->

    <schemaFactory class="ClassicIndexSchemaFactory"/>


    <!-- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
         Index Config - These settings control low-level behavior of indexing
         Most example settings here show the default value, but are commented
         out, to more easily see where customizations have been made.

         Note: This replaces <indexDefaults> and <mainIndex> from older versions
         ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -->
    <indexConfig>
        <!-- LockFactory

             This option specifies which Lucene LockFactory implementation
             to use.

             single = SingleInstanceLockFactory - suggested for a
                      read-only index or when there is no possibility of
                      another process trying to modify the index.
             native = NativeFSLockFactory - uses OS native file locking.
                      Do not use when multiple solr webapps in the same
                      JVM are attempting to share a single index.
             simple = SimpleFSLockFactory  - uses a plain file for locking

             Defaults: 'native' is default for Solr3.6 and later, otherwise
                       'simple' is the default

             The value below is read from the solr.lock.type property and
             falls back to 'native' when that property is not set.

             More details on the nuances of each LockFactory...
             http://wiki.apache.org/lucene-java/AvailableLockFactories
        -->
        <lockType>${solr.lock.type:native}</lockType>

    </indexConfig>


    <!-- JMX

         This example enables JMX if and only if an existing MBeanServer
         is found, use this if you want to configure JMX through JVM
         parameters. Remove this to disable exposing Solr configuration
         and statistics to JMX.

         For more details see http://wiki.apache.org/solr/SolrJmx

         NOTE(review): newer Solr releases (7 and later) appear to configure
         JMX through a metrics reporter in solr.xml; confirm whether this
         element is still required for the Solr version in use.
      -->
    <jmx/>
    <!-- If you want to connect to a particular server, specify the
         agentId
      -->
    <!-- <jmx agentId="myAgent" /> -->
    <!-- If you want to start a new MBeanServer, specify the serviceUrl -->
    <!-- <jmx serviceUrl="service:jmx:rmi:///jndi/rmi://localhost:9999/solr"/>
      -->

    <!-- The default high-performance update handler -->
    <updateHandler class="solr.DirectUpdateHandler2">

        <!-- Enables a transaction log, used for real-time get, durability, and
             and solr cloud replica recovery.  The log can grow as big as
             uncommitted changes to the index, so use of a hard autoCommit
             is recommended (see below).
             "dir" - the target directory for transaction logs, defaults to the
                    solr data directory.
             "numVersionBuckets" - sets the number of buckets used to keep
                    track of max version values when checking for re-ordered
                    updates; increase this value to reduce the cost of
                    synchronizing access to version buckets during high-volume
                    indexing, this requires 8 bytes (long) * numVersionBuckets
                    of heap space per Solr core.
        -->
        <updateLog>
            <str name="dir">${solr.ulog.dir:}</str>
            <int name="numVersionBuckets">${solr.ulog.numVersionBuckets:65536}</int>
        </updateLog>

        <!-- AutoCommit

             Perform a hard commit automatically under certain conditions.
             Instead of enabling autoCommit, consider using "commitWithin"
             when adding documents.

             http://wiki.apache.org/solr/UpdateXmlMessages

             maxDocs - Maximum number of documents to add since the last
                       commit before automatically triggering a new commit.

             maxTime - Maximum amount of time in ms that is allowed to pass
                       since a document was added before automatically
                       triggering a new commit.
             openSearcher - if false, the commit causes recent index changes
               to be flushed to stable storage, but does not cause a new
               searcher to be opened to make those changes visible.

             If the updateLog is enabled, then it's highly recommended to
             have some sort of hard autoCommit to limit the log size.
          -->
        <autoCommit>
            <maxDocs>${solr.autoCommit.maxDocs:10000}</maxDocs>
            <openSearcher>false</openSearcher>
        </autoCommit>

        <!-- softAutoCommit is like autoCommit except it causes a
             'soft' commit which only ensures that changes are visible
             but does not ensure that data is synced to disk.  This is
             faster and more near-realtime friendly than a hard commit.
          -->

        <autoSoftCommit>
            <maxTime>${solr.autoSoftCommit.maxTime:-1}</maxTime>
        </autoSoftCommit>

    </updateHandler>

    <!-- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
         Query section - these settings control query time things like caches
         ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -->
    <query>

        <!-- Maximum number of clauses in each BooleanQuery,  an exception
             is thrown if exceeded.  It is safe to increase or remove this setting,
             since it is purely an arbitrary limit to try and catch user errors where
             large boolean queries may not be the best implementation choice.
          -->
        <maxBooleanClauses>1024</maxBooleanClauses>

        <!-- Solr Internal Query Caches

             There are two implementations of cache available for Solr,
             LRUCache, based on a synchronized LinkedHashMap, and
             FastLRUCache, based on a ConcurrentHashMap.

             FastLRUCache has faster gets and slower puts in single
             threaded operation and thus is generally faster than LRUCache
             when the hit ratio of the cache is high (> 75%), and may be
             faster under other scenarios on multi-cpu systems.

             NOTE(review): LRUCache and FastLRUCache appear to be deprecated
             in recent Solr 8.x releases in favour of solr.CaffeineCache;
             confirm against the release notes before the next upgrade.
        -->

        <!-- Filter Cache

             Cache used by SolrIndexSearcher for filters (DocSets),
             unordered sets of *all* documents that match a query.  When a
             new searcher is opened, its caches may be prepopulated or
             "autowarmed" using data from caches in the old searcher.
             autowarmCount is the number of items to prepopulate.  For
             LRUCache, the autowarmed items will be the most recently
             accessed items.

             Parameters:
               class - the SolrCache implementation to use
                   (LRUCache or FastLRUCache)
               size - the maximum number of entries in the cache
               initialSize - the initial capacity (number of entries) of
                   the cache.  (see java.util.HashMap)
               autowarmCount - the number of entries to prepopulate from
                   an old cache.
               maxRamMB - the maximum amount of RAM (in MB) that this cache is allowed
                          to occupy. Note that when this option is specified, the size
                          and initialSize parameters are ignored.
          -->
        <filterCache class="solr.FastLRUCache"
                     size="512"
                     initialSize="512"
                     autowarmCount="0"/>

        <!-- Query Result Cache

             Caches results of searches - ordered lists of document ids
             (DocList) based on a query, a sort, and the range of documents requested.
             Additional supported parameter by LRUCache:
                maxRamMB - the maximum amount of RAM (in MB) that this cache is allowed
                           to occupy
          -->
        <queryResultCache class="solr.LRUCache"
                          size="512"
                          initialSize="512"
                          autowarmCount="0"/>

        <!-- Document Cache

             Caches Lucene Document objects (the stored fields for each
             document).  Since Lucene internal document ids are transient,
             this cache will not be autowarmed.
          -->
        <documentCache class="solr.LRUCache"
                       size="512"
                       initialSize="512"
                       autowarmCount="0"/>

        <!-- custom cache currently used by block join -->
        <cache name="perSegFilter"
               class="solr.search.LRUCache"
               size="10"
               initialSize="0"
               autowarmCount="0"
               regenerator="solr.NoOpRegenerator"/>

        <!-- Field Value Cache

             Cache used to hold field values that are quickly accessible
             by document id.  The fieldValueCache is created by default
             even if not configured here.
          -->
        <fieldValueCache class="solr.FastLRUCache"
                         size="512"
                         autowarmCount="0"
                         showItems="32" />

        <!-- Lazy Field Loading

             If true, stored fields that are not requested will be loaded
             lazily.  This can result in a significant speed improvement
             if the usual case is to not load all stored fields,
             especially if the skipped fields are large compressed text
             fields.
        -->
        <enableLazyFieldLoading>true</enableLazyFieldLoading>

        <!-- Use Filter For Sorted Query

             A possible optimization that attempts to use a filter to
             satisfy a search.  If the requested sort does not include
             score, then the filterCache will be checked for a filter
             matching the query. If found, the filter will be used as the
             source of document ids, and then the sort will be applied to
             that.

             For most situations, this will not be useful unless you
             frequently get the same search repeatedly with different sort
             options, and none of them ever use "score"
          -->
        <!--
           <useFilterForSortedQuery>true</useFilterForSortedQuery>
          -->

        <!-- Result Window Size

             An optimization for use with the queryResultCache.  When a search
             is requested, a superset of the requested number of document ids
             are collected.  For example, if a search for a particular query
             requests matching documents 10 through 19, and queryResultWindowSize
             is 50, then documents 0 through 49 will be collected and cached.
             Any further requests in that range can be satisfied via the cache.
          -->
        <queryResultWindowSize>20</queryResultWindowSize>

        <!-- Maximum number of documents to cache for any entry in the
             queryResultCache.
          -->
        <queryResultMaxDocsCached>200</queryResultMaxDocsCached>

        <!-- Use Cold Searcher

             If a search request comes in and there is no current
             registered searcher, then immediately register the still
             warming searcher and use it.  If "false" then all requests
             will block until the first searcher is done warming.
          -->
        <useColdSearcher>true</useColdSearcher>


        <!-- Unfortunately, Solr does not allow differentiating the slow request threshold between write (/update)
        and read (/select) queries. While we accept batch updates with 10s or 20s duration, read queries should be <1s.
        If we set the threshold to 1s, the log is spammed with thousands of entries in the indexing phase.
        Therefore we use the following configuration:
        - in solrconfig.xml of the collections: slowQueryThresholdMillis = 1000. This logs queries with log level "warn"
        - in log4j2.xml log level "error", i.e. by default do not log any slow queries.
        If you need to debug slow queries, you can temporarily set the log level for class
        org.apache.solr.core.SolrCore.SlowRequest to "warn" or lower using the Solr Admin UI
        (see https://lucene.apache.org/solr/guide/7_7/configuring-logging.html#temporary-logging-settings) -->
        <slowQueryThresholdMillis>1000</slowQueryThresholdMillis>

    </query>


    <!-- Request Dispatcher

         This section contains instructions for how the SolrDispatchFilter
         should behave when processing requests for this SolrCore.

      -->
    <requestDispatcher handleSelect="false" >
        <!-- HTTP Caching

             Set HTTP caching related parameters (for proxy caches and clients).

             The options below instruct Solr not to output any HTTP Caching
             related headers
          -->
        <httpCaching never304="true"/>
        <!-- If you include a <cacheControl> directive, it will be used to
             generate a Cache-Control header (as well as an Expires header
             if the value contains "max-age=")

             By default, no Cache-Control header is generated.

             You can use the <cacheControl> option even if you have set
             never304="true"
          -->
        <!--
           <httpCaching never304="true" >
             <cacheControl>max-age=30, public</cacheControl>
           </httpCaching>
          -->
        <!-- To enable Solr to respond with automatically generated HTTP
             Caching headers, and to respond to Cache Validation requests
             correctly, set the value of never304="false"

             This will cause Solr to generate Last-Modified and ETag
             headers based on the properties of the Index.

             The following options can also be specified to affect the
             values of these headers...

             lastModFrom - the default value is "openTime" which means the
             Last-Modified value (and validation against If-Modified-Since
             requests) will all be relative to when the current Searcher
             was opened.  You can change it to lastModFrom="dirLastMod" if
             you want the value to exactly correspond to when the physical
             index was last modified.

             etagSeed="..." is an option you can change to force the ETag
             header (and validation against If-None-Match requests) to be
             different even if the index has not changed (ie: when making
             significant changes to your config file)

             (lastModifiedFrom and etagSeed are both ignored if you use
             the never304="true" option)

             NOTE(review): this comment names the attribute both
             "lastModFrom" and "lastModifiedFrom"; confirm which spelling
             Solr actually reads before enabling the example below.
          -->
        <!--
           <httpCaching lastModifiedFrom="openTime"
                        etagSeed="Solr">
             <cacheControl>max-age=30, public</cacheControl>
           </httpCaching>
          -->
    </requestDispatcher>

    <!-- Request Handlers

         http://wiki.apache.org/solr/SolrRequestHandler

         Incoming queries will be dispatched to a specific handler by name
         based on the path specified in the request.

         If a Request Handler is declared with startup="lazy", then it will
         not be initialized until the first request that uses it.

      -->
    <!-- SearchHandler

         http://wiki.apache.org/solr/SearchHandler

         For processing Search Queries, the primary Request Handler
         provided with Solr is "SearchHandler". It delegates to a sequence
         of SearchComponents (see below) and supports distributed
         queries across multiple shards
      -->
    <requestHandler name="/select" class="solr.SearchHandler">
        <!-- default values for query parameters can be specified, these
             will be overridden by parameters in the request
          -->
        <lst name="defaults">
            <str name="echoParams">explicit</str>
            <int name="rows">10</int>
        </lst>
    </requestHandler>

    <!-- A request handler that returns indented JSON by default
         (wt=json, indent=true).  As with /select, these are only defaults
         and are overridden by parameters in the request.
      -->
    <requestHandler name="/query" class="solr.SearchHandler">
        <lst name="defaults">
            <str name="echoParams">explicit</str>
            <str name="wt">json</str>
            <str name="indent">true</str>
        </lst>
    </requestHandler>


    <!-- Shared defaults for the handlers whose path matches /update/**,
         /query or /select: the "df" parameter defaults to _text_.

         NOTE(review): assumes the schema defines a _text_ field; confirm
         against the schema used with this configuration.
      -->
    <initParams path="/update/**,/query,/select">
        <lst name="defaults">
            <str name="df">_text_</str>
        </lst>
    </initParams>


    <!-- Update Processors

         Chains of Update Processor Factories for dealing with Update
         Requests can be declared, and then used by name in Update
         Request Processors

         http://wiki.apache.org/solr/UpdateRequestProcessor

         No update processor chains are declared in this file.
      -->


    <!-- Registers solr.JSONResponseWriter under the name "json". -->
    <queryResponseWriter name="json" class="solr.JSONResponseWriter" />


</config>