2 # Sphinx configuration file sample
4 # WARNING! While this sample file mentions all available options,
5 # it contains (very) short helper descriptions only. Please refer to
6 # doc/sphinx.html for details.
9 #############################################################################
10 ## data source definition
11 #############################################################################
15 # data source type. mandatory, no default value
16 # known types are mysql, pgsql, mssql, xmlpipe, xmlpipe2, odbc
19 #####################################################################
20 ## SQL settings (for 'mysql' and 'pgsql' types)
21 #####################################################################
23 # some straightforward parameters for SQL source types
28 sql_port = 3306 # optional, default is 3306
31 # optional, default is empty (reuse client library defaults)
32 # usually '/var/lib/mysql/mysql.sock' on Linux
33 # usually '/tmp/mysql.sock' on FreeBSD
35 # sql_sock = /tmp/mysql.sock
38 # MySQL specific client connection flags
39 # optional, default is 0
41 # mysql_connect_flags = 32 # enable compression
43 # MySQL specific SSL certificate settings
44 # optional, defaults are empty
46 # mysql_ssl_cert = /etc/ssl/client-cert.pem
47 # mysql_ssl_key = /etc/ssl/client-key.pem
48 # mysql_ssl_ca = /etc/ssl/cacert.pem
50 # MS SQL specific Windows authentication mode flag
51 # NOTE: it is the mssql_unicode flag (described below) that MUST be in sync with the charset_type index-level setting
52 # optional, default is 0
54 # mssql_winauth = 1 # use currently logged on user credentials
57 # MS SQL specific Unicode indexing flag
58 # optional, default is 0 (request SBCS data)
60 # mssql_unicode = 1 # request Unicode data from server
63 # ODBC specific DSN (data source name)
64 # mandatory for odbc source type, no default value
66 # odbc_dsn = DBQ=C:\data;DefaultDir=C:\data;Driver={Microsoft Text Driver (*.txt; *.csv)};
67 # sql_query = SELECT id, data FROM documents.csv
70 # pre-query, executed before the main fetch query
71 # multi-value, optional, default is empty list of queries
73 # sql_query_pre = SET NAMES utf8
74 # sql_query_pre = SET SESSION query_cache_type=OFF
77 # main document fetch query
78 # mandatory, integer document ID field MUST be the first selected column
80 SELECT id, group_id, UNIX_TIMESTAMP(date_added) AS date_added, title, content \
84 # joined/payload field fetch query
85 # joined fields let you avoid (slow) JOIN and GROUP_CONCAT
86 # payload fields let you attach custom per-keyword values (eg. for ranking)
88 # syntax is FIELD-NAME 'from' ( 'query' | 'payload-query' ); QUERY
89 # joined field QUERY should return 2 columns (docid, text)
90 # payload field QUERY should return 3 columns (docid, keyword, weight)
92 # REQUIRES that query results are in ascending document ID order!
93 # multi-value, optional, default is empty list of queries
95 # sql_joined_field = tags from query; SELECT docid, CONCAT('tag',tagid) FROM tags ORDER BY docid ASC
96 # sql_joined_field = wtags from payload-query; SELECT docid, tag, tagweight FROM tags ORDER BY docid ASC
99 # range query setup, query that must return min and max ID values
100 # optional, default is empty
102 # sql_query will need to reference $start and $end boundaries
103 # if using ranged query:
106 # SELECT doc.id, doc.id AS group, doc.title, doc.data \
107 # FROM documents doc \
108 # WHERE id>=$start AND id<=$end
110 # sql_query_range = SELECT MIN(id),MAX(id) FROM documents
114 # optional, default is 1024
116 # sql_range_step = 1000
119 # unsigned integer attribute declaration
120 # multi-value (an arbitrary number of attributes is allowed), optional
121 # optional bit size can be specified, default is 32
123 # sql_attr_uint = author_id
124 # sql_attr_uint = forum_id:9 # 9 bits for forum_id
125 sql_attr_uint = group_id
127 # boolean attribute declaration
128 # multi-value (an arbitrary number of attributes is allowed), optional
129 # equivalent to sql_attr_uint with 1-bit size
131 # sql_attr_bool = is_deleted
134 # bigint attribute declaration
135 # multi-value (an arbitrary number of attributes is allowed), optional
136 # declares a signed (unlike uint!) 64-bit attribute
138 # sql_attr_bigint = my_bigint_id
141 # UNIX timestamp attribute declaration
142 # multi-value (an arbitrary number of attributes is allowed), optional
143 # similar to integer, but can also be used in date functions
145 # sql_attr_timestamp = posted_ts
146 # sql_attr_timestamp = last_edited_ts
147 sql_attr_timestamp = date_added
149 # string ordinal attribute declaration
150 # multi-value (an arbitrary number of attributes is allowed), optional
151 # sorts strings (bytewise), and stores their indexes in the sorted list
152 # sorting by this attr is equivalent to sorting by the original strings
154 # sql_attr_str2ordinal = author_name
157 # floating point attribute declaration
158 # multi-value (an arbitrary number of attributes is allowed), optional
159 # values are stored in single precision, 32-bit IEEE 754 format
161 # sql_attr_float = lat_radians
162 # sql_attr_float = long_radians
165 # multi-valued attribute (MVA) attribute declaration
166 # multi-value (an arbitrary number of attributes is allowed), optional
167 # MVA values are variable length lists of unsigned 32-bit integers
169 # syntax is ATTR-TYPE ATTR-NAME 'from' SOURCE-TYPE [;QUERY] [;RANGE-QUERY]
170 # ATTR-TYPE is 'uint' or 'timestamp'
171 # SOURCE-TYPE is 'field', 'query', or 'ranged-query'
172 # QUERY is SQL query used to fetch all ( docid, attrvalue ) pairs
173 # RANGE-QUERY is SQL query used to fetch min and max ID values, similar to 'sql_query_range'
175 # sql_attr_multi = uint tag from query; SELECT id, tag FROM tags
176 # sql_attr_multi = uint tag from ranged-query; \
177 # SELECT id, tag FROM tags WHERE id>=$start AND id<=$end; \
178 # SELECT MIN(id), MAX(id) FROM tags
181 # string attribute declaration
182 # multi-value (an arbitrary number of these is allowed), optional
183 # lets you store and retrieve strings
185 # sql_attr_string = stitle
186 sql_attr_string = hash
187 sql_attr_string = parent
188 sql_attr_string = subject
189 # sql_field_str2wordcount = subject
192 # wordcount attribute declaration
193 # multi-value (an arbitrary number of these is allowed), optional
194 # lets you count the words at indexing time
196 # sql_attr_str2wordcount = stitle
199 # combined field plus attribute declaration (from a single column)
200 # stores column as an attribute, but also indexes it as a full-text field
202 # sql_field_string = author
203 # sql_field_str2wordcount = title
206 # post-query, executed on sql_query completion
207 # optional, default is empty
212 # post-index-query, executed on successful indexing completion
213 # optional, default is empty
214 # $maxid expands to max document ID actually fetched from DB
216 # sql_query_post_index = REPLACE INTO counters ( id, val ) \
217 # VALUES ( 'max_indexed_id', $maxid )
220 # ranged query throttling, in milliseconds
221 # optional, default is 0 which means no delay
222 # enforces given delay before each query step
223 sql_ranged_throttle = 0
225 # document info query, ONLY for CLI search (ie. testing and debugging)
226 # optional, default is empty
227 # must contain $id macro and must fetch the document by that id
228 sql_query_info = SELECT * FROM documents WHERE id=$id
230 # kill-list query, fetches the document IDs for kill-list
231 # k-list will suppress matches from preceding indexes in the same query
232 # optional, default is empty
234 # sql_query_killlist = SELECT id FROM documents WHERE edited>=@last_reindex
237 # columns to unpack on indexer side when indexing
238 # multi-value, optional, default is empty list
240 # unpack_zlib = zlib_column
241 # unpack_mysqlcompress = compressed_column
242 # unpack_mysqlcompress = compressed_column_2
245 # maximum unpacked length allowed in MySQL COMPRESS() unpacker
246 # optional, default is 16M
248 # unpack_mysqlcompress_maxsize = 16M
251 #####################################################################
253 #####################################################################
257 # shell command to invoke xmlpipe stream producer
260 # xmlpipe_command = cat /var/test.xml
262 #####################################################################
264 #####################################################################
267 # xmlpipe_command = cat /var/test2.xml
270 # xmlpipe2 field declaration
271 # multi-value, optional, default is empty
273 # xmlpipe_field = subject
274 # xmlpipe_field = content
277 # xmlpipe2 attribute declaration
278 # multi-value, optional, default is empty
279 # all xmlpipe_attr_XXX options are fully similar to sql_attr_XXX
281 # xmlpipe_attr_timestamp = published
282 # xmlpipe_attr_uint = author_id
285 # perform UTF-8 validation, and filter out incorrect codes
286 # avoids XML parser choking on non-UTF-8 documents
287 # optional, default is 0
289 # xmlpipe_fixup_utf8 = 1
293 # inherited source example
295 # all the parameters are copied from the parent source,
296 # and may then be overridden in this source definition
297 source src1throttled : src1
299 sql_ranged_throttle = 100
302 #############################################################################
304 #############################################################################
306 # local index example
308 # this is an index which is stored locally in the filesystem
310 # all indexing-time options (such as morphology and charsets)
311 # are configured per local index
315 # optional, default is 'plain'
316 # known values are 'plain', 'distributed', and 'rt' (see samples below)
319 # document source(s) to index
320 # multi-value, mandatory
321 # document IDs must be globally unique across all sources
324 # index files path and file name, without extension
325 # mandatory, path must be writable, extensions will be auto-appended
328 # document attribute values (docinfo) storage mode
329 # optional, default is 'extern'
330 # known values are 'none', 'extern' and 'inline'
333 # memory locking for cached data (.spa and .spi), to prevent swapping
334 # optional, default is 0 (do not mlock)
335 # requires searchd to be run from root
338 # a list of morphology preprocessors to apply
339 # optional, default is empty
341 # builtin preprocessors are 'none', 'stem_en', 'stem_ru', 'stem_enru',
342 # 'soundex', and 'metaphone'; additional preprocessors available from
343 # libstemmer are 'libstemmer_XXX', where XXX is algorithm code
344 # (see libstemmer_c/libstemmer/modules.txt)
346 # morphology = stem_en, stem_ru, soundex
347 # morphology = libstemmer_german
348 # morphology = libstemmer_sv
351 # minimum word length at which to enable stemming
352 # optional, default is 1 (stem everything)
354 # min_stemming_len = 1
357 # stopword files list (space separated)
358 # optional, default is empty
359 # contents are plain text, charset_table and stemming are both applied
361 # stopwords = data/stopwords.txt
364 # wordforms file, in "mapfrom > mapto" plain text format
365 # optional, default is empty
367 # wordforms = data/wordforms.txt
370 # tokenizing exceptions file
371 # optional, default is empty
373 # plain text, case sensitive, space insensitive in map-from part
374 # one "Map Several Words => ToASingleOne" entry per line
376 # exceptions = data/exceptions.txt
379 # minimum indexed word length
380 # default is 1 (index everything)
383 # charset encoding type
384 # optional, default is 'sbcs'
385 # known types are 'sbcs' (Single Byte CharSet) and 'utf-8'
388 # charset definition and case folding rules "table"
389 # optional, default value depends on charset_type
391 # defaults are configured to include English and Russian characters only
392 # you need to change the table to include additional ones
393 # this behavior MAY change in future versions
395 # 'sbcs' default value is
396 # charset_table = 0..9, A..Z->a..z, _, a..z, U+A8->U+B8, U+B8, U+C0..U+DF->U+E0..U+FF, U+E0..U+FF
398 # 'utf-8' default value is
399 # charset_table = 0..9, A..Z->a..z, _, a..z, U+410..U+42F->U+430..U+44F, U+430..U+44F
402 # ignored characters list
403 # optional, default value is empty
405 # ignore_chars = U+00AD
408 # minimum word prefix length to index
409 # optional, default is 0 (do not index prefixes)
414 # minimum word infix length to index
415 # optional, default is 0 (do not index infixes)
420 # list of fields to limit prefix/infix indexing to
421 # optional, default value is empty (index all fields in prefix/infix mode)
423 # prefix_fields = filename
424 # infix_fields = url, domain
427 # enable star-syntax (wildcards) when searching prefix/infix indexes
428 # search-time only, does not affect indexing, can be 0 or 1
429 # optional, default is 0 (do not use wildcard syntax)
434 # expand keywords with exact forms and/or stars when searching fit indexes
435 # search-time only, does not affect indexing, can be 0 or 1
436 # optional, default is 0 (do not expand keywords)
438 # expand_keywords = 1
441 # n-gram length to index, for CJK indexing
442 # only supports 0 and 1 for now, other lengths to be implemented
443 # optional, default is 0 (disable n-grams)
448 # n-gram characters list, for CJK indexing
449 # optional, default is empty
451 # ngram_chars = U+3000..U+2FA1F
454 # phrase boundary characters list
455 # optional, default is empty
457 # phrase_boundary = ., ?, !, U+2026 # horizontal ellipsis
460 # phrase boundary word position increment
461 # optional, default is 0
463 # phrase_boundary_step = 100
466 # blended characters list
467 # blended chars are indexed both as separators and valid characters
468 # for instance, AT&T will result in 3 tokens ("at", "t", and "at&t")
469 # optional, default is empty
471 # blend_chars = +, &, U+23
474 # whether to strip HTML tags from incoming documents
475 # known values are 0 (do not strip) and 1 (do strip)
476 # optional, default is 0
479 # what HTML attributes to index if stripping HTML
480 # optional, default is empty (do not index anything)
482 # html_index_attrs = img=alt,title; a=title;
485 # what HTML elements contents to strip
486 # optional, default is empty (do not strip element contents)
488 # html_remove_elements = style, script
491 # whether to preopen index data files on startup
492 # optional, default is 0 (do not preopen), searchd-only
497 # whether to keep dictionary (.spi) on disk, or cache it in RAM
498 # optional, default is 0 (cache in RAM), searchd-only
503 # whether to enable in-place inversion (2x less disk, 90-95% speed)
504 # optional, default is 0 (use separate temporary files), indexer-only
509 # in-place fine-tuning options
510 # optional, defaults are listed below
512 # inplace_hit_gap = 0 # preallocated hitlist gap size
513 # inplace_docinfo_gap = 0 # preallocated docinfo gap size
514 # inplace_reloc_factor = 0.1 # relocation buffer size within arena
515 # inplace_write_factor = 0.1 # write buffer size within arena
518 # whether to index original keywords along with stemmed versions
519 # enables "=exactform" operator to work
520 # optional, default is 0
522 # index_exact_words = 1
525 # position increment on overshort (less than min_word_len) words
526 # optional, allowed values are 0 and 1, default is 1
531 # position increment on stopword
532 # optional, allowed values are 0 and 1, default is 1
538 # positions for these keywords will not be stored in the index
539 # optional, allowed values are 'all', or a list file name
541 # hitless_words = all
542 # hitless_words = hitless.txt
546 # inherited index example
548 # all the parameters are copied from the parent index,
549 # and may then be overridden in this index definition
550 index test1stemmed : test1
552 path = data/test1stemmed
557 # distributed index example
559 # this is a virtual index which can NOT be directly indexed,
560 # and only contains references to other local and/or remote indexes
563 # 'distributed' index type MUST be specified
566 # local index to be searched
567 # there can be many local indexes configured
572 # multiple remote agents may be specified
573 # syntax for TCP connections is 'hostname:port:index1,[index2[,...]]'
574 # syntax for local UNIX connections is '/path/to/socket:index1,[index2[,...]]'
575 agent = localhost:9313:remote1
576 agent = localhost:9314:remote2,remote3
577 # agent = run/searchd.sock:remote4
579 # blackhole remote agent, for debugging/testing
580 # network errors and search results will be ignored
582 # agent_blackhole = testbox:9312:testindex1,testindex2
585 # remote agent connection timeout, milliseconds
586 # optional, default is 1000 ms, ie. 1 sec
587 agent_connect_timeout = 1000
589 # remote agent query timeout, milliseconds
590 # optional, default is 3000 ms, ie. 3 sec
591 agent_query_timeout = 3000
595 # realtime index example
597 # you can run INSERT, REPLACE, and DELETE on this index on the fly
598 # using MySQL protocol (see 'listen' directive below)
601 # 'rt' index type must be specified to use RT index
604 # index files path and file name, without extension
605 # mandatory, path must be writable, extensions will be auto-appended
608 # RAM chunk size limit
609 # RT index will keep at most this much data in RAM, then flush to disk
610 # optional, default is 32M
612 # rt_mem_limit = 512M
614 # full-text field declaration
615 # multi-value, mandatory
619 # unsigned integer attribute declaration
620 # multi-value (an arbitrary number of attributes is allowed), optional
621 # declares an unsigned 32-bit attribute
624 # bigint attribute declaration
625 # multi-value (an arbitrary number of attributes is allowed), optional
626 # declares a signed 64-bit attribute
628 # rt_attr_bigint = guid
631 # floating point attribute declaration
632 # multi-value (an arbitrary number of attributes is allowed), optional
633 # declares a single precision, 32-bit IEEE 754 format float attribute
635 # rt_attr_float = gpa
638 #############################################################################
640 #############################################################################
644 # memory limit, in bytes, kilobytes (16384K) or megabytes (256M)
645 # optional, default is 32M, max is 2047M, recommended is 256M to 1024M
648 # maximum IO calls per second (for I/O throttling)
649 # optional, default is 0 (unlimited)
654 # maximum IO call size, bytes (for I/O throttling)
655 # optional, default is 0 (unlimited)
657 # max_iosize = 1048576
660 # maximum xmlpipe2 field length, bytes
661 # optional, default is 2M
663 # max_xmlpipe2_field = 4M
666 # write buffer size, bytes
667 # several (currently up to 4) buffers will be allocated
668 # write buffers are allocated in addition to mem_limit
669 # optional, default is 1M
674 #############################################################################
676 #############################################################################
680 # hostname, port, or hostname:port, or /unix/socket/path to listen on
681 # multi-value, multiple listen points are allowed
682 # optional, default is 0.0.0.0:9312 (listen on all interfaces, port 9312)
685 # listen = 192.168.0.1:9312
687 # listen = run/searchd.sock
688 listen = localhost:9306:mysql41
691 # log file, searchd run info is logged here
692 # optional, default is 'searchd.log'
693 log = log/searchd.log
695 # query log file, all search queries are logged here
696 # optional, default is empty (do not log queries)
697 query_log = log/query.log
699 # client read timeout, seconds
700 # optional, default is 5
703 # request timeout, seconds
704 # optional, default is 5 minutes
707 # maximum amount of children to fork (concurrent searches to run)
708 # optional, default is 0 (unlimited)
711 # PID file, searchd process ID file name
713 pid_file = log/searchd.pid
715 # max amount of matches the daemon ever keeps in RAM, per-index
716 # WARNING, THERE'S ALSO PER-QUERY LIMIT, SEE SetLimits() API CALL
717 # default is 1000 (just like Google)
720 # seamless rotate, prevents rotate stalls if precaching huge datasets
721 # optional, default is 1
724 # whether to forcibly preopen all indexes on startup
725 # optional, default is 0 (do not preopen)
728 # whether to unlink .old index copies on successful rotation.
729 # optional, default is 1 (do unlink)
732 # attribute updates periodic flush timeout, seconds
733 # updates will be automatically dumped to disk this frequently
734 # optional, default is 0 (disable periodic flush)
736 # attr_flush_period = 900
739 # instance-wide ondisk_dict defaults (per-index values take precedence)
740 # optional, default is 0 (precache all dictionaries in RAM)
742 # ondisk_dict_default = 1
745 # MVA updates pool size
746 # shared between all instances of searchd, disables attr flushes!
747 # optional, default size is 1M
748 mva_updates_pool = 1M
750 # max allowed network packet size
751 # limits both query packets from clients, and responses from agents
752 # optional, default size is 8M
756 # searchd will (try to) log crashed query to 'crash_log_path.PID' file
757 # optional, default is empty (do not create crash logs)
759 # crash_log_path = log/crash
762 # max allowed per-query filter count
763 # optional, default is 256
766 # max allowed per-filter values count
767 # optional, default is 4096
768 max_filter_values = 4096
771 # socket listen queue length
772 # optional, default is 5
777 # per-keyword read buffer size
778 # optional, default is 256K
783 # unhinted read size (currently used when reading hits)
784 # optional, default is 32K
786 # read_unhinted = 32K
789 # max allowed per-batch query count (aka multi-query count)
790 # optional, default is 32
791 max_batch_queries = 32
794 # max common subtree document cache size, per-query
795 # optional, default is 512K, 0 means disable subtree optimization
797 # subtree_docs_cache = 4M
800 # max common subtree hit cache size, per-query
801 # optional, default is 1M, 0 means disable subtree optimization
803 # subtree_hits_cache = 8M
806 # multi-processing mode (MPM)
807 # known values are none, fork, prefork, and threads
808 # optional, default is fork
814 # max threads to create for searching local parts of a distributed index
815 # optional, default is 0, which means disable multi-threaded searching
816 # should work with all MPMs (ie. does NOT require workers=threads)
825 # data source type. mandatory, no default value
826 # known types are mysql, pgsql, mssql, xmlpipe, xmlpipe2, odbc
829 #####################################################################
830 ## SQL settings (for 'mysql' and 'pgsql' types)
831 #####################################################################
833 # some straightforward parameters for SQL source types
838 sql_port = 3306 # optional, default is 3306
841 # optional, default is empty (reuse client library defaults)
842 # usually '/var/lib/mysql/mysql.sock' on Linux
843 # usually '/tmp/mysql.sock' on FreeBSD
845 # sql_sock = /tmp/mysql.sock
848 # MySQL specific client connection flags
849 # optional, default is 0
851 # mysql_connect_flags = 32 # enable compression
853 # MySQL specific SSL certificate settings
854 # optional, defaults are empty
856 # mysql_ssl_cert = /etc/ssl/client-cert.pem
857 # mysql_ssl_key = /etc/ssl/client-key.pem
858 # mysql_ssl_ca = /etc/ssl/cacert.pem
860 # MS SQL specific Windows authentication mode flag
861 # NOTE: it is the mssql_unicode flag (described below) that MUST be in sync with the charset_type index-level setting
862 # optional, default is 0
864 # mssql_winauth = 1 # use currently logged on user credentials
867 # MS SQL specific Unicode indexing flag
868 # optional, default is 0 (request SBCS data)
870 # mssql_unicode = 1 # request Unicode data from server
873 # ODBC specific DSN (data source name)
874 # mandatory for odbc source type, no default value
876 # odbc_dsn = DBQ=C:\data;DefaultDir=C:\data;Driver={Microsoft Text Driver (*.txt; *.csv)};
877 # sql_query = SELECT id, data FROM documents.csv
880 # pre-query, executed before the main fetch query
881 # multi-value, optional, default is empty list of queries
883 # sql_query_pre = SET NAMES utf8
884 # sql_query_pre = SET SESSION query_cache_type=OFF
887 # main document fetch query
888 # mandatory, integer document ID field MUST be the first selected column
890 SELECT id, hash, parent, UNIX_TIMESTAMP(timestamp) AS timestamp, subject \
894 # joined/payload field fetch query
895 # joined fields let you avoid (slow) JOIN and GROUP_CONCAT
896 # payload fields let you attach custom per-keyword values (eg. for ranking)
898 # syntax is FIELD-NAME 'from' ( 'query' | 'payload-query' ); QUERY
899 # joined field QUERY should return 2 columns (docid, text)
900 # payload field QUERY should return 3 columns (docid, keyword, weight)
902 # REQUIRES that query results are in ascending document ID order!
903 # multi-value, optional, default is empty list of queries
905 # sql_joined_field = tags from query; SELECT docid, CONCAT('tag',tagid) FROM tags ORDER BY docid ASC
906 # sql_joined_field = wtags from payload-query; SELECT docid, tag, tagweight FROM tags ORDER BY docid ASC
909 # range query setup, query that must return min and max ID values
910 # optional, default is empty
912 # sql_query will need to reference $start and $end boundaries
913 # if using ranged query:
916 # SELECT doc.id, doc.id AS group, doc.title, doc.data \
917 # FROM documents doc \
918 # WHERE id>=$start AND id<=$end
920 # sql_query_range = SELECT MIN(id),MAX(id) FROM documents
924 # optional, default is 1024
926 # sql_range_step = 1000
929 # unsigned integer attribute declaration
930 # multi-value (an arbitrary number of attributes is allowed), optional
931 # optional bit size can be specified, default is 32
933 # sql_attr_uint = author_id
934 # sql_attr_uint = forum_id:9 # 9 bits for forum_id
935 #sql_attr_uint = group_id
937 # boolean attribute declaration
938 # multi-value (an arbitrary number of attributes is allowed), optional
939 # equivalent to sql_attr_uint with 1-bit size
941 # sql_attr_bool = is_deleted
944 # bigint attribute declaration
945 # multi-value (an arbitrary number of attributes is allowed), optional
946 # declares a signed (unlike uint!) 64-bit attribute
948 # sql_attr_bigint = my_bigint_id
951 # UNIX timestamp attribute declaration
952 # multi-value (an arbitrary number of attributes is allowed), optional
953 # similar to integer, but can also be used in date functions
955 # sql_attr_timestamp = posted_ts
956 # sql_attr_timestamp = last_edited_ts
957 sql_attr_timestamp = timestamp
959 # string ordinal attribute declaration
960 # multi-value (an arbitrary number of attributes is allowed), optional
961 # sorts strings (bytewise), and stores their indexes in the sorted list
962 # sorting by this attr is equivalent to sorting by the original strings
964 # sql_attr_str2ordinal = author_name
967 # floating point attribute declaration
968 # multi-value (an arbitrary number of attributes is allowed), optional
969 # values are stored in single precision, 32-bit IEEE 754 format
971 # sql_attr_float = lat_radians
972 # sql_attr_float = long_radians
975 # multi-valued attribute (MVA) attribute declaration
976 # multi-value (an arbitrary number of attributes is allowed), optional
977 # MVA values are variable length lists of unsigned 32-bit integers
979 # syntax is ATTR-TYPE ATTR-NAME 'from' SOURCE-TYPE [;QUERY] [;RANGE-QUERY]
980 # ATTR-TYPE is 'uint' or 'timestamp'
981 # SOURCE-TYPE is 'field', 'query', or 'ranged-query'
982 # QUERY is SQL query used to fetch all ( docid, attrvalue ) pairs
983 # RANGE-QUERY is SQL query used to fetch min and max ID values, similar to 'sql_query_range'
985 # sql_attr_multi = uint tag from query; SELECT id, tag FROM tags
986 # sql_attr_multi = uint tag from ranged-query; \
987 # SELECT id, tag FROM tags WHERE id>=$start AND id<=$end; \
988 # SELECT MIN(id), MAX(id) FROM tags
991 # string attribute declaration
992 # multi-value (an arbitrary number of these is allowed), optional
993 # lets you store and retrieve strings
995 # sql_attr_string = stitle
998 # wordcount attribute declaration
999 # multi-value (an arbitrary number of these is allowed), optional
1000 # lets you count the words at indexing time
1002 # sql_attr_str2wordcount = stitle
1005 # combined field plus attribute declaration (from a single column)
1006 # stores column as an attribute, but also indexes it as a full-text field
1008 # sql_field_string = author
1009 # sql_field_str2wordcount = title
1012 # post-query, executed on sql_query completion
1013 # optional, default is empty
1018 # post-index-query, executed on successful indexing completion
1019 # optional, default is empty
1020 # $maxid expands to max document ID actually fetched from DB
1022 # sql_query_post_index = REPLACE INTO counters ( id, val ) \
1023 # VALUES ( 'max_indexed_id', $maxid )
1026 # ranged query throttling, in milliseconds
1027 # optional, default is 0 which means no delay
1028 # enforces given delay before each query step
1029 sql_ranged_throttle = 0
1031 # document info query, ONLY for CLI search (ie. testing and debugging)
1032 # optional, default is empty
1033 # must contain $id macro and must fetch the document by that id
1034 sql_query_info = SELECT * FROM log WHERE id=$id
1036 # kill-list query, fetches the document IDs for kill-list
1037 # k-list will suppress matches from preceding indexes in the same query
1038 # optional, default is empty
1040 # sql_query_killlist = SELECT id FROM documents WHERE edited>=@last_reindex
1043 # columns to unpack on indexer side when indexing
1044 # multi-value, optional, default is empty list
1046 # unpack_zlib = zlib_column
1047 # unpack_mysqlcompress = compressed_column
1048 # unpack_mysqlcompress = compressed_column_2
1051 # maximum unpacked length allowed in MySQL COMPRESS() unpacker
1052 # optional, default is 16M
1054 # unpack_mysqlcompress_maxsize = 16M
1057 #####################################################################
1059 #####################################################################
1063 # shell command to invoke xmlpipe stream producer
1066 # xmlpipe_command = cat /var/test.xml
1068 #####################################################################
1069 ## xmlpipe2 settings
1070 #####################################################################
1073 # xmlpipe_command = cat /var/test2.xml
1076 # xmlpipe2 field declaration
1077 # multi-value, optional, default is empty
1079 # xmlpipe_field = subject
1080 # xmlpipe_field = content
1083 # xmlpipe2 attribute declaration
1084 # multi-value, optional, default is empty
1085 # all xmlpipe_attr_XXX options are fully similar to sql_attr_XXX
1087 # xmlpipe_attr_timestamp = published
1088 # xmlpipe_attr_uint = author_id
1091 # perform UTF-8 validation, and filter out incorrect codes
1092 # avoids XML parser choking on non-UTF-8 documents
1093 # optional, default is 0
1095 # xmlpipe_fixup_utf8 = 1
1098 #############################################################################
1100 #############################################################################
1102 # local index example
1104 # this is an index which is stored locally in the filesystem
1106 # all indexing-time options (such as morphology and charsets)
1107 # are configured per local index
1111 # optional, default is 'plain'
1112 # known values are 'plain', 'distributed', and 'rt' (see samples below)
1115 # document source(s) to index
1116 # multi-value, mandatory
1117 # document IDs must be globally unique across all sources
1120 # index files path and file name, without extension
1121 # mandatory, path must be writable, extensions will be auto-appended
1124 # document attribute values (docinfo) storage mode
1125 # optional, default is 'extern'
1126 # known values are 'none', 'extern' and 'inline'
1129 # memory locking for cached data (.spa and .spi), to prevent swapping
1130 # optional, default is 0 (do not mlock)
1131 # requires searchd to be run from root
1134 # a list of morphology preprocessors to apply
1135 # optional, default is empty
1137 # builtin preprocessors are 'none', 'stem_en', 'stem_ru', 'stem_enru',
1138 # 'soundex', and 'metaphone'; additional preprocessors available from
1139 # libstemmer are 'libstemmer_XXX', where XXX is algorithm code
1140 # (see libstemmer_c/libstemmer/modules.txt)
1142 # morphology = stem_en, stem_ru, soundex
1143 # morphology = libstemmer_german
1144 # morphology = libstemmer_sv
1147 # minimum word length at which to enable stemming
1148 # optional, default is 1 (stem everything)
1150 # min_stemming_len = 1
1153 # stopword files list (space separated)
1154 # optional, default is empty
1155 # contents are plain text, charset_table and stemming are both applied
1157 # stopwords = data/stopwords.txt
1160 # wordforms file, in "mapfrom > mapto" plain text format
1161 # optional, default is empty
1163 # wordforms = data/wordforms.txt
1166 # tokenizing exceptions file
1167 # optional, default is empty
1169 # plain text, case sensitive, space insensitive in map-from part
1170 # one "Map Several Words => ToASingleOne" entry per line
1172 # exceptions = data/exceptions.txt
1175 # minimum indexed word length
1176 # optional, default is 1 (index everything)
1179 # charset encoding type
1180 # optional, default is 'sbcs'
1181 # known types are 'sbcs' (Single Byte CharSet) and 'utf-8'
1184 # charset definition and case folding rules "table"
1185 # optional, default value depends on charset_type
1187 # defaults are configured to include English and Russian characters only
1188 # you need to change the table to include additional ones
1189 # this behavior MAY change in future versions
1191 # 'sbcs' default value is
1192 # charset_table = 0..9, A..Z->a..z, _, a..z, U+A8->U+B8, U+B8, U+C0..U+DF->U+E0..U+FF, U+E0..U+FF
1194 # 'utf-8' default value is
1195 # charset_table = 0..9, A..Z->a..z, _, a..z, U+410..U+42F->U+430..U+44F, U+430..U+44F
1198 # ignored characters list
1199 # optional, default value is empty
1201 # ignore_chars = U+00AD
1204 # minimum word prefix length to index
1205 # optional, default is 0 (do not index prefixes)
1207 # min_prefix_len = 0
1210 # minimum word infix length to index
1211 # optional, default is 0 (do not index infixes)
1216 # list of fields to limit prefix/infix indexing to
1217 # optional, default value is empty (index all fields in prefix/infix mode)
1219 # prefix_fields = filename
1220 # infix_fields = url, domain
1223 # enable star-syntax (wildcards) when searching prefix/infix indexes
1224 # search-time only, does not affect indexing, can be 0 or 1
1225 # optional, default is 0 (do not use wildcard syntax)
1230 # expand keywords with exact forms and/or stars when searching suitable indexes
1231 # search-time only, does not affect indexing, can be 0 or 1
1232 # optional, default is 0 (do not expand keywords)
1234 # expand_keywords = 1
1237 # n-gram length to index, for CJK indexing
1238 # only supports 0 and 1 for now, other lengths to be implemented
1239 # optional, default is 0 (disable n-grams)
1244 # n-gram characters list, for CJK indexing
1245 # optional, default is empty
1247 # ngram_chars = U+3000..U+2FA1F
1250 # phrase boundary characters list
1251 # optional, default is empty
1253 # phrase_boundary = ., ?, !, U+2026 # horizontal ellipsis
1256 # phrase boundary word position increment
1257 # optional, default is 0
1259 # phrase_boundary_step = 100
1262 # blended characters list
1263 # blended chars are indexed both as separators and valid characters
1264 # for instance, AT&T will result in 3 tokens ("at", "t", and "at&t")
1265 # optional, default is empty
1267 # blend_chars = +, &, U+23
1270 # whether to strip HTML tags from incoming documents
1271 # known values are 0 (do not strip) and 1 (do strip)
1272 # optional, default is 0
1275 # what HTML attributes to index if stripping HTML
1276 # optional, default is empty (do not index anything)
1278 # html_index_attrs = img=alt,title; a=title;
1281 # what HTML elements contents to strip
1282 # optional, default is empty (do not strip element contents)
1284 # html_remove_elements = style, script
1287 # whether to preopen index data files on startup
1288 # optional, default is 0 (do not preopen), searchd-only
1293 # whether to keep dictionary (.spi) on disk, or cache it in RAM
1294 # optional, default is 0 (cache in RAM), searchd-only
1299 # whether to enable in-place inversion (2x less disk, 90-95% speed)
1300 # optional, default is 0 (use separate temporary files), indexer-only
1302 # inplace_enable = 1
1305 # in-place fine-tuning options
1306 # optional, defaults are listed below
1308 # inplace_hit_gap = 0 # preallocated hitlist gap size
1309 # inplace_docinfo_gap = 0 # preallocated docinfo gap size
1310 # inplace_reloc_factor = 0.1 # relocation buffer size within arena
1311 # inplace_write_factor = 0.1 # write buffer size within arena
1314 # whether to index original keywords along with stemmed versions
1315 # enables "=exactform" operator to work
1316 # optional, default is 0
1318 # index_exact_words = 1
1321 # position increment on overshort (less than min_word_len) words
1322 # optional, allowed values are 0 and 1, default is 1
1324 # overshort_step = 1
1327 # position increment on stopword
1328 # optional, allowed values are 0 and 1, default is 1
1333 # hitless words list
1334 # positions for these keywords will not be stored in the index
1335 # optional, allowed values are 'all', or a list file name
1337 # hitless_words = all
1338 # hitless_words = hitless.txt