2 # Sphinx configuration file sample
4 # WARNING! While this sample file mentions all available options,
5 # it contains (very) short helper descriptions only. Please refer to
6 # doc/sphinx.html for details.
9 #############################################################################
10 ## data source definition
11 #############################################################################
15 # data source type. mandatory, no default value
16 # known types are mysql, pgsql, mssql, xmlpipe, xmlpipe2, odbc
19 #####################################################################
20 ## SQL settings (for 'mysql' and 'pgsql' types)
21 #####################################################################
23 # some straightforward parameters for SQL source types
28 sql_port = 3306 # optional, default is 3306
31 # optional, default is empty (reuse client library defaults)
32 # usually '/var/lib/mysql/mysql.sock' on Linux
33 # usually '/tmp/mysql.sock' on FreeBSD
35 # sql_sock = /tmp/mysql.sock
38 # MySQL specific client connection flags
39 # optional, default is 0
41 # mysql_connect_flags = 32 # enable compression
43 # MySQL specific SSL certificate settings
44 # optional, defaults are empty
46 # mysql_ssl_cert = /etc/ssl/client-cert.pem
47 # mysql_ssl_key = /etc/ssl/client-key.pem
48 # mysql_ssl_ca = /etc/ssl/cacert.pem
50 # MS SQL specific Windows authentication mode flag
52 # optional, default is 0
54 # mssql_winauth = 1 # use currently logged on user credentials
57 # MS SQL specific Unicode indexing flag
58 # optional, default is 0 (request SBCS data); MUST be in sync with charset_type index-level setting
60 # mssql_unicode = 1 # request Unicode data from server
63 # ODBC specific DSN (data source name)
64 # mandatory for odbc source type, no default value
66 # odbc_dsn = DBQ=C:\data;DefaultDir=C:\data;Driver={Microsoft Text Driver (*.txt; *.csv)};
67 # sql_query = SELECT id, data FROM documents.csv
70 # pre-query, executed before the main fetch query
71 # multi-value, optional, default is empty list of queries
73 # sql_query_pre = SET NAMES utf8
74 # sql_query_pre = SET SESSION query_cache_type=OFF
77 # main document fetch query
78 # mandatory, integer document ID field MUST be the first selected column
80 SELECT id, group_id, UNIX_TIMESTAMP(date_added) AS date_added, title, content \
84 # joined/payload field fetch query
85 # joined fields let you avoid (slow) JOIN and GROUP_CONCAT
86 # payload fields let you attach custom per-keyword values (eg. for ranking)
88 # syntax is FIELD-NAME 'from' ( 'query' | 'payload-query' ); QUERY
89 # joined field QUERY should return 2 columns (docid, text)
90 # payload field QUERY should return 3 columns (docid, keyword, weight)
92 # REQUIRES that query results are in ascending document ID order!
93 # multi-value, optional, default is empty list of queries
95 # sql_joined_field = tags from query; SELECT docid, CONCAT('tag',tagid) FROM tags ORDER BY docid ASC
96 # sql_joined_field = wtags from payload-query; SELECT docid, tag, tagweight FROM tags ORDER BY docid ASC
99 # range query setup, query that must return min and max ID values
100 # optional, default is empty
102 # sql_query will need to reference $start and $end boundaries
103 # if using ranged query:
106 # SELECT doc.id, doc.id AS group, doc.title, doc.data \
107 # FROM documents doc \
108 # WHERE id>=$start AND id<=$end
110 # sql_query_range = SELECT MIN(id),MAX(id) FROM documents
114 # optional, default is 1024
116 # sql_range_step = 1000
119 # unsigned integer attribute declaration
120 # multi-value (an arbitrary number of attributes is allowed), optional
121 # optional bit size can be specified, default is 32
123 # sql_attr_uint = author_id
124 # sql_attr_uint = forum_id:9 # 9 bits for forum_id
125 sql_attr_uint = group_id
127 # boolean attribute declaration
128 # multi-value (an arbitrary number of attributes is allowed), optional
129 # equivalent to sql_attr_uint with 1-bit size
131 # sql_attr_bool = is_deleted
134 # bigint attribute declaration
135 # multi-value (an arbitrary number of attributes is allowed), optional
136 # declares a signed (unlike uint!) 64-bit attribute
138 # sql_attr_bigint = my_bigint_id
141 # UNIX timestamp attribute declaration
142 # multi-value (an arbitrary number of attributes is allowed), optional
143 # similar to integer, but can also be used in date functions
145 # sql_attr_timestamp = posted_ts
146 # sql_attr_timestamp = last_edited_ts
147 sql_attr_timestamp = date_added
149 # string ordinal attribute declaration
150 # multi-value (an arbitrary number of attributes is allowed), optional
151 # sorts strings (bytewise), and stores their indexes in the sorted list
152 # sorting by this attr is equivalent to sorting by the original strings
154 # sql_attr_str2ordinal = author_name
157 # floating point attribute declaration
158 # multi-value (an arbitrary number of attributes is allowed), optional
159 # values are stored in single precision, 32-bit IEEE 754 format
161 # sql_attr_float = lat_radians
162 # sql_attr_float = long_radians
165 # multi-valued attribute (MVA) declaration
166 # multi-value (an arbitrary number of attributes is allowed), optional
167 # MVA values are variable length lists of unsigned 32-bit integers
169 # syntax is ATTR-TYPE ATTR-NAME 'from' SOURCE-TYPE [;QUERY] [;RANGE-QUERY]
170 # ATTR-TYPE is 'uint' or 'timestamp'
171 # SOURCE-TYPE is 'field', 'query', or 'ranged-query'
172 # QUERY is SQL query used to fetch all ( docid, attrvalue ) pairs
173 # RANGE-QUERY is SQL query used to fetch min and max ID values, similar to 'sql_query_range'
175 # sql_attr_multi = uint tag from query; SELECT id, tag FROM tags
176 # sql_attr_multi = uint tag from ranged-query; \
177 # SELECT id, tag FROM tags WHERE id>=$start AND id<=$end; \
178 # SELECT MIN(id), MAX(id) FROM tags
181 # string attribute declaration
182 # multi-value (an arbitrary number of these is allowed), optional
183 # lets you store and retrieve strings
185 # sql_attr_string = stitle
188 # wordcount attribute declaration
189 # multi-value (an arbitrary number of these is allowed), optional
190 # lets you count the words at indexing time
192 # sql_attr_str2wordcount = stitle
195 # combined field plus attribute declaration (from a single column)
196 # stores column as an attribute, but also indexes it as a full-text field
198 # sql_field_string = author
199 # sql_field_str2wordcount = title
202 # post-query, executed on sql_query completion
203 # optional, default is empty
208 # post-index-query, executed on successful indexing completion
209 # optional, default is empty
210 # $maxid expands to max document ID actually fetched from DB
212 # sql_query_post_index = REPLACE INTO counters ( id, val ) \
213 # VALUES ( 'max_indexed_id', $maxid )
216 # ranged query throttling, in milliseconds
217 # optional, default is 0 which means no delay
218 # enforces given delay before each query step
219 sql_ranged_throttle = 0
221 # document info query, ONLY for CLI search (ie. testing and debugging)
222 # optional, default is empty
223 # must contain $id macro and must fetch the document by that id
224 sql_query_info = SELECT * FROM documents WHERE id=$id
226 # kill-list query, fetches the document IDs for kill-list
227 # k-list will suppress matches from preceding indexes in the same query
228 # optional, default is empty
230 # sql_query_killlist = SELECT id FROM documents WHERE edited>=@last_reindex
233 # columns to unpack on indexer side when indexing
234 # multi-value, optional, default is empty list
236 # unpack_zlib = zlib_column
237 # unpack_mysqlcompress = compressed_column
238 # unpack_mysqlcompress = compressed_column_2
241 # maximum unpacked length allowed in MySQL COMPRESS() unpacker
242 # optional, default is 16M
244 # unpack_mysqlcompress_maxsize = 16M
247 #####################################################################
249 #####################################################################
253 # shell command to invoke xmlpipe stream producer
256 # xmlpipe_command = cat /var/test.xml
258 #####################################################################
260 #####################################################################
263 # xmlpipe_command = cat /var/test2.xml
266 # xmlpipe2 field declaration
267 # multi-value, optional, default is empty
269 # xmlpipe_field = subject
270 # xmlpipe_field = content
273 # xmlpipe2 attribute declaration
274 # multi-value, optional, default is empty
275 # all xmlpipe_attr_XXX options are fully similar to sql_attr_XXX
277 # xmlpipe_attr_timestamp = published
278 # xmlpipe_attr_uint = author_id
281 # perform UTF-8 validation, and filter out incorrect codes
282 # avoids XML parser choking on non-UTF-8 documents
283 # optional, default is 0
285 # xmlpipe_fixup_utf8 = 1
289 # inherited source example
291 # all the parameters are copied from the parent source,
292 # and may then be overridden in this source definition
293 source src1throttled : src1
295 sql_ranged_throttle = 100
298 #############################################################################
300 #############################################################################
302 # local index example
304 # this is an index which is stored locally in the filesystem
306 # all indexing-time options (such as morphology and charsets)
307 # are configured per local index
311 # optional, default is 'plain'
312 # known values are 'plain', 'distributed', and 'rt' (see samples below)
315 # document source(s) to index
316 # multi-value, mandatory
317 # document IDs must be globally unique across all sources
320 # index files path and file name, without extension
321 # mandatory, path must be writable, extensions will be auto-appended
324 # document attribute values (docinfo) storage mode
325 # optional, default is 'extern'
326 # known values are 'none', 'extern' and 'inline'
329 # memory locking for cached data (.spa and .spi), to prevent swapping
330 # optional, default is 0 (do not mlock)
331 # requires searchd to be run as root
334 # a list of morphology preprocessors to apply
335 # optional, default is empty
337 # builtin preprocessors are 'none', 'stem_en', 'stem_ru', 'stem_enru',
338 # 'soundex', and 'metaphone'; additional preprocessors available from
339 # libstemmer are 'libstemmer_XXX', where XXX is algorithm code
340 # (see libstemmer_c/libstemmer/modules.txt)
342 # morphology = stem_en, stem_ru, soundex
343 # morphology = libstemmer_german
344 # morphology = libstemmer_sv
347 # minimum word length at which to enable stemming
348 # optional, default is 1 (stem everything)
350 # min_stemming_len = 1
353 # stopword files list (space separated)
354 # optional, default is empty
355 # contents are plain text, charset_table and stemming are both applied
357 # stopwords = data/stopwords.txt
360 # wordforms file, in "mapfrom > mapto" plain text format
361 # optional, default is empty
363 # wordforms = data/wordforms.txt
366 # tokenizing exceptions file
367 # optional, default is empty
369 # plain text, case sensitive, space insensitive in map-from part
370 # one "Map Several Words => ToASingleOne" entry per line
372 # exceptions = data/exceptions.txt
375 # minimum indexed word length
376 # default is 1 (index everything)
379 # charset encoding type
380 # optional, default is 'sbcs'
381 # known types are 'sbcs' (Single Byte CharSet) and 'utf-8'
384 # charset definition and case folding rules "table"
385 # optional, default value depends on charset_type
387 # defaults are configured to include English and Russian characters only
388 # you need to change the table to include additional ones
389 # this behavior MAY change in future versions
391 # 'sbcs' default value is
392 # charset_table = 0..9, A..Z->a..z, _, a..z, U+A8->U+B8, U+B8, U+C0..U+DF->U+E0..U+FF, U+E0..U+FF
394 # 'utf-8' default value is
395 # charset_table = 0..9, A..Z->a..z, _, a..z, U+410..U+42F->U+430..U+44F, U+430..U+44F
398 # ignored characters list
399 # optional, default value is empty
401 # ignore_chars = U+00AD
404 # minimum word prefix length to index
405 # optional, default is 0 (do not index prefixes)
410 # minimum word infix length to index
411 # optional, default is 0 (do not index infixes)
416 # list of fields to limit prefix/infix indexing to
417 # optional, default value is empty (index all fields in prefix/infix mode)
419 # prefix_fields = filename
420 # infix_fields = url, domain
423 # enable star-syntax (wildcards) when searching prefix/infix indexes
424 # search-time only, does not affect indexing, can be 0 or 1
425 # optional, default is 0 (do not use wildcard syntax)
430 # expand keywords with exact forms and/or stars when searching suitable indexes
431 # search-time only, does not affect indexing, can be 0 or 1
432 # optional, default is 0 (do not expand keywords)
434 # expand_keywords = 1
437 # n-gram length to index, for CJK indexing
438 # only supports 0 and 1 for now, other lengths to be implemented
439 # optional, default is 0 (disable n-grams)
444 # n-gram characters list, for CJK indexing
445 # optional, default is empty
447 # ngram_chars = U+3000..U+2FA1F
450 # phrase boundary characters list
451 # optional, default is empty
453 # phrase_boundary = ., ?, !, U+2026 # horizontal ellipsis
456 # phrase boundary word position increment
457 # optional, default is 0
459 # phrase_boundary_step = 100
462 # blended characters list
463 # blended chars are indexed both as separators and valid characters
464 # for instance, AT&T will result in 3 tokens ("at", "t", and "at&t")
465 # optional, default is empty
467 # blend_chars = +, &, U+23
470 # whether to strip HTML tags from incoming documents
471 # known values are 0 (do not strip) and 1 (do strip)
472 # optional, default is 0
475 # what HTML attributes to index if stripping HTML
476 # optional, default is empty (do not index anything)
478 # html_index_attrs = img=alt,title; a=title;
481 # what HTML elements contents to strip
482 # optional, default is empty (do not strip element contents)
484 # html_remove_elements = style, script
487 # whether to preopen index data files on startup
488 # optional, default is 0 (do not preopen), searchd-only
493 # whether to keep dictionary (.spi) on disk, or cache it in RAM
494 # optional, default is 0 (cache in RAM), searchd-only
499 # whether to enable in-place inversion (2x less disk, 90-95% speed)
500 # optional, default is 0 (use separate temporary files), indexer-only
505 # in-place fine-tuning options
506 # optional, defaults are listed below
508 # inplace_hit_gap = 0 # preallocated hitlist gap size
509 # inplace_docinfo_gap = 0 # preallocated docinfo gap size
510 # inplace_reloc_factor = 0.1 # relocation buffer size within arena
511 # inplace_write_factor = 0.1 # write buffer size within arena
514 # whether to index original keywords along with stemmed versions
515 # enables "=exactform" operator to work
516 # optional, default is 0
518 # index_exact_words = 1
521 # position increment on overshort (less than min_word_len) words
522 # optional, allowed values are 0 and 1, default is 1
527 # position increment on stopword
528 # optional, allowed values are 0 and 1, default is 1
534 # positions for these keywords will not be stored in the index
535 # optional, allowed values are 'all', or a list file name
537 # hitless_words = all
538 # hitless_words = hitless.txt
542 # inherited index example
544 # all the parameters are copied from the parent index,
545 # and may then be overridden in this index definition
546 index test1stemmed : test1
548 path = data/test1stemmed
553 # distributed index example
555 # this is a virtual index which can NOT be directly indexed,
556 # and only contains references to other local and/or remote indexes
559 # 'distributed' index type MUST be specified
562 # local index to be searched
563 # there can be many local indexes configured
568 # multiple remote agents may be specified
569 # syntax for TCP connections is 'hostname:port:index1[,index2[,...]]'
570 # syntax for local UNIX connections is '/path/to/socket:index1[,index2[,...]]'
571 agent = localhost:9313:remote1
572 agent = localhost:9314:remote2,remote3
573 # agent = run/searchd.sock:remote4
575 # blackhole remote agent, for debugging/testing
576 # network errors and search results will be ignored
578 # agent_blackhole = testbox:9312:testindex1,testindex2
581 # remote agent connection timeout, milliseconds
582 # optional, default is 1000 ms, ie. 1 sec
583 agent_connect_timeout = 1000
585 # remote agent query timeout, milliseconds
586 # optional, default is 3000 ms, ie. 3 sec
587 agent_query_timeout = 3000
591 # realtime index example
593 # you can run INSERT, REPLACE, and DELETE on this index on the fly
594 # using MySQL protocol (see 'listen' directive below)
597 # 'rt' index type must be specified to use RT index
600 # index files path and file name, without extension
601 # mandatory, path must be writable, extensions will be auto-appended
604 # RAM chunk size limit
605 # RT index will keep at most this much data in RAM, then flush to disk
606 # optional, default is 32M
608 # rt_mem_limit = 512M
610 # full-text field declaration
611 # multi-value, mandatory
615 # unsigned integer attribute declaration
616 # multi-value (an arbitrary number of attributes is allowed), optional
617 # declares an unsigned 32-bit attribute
620 # bigint attribute declaration
621 # multi-value (an arbitrary number of attributes is allowed), optional
622 # declares a signed 64-bit attribute
624 # rt_attr_bigint = guid
627 # floating point attribute declaration
628 # multi-value (an arbitrary number of attributes is allowed), optional
629 # declares a single precision, 32-bit IEEE 754 format float attribute
631 # rt_attr_float = gpa
634 #############################################################################
636 #############################################################################
640 # memory limit, in bytes, kilobytes (16384K) or megabytes (256M)
641 # optional, default is 32M, max is 2047M, recommended is 256M to 1024M
644 # maximum IO calls per second (for I/O throttling)
645 # optional, default is 0 (unlimited)
650 # maximum IO call size, bytes (for I/O throttling)
651 # optional, default is 0 (unlimited)
653 # max_iosize = 1048576
656 # maximum xmlpipe2 field length, bytes
657 # optional, default is 2M
659 # max_xmlpipe2_field = 4M
662 # write buffer size, bytes
663 # several (currently up to 4) buffers will be allocated
664 # write buffers are allocated in addition to mem_limit
665 # optional, default is 1M
670 #############################################################################
672 #############################################################################
676 # hostname, port, or hostname:port, or /unix/socket/path to listen on
677 # multi-value, multiple listen points are allowed
678 # optional, default is 0.0.0.0:9312 (listen on all interfaces, port 9312)
681 # listen = 192.168.0.1:9312
683 # listen = run/searchd.sock
684 listen = localhost:9306:mysql41
687 # log file, searchd run info is logged here
688 # optional, default is 'searchd.log'
689 log = log/searchd.log
691 # query log file, all search queries are logged here
692 # optional, default is empty (do not log queries)
693 query_log = log/query.log
695 # client read timeout, seconds
696 # optional, default is 5
699 # request timeout, seconds
700 # optional, default is 5 minutes
703 # maximum amount of children to fork (concurrent searches to run)
704 # optional, default is 0 (unlimited)
707 # PID file, searchd process ID file name
709 pid_file = log/searchd.pid
711 # max amount of matches the daemon ever keeps in RAM, per-index
712 # WARNING, THERE'S ALSO PER-QUERY LIMIT, SEE SetLimits() API CALL
713 # default is 1000 (just like Google)
716 # seamless rotate, prevents rotate stalls if precaching huge datasets
717 # optional, default is 1
720 # whether to forcibly preopen all indexes on startup
721 # optional, default is 0 (do not preopen)
724 # whether to unlink .old index copies on successful rotation.
725 # optional, default is 1 (do unlink)
728 # attribute updates periodic flush timeout, seconds
729 # updates will be automatically dumped to disk this frequently
730 # optional, default is 0 (disable periodic flush)
732 # attr_flush_period = 900
735 # instance-wide ondisk_dict defaults (per-index values take precedence)
736 # optional, default is 0 (precache all dictionaries in RAM)
738 # ondisk_dict_default = 1
741 # MVA updates pool size
742 # shared between all instances of searchd, disables attr flushes!
743 # optional, default size is 1M
744 mva_updates_pool = 1M
746 # max allowed network packet size
747 # limits both query packets from clients, and responses from agents
748 # optional, default size is 8M
752 # searchd will (try to) log crashed query to 'crash_log_path.PID' file
753 # optional, default is empty (do not create crash logs)
755 # crash_log_path = log/crash
758 # max allowed per-query filter count
759 # optional, default is 256
762 # max allowed per-filter values count
763 # optional, default is 4096
764 max_filter_values = 4096
767 # socket listen queue length
768 # optional, default is 5
773 # per-keyword read buffer size
774 # optional, default is 256K
779 # unhinted read size (currently used when reading hits)
780 # optional, default is 32K
782 # read_unhinted = 32K
785 # max allowed per-batch query count (aka multi-query count)
786 # optional, default is 32
787 max_batch_queries = 32
790 # max common subtree document cache size, per-query
791 # optional, default is 512K, 0 means disable subtree optimization
793 # subtree_docs_cache = 4M
796 # max common subtree hit cache size, per-query
797 # optional, default is 1M, 0 means disable subtree optimization
799 # subtree_hits_cache = 8M
802 # multi-processing mode (MPM)
803 # known values are none, fork, prefork, and threads
804 # optional, default is fork
810 # max threads to create for searching local parts of a distributed index
811 # optional, default is 0, which means disable multi-threaded searching
812 # should work with all MPMs (ie. does NOT require workers=threads)