# lets you store and retrieve strings
#
# sql_attr_string = stitle
+ sql_attr_string = hash
+ sql_attr_string = parent
+ sql_attr_string = subject
+# sql_field_str2wordcount = subject
# wordcount attribute declaration
}
# --eof--
+
+source git
+{
+ # data source type. mandatory, no default value
+ # known types are mysql, pgsql, mssql, xmlpipe, xmlpipe2, odbc
+ type = mysql
+
+ #####################################################################
+ ## SQL settings (for 'mysql' and 'pgsql' types)
+ #####################################################################
+
+ # some straightforward parameters for SQL source types
+ sql_host = localhost
+ sql_user = dpavlin
+ sql_pass =
+ sql_db = git
+ sql_port = 3306 # optional, default is 3306
+
+ # UNIX socket name
+ # optional, default is empty (reuse client library defaults)
+ # usually '/var/lib/mysql/mysql.sock' on Linux
+ # usually '/tmp/mysql.sock' on FreeBSD
+ #
+ # sql_sock = /tmp/mysql.sock
+
+
+ # MySQL specific client connection flags
+ # optional, default is 0
+ #
+ # mysql_connect_flags = 32 # enable compression
+
+ # MySQL specific SSL certificate settings
+ # optional, defaults are empty
+ #
+ # mysql_ssl_cert = /etc/ssl/client-cert.pem
+ # mysql_ssl_key = /etc/ssl/client-key.pem
+ # mysql_ssl_ca = /etc/ssl/cacert.pem
+
+ # MS SQL specific Windows authentication mode flag
+ # optional, default is 0
+ #
+ # mssql_winauth = 1 # use currently logged on user credentials
+
+
+ # MS SQL specific Unicode indexing flag
+ # MUST be in sync with charset_type index-level setting
+ # optional, default is 0 (request SBCS data)
+ #
+ # mssql_unicode = 1 # request Unicode data from server
+
+
+ # ODBC specific DSN (data source name)
+ # mandatory for odbc source type, no default value
+ #
+ # odbc_dsn = DBQ=C:\data;DefaultDir=C:\data;Driver={Microsoft Text Driver (*.txt; *.csv)};
+ # sql_query = SELECT id, data FROM documents.csv
+
+
+ # pre-query, executed before the main fetch query
+ # multi-value, optional, default is empty list of queries
+ #
+ # sql_query_pre = SET NAMES utf8
+ # sql_query_pre = SET SESSION query_cache_type=OFF
+
+
+ # main document fetch query
+ # mandatory, integer document ID field MUST be the first selected column
+ sql_query = \
+ SELECT id, hash, parent, UNIX_TIMESTAMP(timestamp) AS timestamp, subject \
+ FROM log
+
+
+ # joined/payload field fetch query
+ # joined fields let you avoid (slow) JOIN and GROUP_CONCAT
+ # payload fields let you attach custom per-keyword values (eg. for ranking)
+ #
+ # syntax is FIELD-NAME 'from' ( 'query' | 'payload-query' ); QUERY
+ # joined field QUERY should return 2 columns (docid, text)
+ # payload field QUERY should return 3 columns (docid, keyword, weight)
+ #
+ # REQUIRES that query results are in ascending document ID order!
+ # multi-value, optional, default is empty list of queries
+ #
+ # sql_joined_field = tags from query; SELECT docid, CONCAT('tag',tagid) FROM tags ORDER BY docid ASC
+ # sql_joined_field = wtags from payload-query; SELECT docid, tag, tagweight FROM tags ORDER BY docid ASC
+
+
+ # range query setup, query that must return min and max ID values
+ # optional, default is empty
+ #
+ # sql_query will need to reference $start and $end boundaries
+ # if using ranged query:
+ #
+ # sql_query = \
+ # SELECT doc.id, doc.id AS group, doc.title, doc.data \
+ # FROM documents doc \
+ # WHERE id>=$start AND id<=$end
+ #
+ # sql_query_range = SELECT MIN(id),MAX(id) FROM documents
+
+
+ # range query step
+ # optional, default is 1024
+ #
+ # sql_range_step = 1000
+
+
+ # unsigned integer attribute declaration
+ # multi-value (an arbitrary number of attributes is allowed), optional
+ # optional bit size can be specified, default is 32
+ #
+ # sql_attr_uint = author_id
+ # sql_attr_uint = forum_id:9 # 9 bits for forum_id
+ #sql_attr_uint = group_id
+
+ # boolean attribute declaration
+ # multi-value (an arbitrary number of attributes is allowed), optional
+ # equivalent to sql_attr_uint with 1-bit size
+ #
+ # sql_attr_bool = is_deleted
+
+
+ # bigint attribute declaration
+ # multi-value (an arbitrary number of attributes is allowed), optional
+ # declares a signed (unlike uint!) 64-bit attribute
+ #
+ # sql_attr_bigint = my_bigint_id
+
+
+ # UNIX timestamp attribute declaration
+ # multi-value (an arbitrary number of attributes is allowed), optional
+ # similar to integer, but can also be used in date functions
+ #
+ # sql_attr_timestamp = posted_ts
+ # sql_attr_timestamp = last_edited_ts
+ sql_attr_timestamp = timestamp
+
+ # string ordinal attribute declaration
+ # multi-value (an arbitrary number of attributes is allowed), optional
+ # sorts strings (bytewise), and stores their indexes in the sorted list
+ # sorting by this attr is equivalent to sorting by the original strings
+ #
+ # sql_attr_str2ordinal = author_name
+
+
+ # floating point attribute declaration
+ # multi-value (an arbitrary number of attributes is allowed), optional
+ # values are stored in single precision, 32-bit IEEE 754 format
+ #
+ # sql_attr_float = lat_radians
+ # sql_attr_float = long_radians
+
+
+ # multi-valued attribute (MVA) declaration
+ # multi-value (an arbitrary number of attributes is allowed), optional
+ # MVA values are variable length lists of unsigned 32-bit integers
+ #
+ # syntax is ATTR-TYPE ATTR-NAME 'from' SOURCE-TYPE [;QUERY] [;RANGE-QUERY]
+ # ATTR-TYPE is 'uint' or 'timestamp'
+ # SOURCE-TYPE is 'field', 'query', or 'ranged-query'
+ # QUERY is SQL query used to fetch all ( docid, attrvalue ) pairs
+ # RANGE-QUERY is SQL query used to fetch min and max ID values, similar to 'sql_query_range'
+ #
+ # sql_attr_multi = uint tag from query; SELECT id, tag FROM tags
+ # sql_attr_multi = uint tag from ranged-query; \
+ # SELECT id, tag FROM tags WHERE id>=$start AND id<=$end; \
+ # SELECT MIN(id), MAX(id) FROM tags
+
+
+ # string attribute declaration
+ # multi-value (an arbitrary number of these is allowed), optional
+ # lets you store and retrieve strings
+ #
+ # sql_attr_string = stitle
+
+
+ # wordcount attribute declaration
+ # multi-value (an arbitrary number of these is allowed), optional
+ # lets you count the words at indexing time
+ #
+ # sql_attr_str2wordcount = stitle
+
+
+ # combined field plus attribute declaration (from a single column)
+ # stores column as an attribute, but also indexes it as a full-text field
+ #
+ # sql_field_string = author
+ # sql_field_str2wordcount = title
+
+
+ # post-query, executed on sql_query completion
+ # optional, default is empty
+ #
+ # sql_query_post =
+
+
+ # post-index-query, executed on successful indexing completion
+ # optional, default is empty
+ # $maxid expands to max document ID actually fetched from DB
+ #
+ # sql_query_post_index = REPLACE INTO counters ( id, val ) \
+ # VALUES ( 'max_indexed_id', $maxid )
+
+
+ # ranged query throttling, in milliseconds
+ # optional, default is 0 which means no delay
+ # enforces given delay before each query step
+ sql_ranged_throttle = 0
+
+ # document info query, ONLY for CLI search (ie. testing and debugging)
+ # optional, default is empty
+ # must contain $id macro and must fetch the document by that id
+ sql_query_info = SELECT * FROM log WHERE id=$id
+
+ # kill-list query, fetches the document IDs for kill-list
+ # k-list will suppress matches from preceding indexes in the same query
+ # optional, default is empty
+ #
+ # sql_query_killlist = SELECT id FROM documents WHERE edited>=@last_reindex
+
+
+ # columns to unpack on indexer side when indexing
+ # multi-value, optional, default is empty list
+ #
+ # unpack_zlib = zlib_column
+ # unpack_mysqlcompress = compressed_column
+ # unpack_mysqlcompress = compressed_column_2
+
+
+ # maximum unpacked length allowed in MySQL COMPRESS() unpacker
+ # optional, default is 16M
+ #
+ # unpack_mysqlcompress_maxsize = 16M
+
+
+ #####################################################################
+ ## xmlpipe settings
+ #####################################################################
+
+ # type = xmlpipe
+
+ # shell command to invoke xmlpipe stream producer
+ # mandatory
+ #
+ # xmlpipe_command = cat /var/test.xml
+
+ #####################################################################
+ ## xmlpipe2 settings
+ #####################################################################
+
+ # type = xmlpipe2
+ # xmlpipe_command = cat /var/test2.xml
+
+
+ # xmlpipe2 field declaration
+ # multi-value, optional, default is empty
+ #
+ # xmlpipe_field = subject
+ # xmlpipe_field = content
+
+
+ # xmlpipe2 attribute declaration
+ # multi-value, optional, default is empty
+ # all xmlpipe_attr_XXX options work the same way as sql_attr_XXX
+ #
+ # xmlpipe_attr_timestamp = published
+ # xmlpipe_attr_uint = author_id
+
+
+ # perform UTF-8 validation, and filter out incorrect codes
+ # avoids XML parser choking on non-UTF-8 documents
+ # optional, default is 0
+ #
+ # xmlpipe_fixup_utf8 = 1
+}
+
+#############################################################################
+## index definition
+#############################################################################
+
+# local index example
+#
+# this is an index which is stored locally in the filesystem
+#
+# all indexing-time options (such as morphology and charsets)
+# are configured per local index
+index git
+{
+ # index type
+ # optional, default is 'plain'
+ # known values are 'plain', 'distributed', and 'rt' (see samples below)
+ # type = plain
+
+ # document source(s) to index
+ # multi-value, mandatory
+ # document IDs must be globally unique across all sources
+ source = git
+
+ # index files path and file name, without extension
+ # mandatory, path must be writable, extensions will be auto-appended
+ path = data/git
+
+ # document attribute values (docinfo) storage mode
+ # optional, default is 'extern'
+ # known values are 'none', 'extern' and 'inline'
+ docinfo = inline
+
+ # memory locking for cached data (.spa and .spi), to prevent swapping
+ # optional, default is 0 (do not mlock)
+ # requires searchd to be run as root
+ mlock = 0
+
+ # a list of morphology preprocessors to apply
+ # optional, default is empty
+ #
+ # builtin preprocessors are 'none', 'stem_en', 'stem_ru', 'stem_enru',
+ # 'soundex', and 'metaphone'; additional preprocessors available from
+ # libstemmer are 'libstemmer_XXX', where XXX is algorithm code
+ # (see libstemmer_c/libstemmer/modules.txt)
+ #
+ # morphology = stem_en, stem_ru, soundex
+ # morphology = libstemmer_german
+ # morphology = libstemmer_sv
+ morphology = none
+
+ # minimum word length at which to enable stemming
+ # optional, default is 1 (stem everything)
+ #
+ # min_stemming_len = 1
+
+
+ # stopword files list (space separated)
+ # optional, default is empty
+ # contents are plain text, charset_table and stemming are both applied
+ #
+ # stopwords = data/stopwords.txt
+
+
+ # wordforms file, in "mapfrom > mapto" plain text format
+ # optional, default is empty
+ #
+ # wordforms = data/wordforms.txt
+
+
+ # tokenizing exceptions file
+ # optional, default is empty
+ #
+ # plain text, case sensitive, space insensitive in map-from part
+ # one "Map Several Words => ToASingleOne" entry per line
+ #
+ # exceptions = data/exceptions.txt
+
+
+ # minimum indexed word length
+ # default is 1 (index everything)
+ min_word_len = 1
+
+ # charset encoding type
+ # optional, default is 'sbcs'
+ # known types are 'sbcs' (Single Byte CharSet) and 'utf-8'
+ charset_type = sbcs
+
+ # charset definition and case folding rules "table"
+ # optional, default value depends on charset_type
+ #
+ # defaults are configured to include English and Russian characters only
+ # you need to change the table to include additional ones
+ # this behavior MAY change in future versions
+ #
+ # 'sbcs' default value is
+ # charset_table = 0..9, A..Z->a..z, _, a..z, U+A8->U+B8, U+B8, U+C0..U+DF->U+E0..U+FF, U+E0..U+FF
+ #
+ # 'utf-8' default value is
+ # charset_table = 0..9, A..Z->a..z, _, a..z, U+410..U+42F->U+430..U+44F, U+430..U+44F
+
+
+ # ignored characters list
+ # optional, default value is empty
+ #
+ # ignore_chars = U+00AD
+
+
+ # minimum word prefix length to index
+ # optional, default is 0 (do not index prefixes)
+ #
+ # min_prefix_len = 0
+
+
+ # minimum word infix length to index
+ # optional, default is 0 (do not index infixes)
+ #
+ # min_infix_len = 0
+
+
+ # list of fields to limit prefix/infix indexing to
+ # optional, default value is empty (index all fields in prefix/infix mode)
+ #
+ # prefix_fields = filename
+ # infix_fields = url, domain
+
+
+ # enable star-syntax (wildcards) when searching prefix/infix indexes
+ # search-time only, does not affect indexing, can be 0 or 1
+ # optional, default is 0 (do not use wildcard syntax)
+ #
+ # enable_star = 1
+
+
+ # expand keywords with exact forms and/or stars when searching suitable indexes
+ # search-time only, does not affect indexing, can be 0 or 1
+ # optional, default is 0 (do not expand keywords)
+ #
+ # expand_keywords = 1
+
+
+ # n-gram length to index, for CJK indexing
+ # only supports 0 and 1 for now, other lengths to be implemented
+ # optional, default is 0 (disable n-grams)
+ #
+ # ngram_len = 1
+
+
+ # n-gram characters list, for CJK indexing
+ # optional, default is empty
+ #
+ # ngram_chars = U+3000..U+2FA1F
+
+
+ # phrase boundary characters list
+ # optional, default is empty
+ #
+ # phrase_boundary = ., ?, !, U+2026 # horizontal ellipsis
+
+
+ # phrase boundary word position increment
+ # optional, default is 0
+ #
+ # phrase_boundary_step = 100
+
+
+ # blended characters list
+ # blended chars are indexed both as separators and valid characters
+ # for instance, AT&T will result in 3 tokens ("at", "t", and "at&t")
+ # optional, default is empty
+ #
+ # blend_chars = +, &, U+23
+
+
+ # whether to strip HTML tags from incoming documents
+ # known values are 0 (do not strip) and 1 (do strip)
+ # optional, default is 0
+ html_strip = 0
+
+ # what HTML attributes to index if stripping HTML
+ # optional, default is empty (do not index anything)
+ #
+ # html_index_attrs = img=alt,title; a=title;
+
+
+ # what HTML elements contents to strip
+ # optional, default is empty (do not strip element contents)
+ #
+ # html_remove_elements = style, script
+
+
+ # whether to preopen index data files on startup
+ # optional, default is 0 (do not preopen), searchd-only
+ #
+ # preopen = 1
+
+
+ # whether to keep dictionary (.spi) on disk, or cache it in RAM
+ # optional, default is 0 (cache in RAM), searchd-only
+ #
+ # ondisk_dict = 1
+
+
+ # whether to enable in-place inversion (2x less disk, 90-95% speed)
+ # optional, default is 0 (use separate temporary files), indexer-only
+ #
+ # inplace_enable = 1
+
+
+ # in-place fine-tuning options
+ # optional, defaults are listed below
+ #
+ # inplace_hit_gap = 0 # preallocated hitlist gap size
+ # inplace_docinfo_gap = 0 # preallocated docinfo gap size
+ # inplace_reloc_factor = 0.1 # relocation buffer size within arena
+ # inplace_write_factor = 0.1 # write buffer size within arena
+
+
+ # whether to index original keywords along with stemmed versions
+ # enables "=exactform" operator to work
+ # optional, default is 0
+ #
+ # index_exact_words = 1
+
+
+ # position increment on overshort (less than min_word_len) words
+ # optional, allowed values are 0 and 1, default is 1
+ #
+ # overshort_step = 1
+
+
+ # position increment on stopword
+ # optional, allowed values are 0 and 1, default is 1
+ #
+ # stopword_step = 1
+
+
+ # hitless words list
+ # positions for these keywords will not be stored in the index
+ # optional, allowed values are 'all', or a list file name
+ #
+ # hitless_words = all
+ # hitless_words = hitless.txt
+}
+
+