common/org.gnit.lucenekmp.analysis.ngram/NGramTokenFilter

NGramTokenFilter

class NGramTokenFilter(input: TokenStream, minGram: Int, maxGram: Int, preserveOriginal: Boolean) : TokenFilter

Tokenizes the input into n-grams of the given size(s). As of Lucene 4.4, this token filter:

- handles supplementary characters correctly,
- emits all n-grams for the same token at the same position,
- does not modify offsets,
- sorts n-grams first by their offset in the original token, then by increasing length (meaning that "abc" will give "a", "ab", "abc", "b", "bc", "c").

If you were using this [TokenFilter] to perform partial highlighting, that will no longer work, because this filter does not update offsets. Instead, modify your analysis chain to use [NGramTokenizer], and potentially override [NGramTokenizer.isTokenChar] to perform pre-tokenization.

Constructors

NGramTokenFilter

constructor(input: TokenStream, minGram: Int, maxGram: Int, preserveOriginal: Boolean)

constructor(input: TokenStream, gramSize: Int)

Types

object Companion

Properties

attributeClassesIterator

val attributeClassesIterator: Iterator<Any>

attributeFactory

val attributeFactory: AttributeFactory

attributeImplsIterator

val attributeImplsIterator: Iterator<AttributeImpl>

Functions

fun <T : Attribute> addAttribute(attClass: KClass<T>): T

addAttributeImpl

fun addAttributeImpl(att: AttributeImpl)

fun captureState(): AttributeSource.State?

clearAttributes

fun clearAttributes()

cloneAttributes

fun cloneAttributes(): AttributeSource

open override fun close()

fun copyTo(target: AttributeSource)

open override fun end()

fun endAttributes()

open operator override fun equals(obj: Any?): Boolean

fun <T : Attribute> getAttribute(attClass: KClass<T>): T?

fun hasAttribute(attClass: KClass<out Attribute>): Boolean

fun hasAttributes(): Boolean

open override fun hashCode(): Int

override fun incrementToken(): Boolean

reflectAsString

fun reflectAsString(prependAttClass: Boolean): String

fun reflectWith(reflector: AttributeReflector)

removeAllAttributes

fun removeAllAttributes()

open override fun reset()

fun restoreState(state: AttributeSource.State?)

open override fun toString(): String

open override fun unwrap(): TokenStream