public abstract class AbstractLemmatizer extends IsCloseableObject implements Lemmatizer, UsesLogger
Modifier and Type | Field and Description |
---|---|
protected java.util.Set<java.lang.String> |
dictionary
The dictionary.
|
protected char |
lemmaSeparator
Default lemma separator is the vertical bar character.
|
protected java.lang.String |
lemmaSeparatorString |
protected Lexicon |
lexicon
The lexicon.
|
protected Logger |
logger
Logger used for output.
|
Constructor and Description |
---|
AbstractLemmatizer() |
Modifier and Type | Method and Description |
---|---|
boolean |
cantLemmatize(java.lang.String spelling)
Check for words that cannot be lemmatized.
|
int |
countLemmata(java.lang.String lemma)
Get number of lemmata comprising this lemma.
|
java.lang.String |
getLemmaSeparator()
Get the lemma separator string.
|
Logger |
getLogger()
Get the logger.
|
boolean |
isCompoundLemma(java.lang.String lemma)
Check if lemma is compound lemma.
|
java.lang.String |
joinLemmata(java.lang.String[] lemmata)
Join separate lemmata into a compound lemma.
|
java.lang.String |
joinLemmata(java.lang.String[] lemmata,
java.lang.String separator)
Join separate lemmata into a compound lemma.
|
abstract java.lang.String |
lemmatize(java.lang.String spelling)
Returns a lemma given a spelling.
|
abstract java.lang.String |
lemmatize(java.lang.String spelling,
java.lang.String wordClass)
Returns a lemma given a spelling and a part of speech.
|
void |
setDictionary(java.util.Set<java.lang.String> dictionary)
Set the dictionary for checking lemmata.
|
void |
setLexicon(Lexicon lexicon)
Set the lexicon.
|
void |
setLogger(Logger logger)
Set the logger.
|
java.lang.String[] |
splitLemma(java.lang.String lemma)
Split compound lemma into separate lemmata.
|
close
protected char lemmaSeparator
protected java.lang.String lemmaSeparatorString
protected Logger logger
protected Lexicon lexicon
protected java.util.Set<java.lang.String> dictionary
public Logger getLogger()
getLogger
in interface UsesLogger
public void setLogger(Logger logger)
setLogger
in interface UsesLogger
logger
- The logger.
public void setLexicon(Lexicon lexicon)
setLexicon
in interface Lemmatizer
lexicon
- The lexicon.
public void setDictionary(java.util.Set<java.lang.String> dictionary)
setDictionary
in interface Lemmatizer
dictionary
- The dictionary as a string set.
May be null.
public abstract java.lang.String lemmatize(java.lang.String spelling)
lemmatize
in interface Lemmatizer
spelling
- The spelling.
public abstract java.lang.String lemmatize(java.lang.String spelling, java.lang.String wordClass)
lemmatize
in interface Lemmatizer
spelling
- The spelling.
wordClass
- The word class.
The word class should be a major word class as defined in PartOfSpeech.
public boolean cantLemmatize(java.lang.String spelling)
cantLemmatize
in interface Lemmatizer
spelling
- The spelling to be lemmatized.
public java.lang.String getLemmaSeparator()
getLemmaSeparator
in interface Lemmatizer
public java.lang.String joinLemmata(java.lang.String[] lemmata, java.lang.String separator)
joinLemmata
in interface Lemmatizer
lemmata
- String array of lemmata.
separator
- String to separate lemmata.
public java.lang.String joinLemmata(java.lang.String[] lemmata)
joinLemmata
in interface Lemmatizer
lemmata
- String array of part of speech lemmas.
public java.lang.String[] splitLemma(java.lang.String lemma)
splitLemma
in interface Lemmatizer
lemma
- The compound lemma.
public boolean isCompoundLemma(java.lang.String lemma)
isCompoundLemma
in interface Lemmatizer
lemma
- The lemma.
public int countLemmata(java.lang.String lemma)
countLemmata
in interface Lemmatizer
lemma
- The lemma.