public class CountDividedWords
extends java.lang.Object
Modifier and Type | Field and Description |
---|---|
protected static int |
currentFileNumber
Current document.
|
protected static java.util.Set<java.lang.String> |
dividedWords
Tokens containing break marker.
|
protected static java.lang.String |
dividedWordsFileName
Divided words file name.
|
protected static int |
filesToProcess
Number of documents to process.
|
protected static int |
INITPARAMS
Number of parameters before the input file specifications.
|
protected static java.util.regex.Matcher |
partWordMatcher
Matcher for the pattern that matches a partial word's word ID.
|
protected static java.io.PrintStream |
printStream
Wrapper for printStream to allow UTF-8 output.
|
protected static int |
totalWords
Total words found.
|
protected static java.util.Map<java.lang.String,java.lang.Number> |
wordsAndCounts
Tokens and counts.
|
protected static java.lang.String |
wordsAndCountsFileName
Words and counts file name.
|
Constructor and Description |
---|
CountDividedWords() |
Modifier and Type | Method and Description |
---|---|
protected static java.util.List<java.lang.String> |
getWordPartIDs(AdornedWordsLoader adornedWordsLoader,
java.lang.String wordID)
Get all word IDs for a split word.
|
protected static java.lang.String |
getWordText(AdornedWordsLoader adornedWordsLoader,
java.lang.String wordID)
Get word text.
|
protected static boolean |
initialize(java.lang.String[] args)
Initialize.
|
protected static boolean |
isFirstWordPart(java.lang.String wordID)
Is word first part of split word?
|
static void |
main(java.lang.String[] args)
Main program.
|
protected static int |
processFiles(java.lang.String[] args)
Process files.
|
protected static void |
processOneFile(java.lang.String xmlFileName)
Process one file.
|
protected static void |
terminate(int filesProcessed,
long processingTime)
Terminate.
|
protected static final int INITPARAMS
protected static int filesToProcess
protected static int currentFileNumber
protected static int totalWords
protected static java.lang.String dividedWordsFileName
protected static java.lang.String wordsAndCountsFileName
protected static java.io.PrintStream printStream
protected static java.util.Map<java.lang.String,java.lang.Number> wordsAndCounts
protected static java.util.Set<java.lang.String> dividedWords
protected static java.util.regex.Matcher partWordMatcher
public static void main(java.lang.String[] args)
args
- Program parameters.
protected static boolean initialize(java.lang.String[] args) throws java.lang.Exception
java.lang.Exception
protected static void processOneFile(java.lang.String xmlFileName)
xmlFileName
- XML input file name.
protected static boolean isFirstWordPart(java.lang.String wordID)
wordID
- Word ID of possibly split word part.
protected static java.lang.String getWordText(AdornedWordsLoader adornedWordsLoader, java.lang.String wordID)
wordID
- Word ID of possibly split word.
protected static java.util.List<java.lang.String> getWordPartIDs(AdornedWordsLoader adornedWordsLoader, java.lang.String wordID)
wordID
- Word ID of possibly split word. If the word is not split, the result list contains the single word ID specified by the input value wordID.
protected static int processFiles(java.lang.String[] args) throws java.lang.Exception
java.lang.Exception
protected static void terminate(int filesProcessed, long processingTime)
filesProcessed
- Number of files processed.
processingTime
- Processing time in seconds.