edu.northwestern.at.morphadorner.tools.findteitextlanguage
Class FindTEITextLanguage

java.lang.Object
  extended by edu.northwestern.at.morphadorner.tools.findteitextlanguage.FindTEITextLanguage

public class FindTEITextLanguage
extends java.lang.Object

Find languages for TEI-encoded text.


Nested Class Summary
static class FindTEITextLanguage.DocData
          Hold language recognition results for one document.
 
Field Summary
protected static int currentFileNumber
          Number of the current document.
protected static int filesToProcess
          Number of documents to process.
protected static int INITPARAMS
          Number of parameters before the input file specifications.
protected static int longestTitle
          Length of the longest document title.
protected static java.util.Set<FindTEITextLanguage.DocData> outputSet
          Holds the sorted work titles and languages for output.
protected static javax.xml.parsers.SAXParserFactory parserFactory
          SAX parser factory.
protected static LanguageRecognizer recognizer
          Language recognizer.
 
Constructor Summary
FindTEITextLanguage()
           
 
Method Summary
protected static boolean initialize(java.lang.String[] args)
          Initialize.
static void main(java.lang.String[] args)
          Main program.
protected static void outputResults(java.lang.String outputFileName)
          Output results to tabular file.
protected static void printResults()
          Print results.
protected static int processFiles(java.lang.String[] args)
          Process files.
protected static void processOneFile(java.lang.String xmlFileName)
          Process one file.
protected static void terminate(int filesProcessed, long processingTime)
          Terminate.
 
Methods inherited from class java.lang.Object
clone, equals, finalize, getClass, hashCode, notify, notifyAll, toString, wait, wait, wait
 

Field Detail

recognizer

protected static LanguageRecognizer recognizer
Language recognizer.


INITPARAMS

protected static final int INITPARAMS
Number of parameters before the input file specifications.

See Also:
Constant Field Values

outputSet

protected static java.util.Set<FindTEITextLanguage.DocData> outputSet
Holds the sorted work titles and languages for output.


parserFactory

protected static javax.xml.parsers.SAXParserFactory parserFactory
SAX parser factory.


filesToProcess

protected static int filesToProcess
Number of documents to process.


currentFileNumber

protected static int currentFileNumber
Number of the current document.


longestTitle

protected static int longestTitle
Length of the longest document title.

Constructor Detail

FindTEITextLanguage

public FindTEITextLanguage()
Method Detail

main

public static void main(java.lang.String[] args)
Main program.

Parameters:
args - Program parameters.

initialize

protected static boolean initialize(java.lang.String[] args)
Initialize.


processOneFile

protected static void processOneFile(java.lang.String xmlFileName)
Process one file.

Parameters:
xmlFileName - Input file name to check for language.

processFiles

protected static int processFiles(java.lang.String[] args)
Process files.


printResults

protected static void printResults()
                            throws java.lang.Exception
Print results.

Throws:
java.lang.Exception

outputResults

protected static void outputResults(java.lang.String outputFileName)
                             throws java.lang.Exception
Output results to tabular file.

Parameters:
outputFileName - Output file name.
Throws:
java.lang.Exception

terminate

protected static void terminate(int filesProcessed,
                                long processingTime)
Terminate.

Parameters:
filesProcessed - Number of files processed.
processingTime - Processing time in seconds.