public class FixXMLQuotes
extends java.lang.Object
Usage:
java edu.northwestern.at.morphadorner.tools.fixquotes.FixXMLQuotes softtags.txt jumptags.txt outputdirectory input1.xml input2.xml ...
softtags.txt -- text file containing list of soft XML tags, one per line.
jumptags.txt -- text file containing list of jump XML tags, one per line.
outputdirectory -- output directory to receive xml files with quotes fixed.
input*.xml -- input TEI XML files.
Since the "quotification" relies on heuristics, not all quotes will be converted correctly.
Modifier and Type | Field and Description |
---|---|
protected static java.lang.String |
ap
Temporary apostrophe marker.
|
protected static java.lang.String |
apos
Apostrophe replacement text.
|
protected static TaggedStrings |
contractions
Contractions.
|
protected static java.util.regex.Matcher |
contractionsMatcher
Pattern matcher for matching contractions.
|
protected static int |
currentDocNumber
Current document.
|
protected static boolean |
debug
True for debugging output.
|
protected static int |
docsToProcess
Number of documents to process.
|
protected static org.w3c.dom.Document |
document
DOM document.
|
protected static java.lang.String |
dq
Temporary double quote marker.
|
protected static int |
INITPARAMS
Number of parameters before the input file specifications.
|
protected static java.util.Set<java.lang.String> |
jumpTags
Jump tags.
|
protected static java.lang.String |
ldquo
Left double quote replacement text.
|
protected static java.lang.String |
lsquo
Left single quote replacement text.
|
protected static java.lang.String |
outputDirectory
Output directory.
|
protected static java.lang.String |
prevChar
Previous character of last text segment.
|
protected static java.lang.String |
rdquo
Right double quote replacement text.
|
protected static java.lang.String |
rsquo
Right single quote replacement text.
|
protected static java.util.Set<java.lang.String> |
softTags
Soft tags.
|
protected static java.lang.String |
sq
Temporary single quote marker.
|
Modifier | Constructor and Description |
---|---|
protected |
FixXMLQuotes()
Allow overrides but not instantiation.
|
Modifier and Type | Method and Description |
---|---|
protected static boolean |
initialize(java.lang.String[] args)
Initialize.
|
protected static boolean |
isHardTag(java.lang.String tag)
Is tag a hard tag?
|
protected static boolean |
isJumpTag(java.lang.String tag)
Is tag a jump tag?
|
protected static boolean |
isSoftTag(java.lang.String tag)
Is tag a soft tag?
|
static void |
main(java.lang.String[] args)
Main program.
|
protected static int |
processFiles(java.lang.String[] args)
Process files.
|
protected static void |
processOneFile(java.lang.String xmlFileName)
Process one file.
|
protected static void |
terminate(int filesProcessed,
long processingTime)
Terminate.
|
protected static void |
traverse(org.w3c.dom.Node node)
Traverse DOM tree and fix quotes.
|
protected static org.w3c.dom.Document document
protected static final int INITPARAMS
protected static int docsToProcess
protected static int currentDocNumber
protected static java.lang.String outputDirectory
protected static TaggedStrings contractions
protected static java.util.regex.Matcher contractionsMatcher
protected static final java.lang.String lsquo
protected static final java.lang.String ldquo
protected static final java.lang.String rsquo
protected static final java.lang.String rdquo
protected static final java.lang.String apos
protected static final java.lang.String sq
protected static final java.lang.String dq
protected static final java.lang.String ap
protected static java.lang.String prevChar
protected static java.util.Set<java.lang.String> softTags
protected static java.util.Set<java.lang.String> jumpTags
protected static boolean debug
public static void main(java.lang.String[] args)
args
- Program parameters.
protected static boolean initialize(java.lang.String[] args)
protected static void processOneFile(java.lang.String xmlFileName)
xmlFileName
- XML input file name.
protected static void traverse(org.w3c.dom.Node node)
node
- Root node of tree.
protected static int processFiles(java.lang.String[] args)
protected static void terminate(int filesProcessed, long processingTime)
filesProcessed
- Number of files processed.
processingTime
- Processing time in seconds.
protected static boolean isSoftTag(java.lang.String tag)
tag
- The XML tag.
protected static boolean isJumpTag(java.lang.String tag)
tag
- The XML tag.
protected static boolean isHardTag(java.lang.String tag)
tag
- The XML tag.