public class URLTextInputter extends IsCloseableObject implements TextInputter
Strips tags naively from files which appear to contain HTML text.
Modifier and Type | Field and Description |
---|---|
protected java.lang.String |
loadedText
The loaded text.
|
Constructor and Description |
---|
URLTextInputter()
Create URL text inputter.
|
Modifier and Type | Method and Description |
---|---|
void |
enableGapFixer(boolean fixGaps)
Enable gap element fixer.
|
void |
enableOrigFixer(boolean fixOrig)
Enable orig element fixer.
|
void |
enableSplitWordsFixer(boolean fixSplitWords,
java.util.List<PatternReplacer> patternReplacers)
Enable split words fixer.
|
int |
getSegmentCount()
Returns number of text segments.
|
java.lang.String |
getSegmentName(int segmentNumber)
Returns name of specified segment.
|
java.lang.String |
getSegmentText(int segmentNumber)
Returns specified segment of loaded text.
|
java.lang.String |
getSegmentText(java.lang.String segmentName)
Returns specified segment of loaded text.
|
void |
loadText(java.lang.String str)
Reads text from a String.
|
void |
loadText(java.lang.String str,
java.lang.String xmlSchemaURI)
Reads text from a string using a specified XML schema.
|
void |
loadText(java.net.URL url,
java.lang.String encoding)
Loads text from a URL.
|
void |
loadText(java.net.URL url,
java.lang.String encoding,
java.lang.String xmlSchemaURI)
Loads text from a URL.
|
void |
setSegmentText(int segmentNumber,
java.io.File segmentTextFile)
Updates specified segment of loaded text from file.
|
void |
setSegmentText(int segmentNumber,
java.lang.String segmentText)
Updates specified segment of loaded text.
|
void |
setSegmentText(java.lang.String segmentName,
java.io.File segmentTextFile)
Updates specified segment of loaded text from file.
|
void |
setSegmentText(java.lang.String segmentName,
java.lang.String segmentText)
Updates specified segment of loaded text.
|
boolean |
usesSegmentFiles()
Does inputter use segment files?
|
close
public void loadText(java.net.URL url, java.lang.String encoding) throws java.io.IOException
loadText
in interface TextInputter
url
- URL from which to read text.
encoding
- Text encoding.
java.io.IOException
- If an I/O error occurs.
public void loadText(java.net.URL url, java.lang.String encoding, java.lang.String xmlSchemaURI) throws java.io.IOException
loadText
in interface TextInputter
url
- URL from which to read text.
encoding
- Text encoding.
xmlSchemaURI
- XML schema (ignored).
java.io.IOException
- If an I/O error occurs.
public void loadText(java.lang.String str) throws java.lang.Exception
loadText
in interface TextInputter
str
- String from which to read text.
java.io.IOException
- If an error occurs.
java.lang.Exception
public void loadText(java.lang.String str, java.lang.String xmlSchemaURI) throws java.lang.Exception
loadText
in interface TextInputter
str
- String from which to read text.
xmlSchemaURI
- String URI specifying XML schema.
java.lang.Exception
- If an error occurs.
The schema and schema type should be ignored when the input is not an XML file.
public int getSegmentCount()
getSegmentCount
in interface TextInputter
public java.lang.String getSegmentName(int segmentNumber)
getSegmentName
in interface TextInputter
segmentNumber
- The segment number (starts at 0).
public java.lang.String getSegmentText(int segmentNumber)
getSegmentText
in interface TextInputter
segmentNumber
- The segment number (starts at 0).
public java.lang.String getSegmentText(java.lang.String segmentName)
getSegmentText
in interface TextInputter
segmentName
- The segment name.
public void setSegmentText(int segmentNumber, java.lang.String segmentText)
setSegmentText
in interface TextInputter
segmentNumber
- The segment number (starts at 0).
segmentText
- The updated segment text.
public void setSegmentText(java.lang.String segmentName, java.lang.String segmentText)
setSegmentText
in interface TextInputter
segmentName
- The segment name.
segmentText
- The updated segment text.
public void setSegmentText(int segmentNumber, java.io.File segmentTextFile)
setSegmentText
in interface TextInputter
segmentNumber
- The segment number (starts at 0).
segmentTextFile
- The file containing the updated segment text.
public void setSegmentText(java.lang.String segmentName, java.io.File segmentTextFile)
setSegmentText
in interface TextInputter
segmentName
- The segment name.
segmentTextFile
- The file containing the updated segment text.
public void enableGapFixer(boolean fixGaps)
enableGapFixer
in interface TextInputter
fixGaps
- true to fix gap tags.
No-op here.
public void enableOrigFixer(boolean fixOrig)
enableOrigFixer
in interface TextInputter
fixOrig
- true to fix orig tags.
No-op here.
public void enableSplitWordsFixer(boolean fixSplitWords, java.util.List<PatternReplacer> patternReplacers)
enableSplitWordsFixer
in interface TextInputter
fixSplitWords
- true to fix selected split words.
patternReplacers
- Patterns for fixing split words.
No-op here.
public boolean usesSegmentFiles()
usesSegmentFiles
in interface TextInputter