public class PseudoPageAdderFilter extends ExtendedXMLFilterImpl
Modifier and Type | Field and Description |
---|---|
protected QueueStack<org.xml.sax.Attributes> |
attrStack
Element attributes stack.
|
protected QueueStack<java.lang.String> |
divStack
Div tag stack.
|
protected java.util.Set<java.lang.String> |
pseudoPageContainerDivTypes
Pseudo-page ending div types.
|
protected int |
pseudoPageCount
Current pseudo page count.
|
protected int |
pseudoPageSize
Page size in number of tokens.
|
protected boolean |
pseudoPageStarted
True if pseudo page started.
|
protected int |
pseudoPageWordCount
Current pseudo page word count.
|
protected java.util.List<java.lang.String> |
tagList
List of tags for determining node ancestry of each word.
|
Constructor and Description |
---|
PseudoPageAdderFilter(org.xml.sax.XMLReader reader,
int pseudoPageSize,
java.lang.String pageEndingDivTypes)
Create pseudo page adder filter.
|
Modifier and Type | Method and Description |
---|---|
void |
characters(char[] ch,
int start,
int length)
Handle character data.
|
PendingElement |
createPseudoPageElement(java.lang.String uri,
boolean forcedEmit,
boolean start,
java.lang.String path)
Create a pseudo page milestone.
|
void |
emitPseudoPageElement(PendingElement pseudoPageElement)
Emit a pseudo page milestone.
|
void |
endElement(java.lang.String uri,
java.lang.String localName,
java.lang.String qName)
Handle end of an element.
|
void |
ignorableWhitespace(char[] ch,
int start,
int length)
Handle whitespace.
|
void |
startElement(java.lang.String uri,
java.lang.String localName,
java.lang.String qName,
org.xml.sax.Attributes atts)
Handle start of an XML element.
|
removeAttribute, setAttributeValue, setAttributeValue, setAttributeValue
endDocument, endPrefixMapping, error, fatalError, getContentHandler, getDTDHandler, getEntityResolver, getErrorHandler, getFeature, getParent, getProperty, notationDecl, parse, parse, processingInstruction, resolveEntity, setContentHandler, setDocumentLocator, setDTDHandler, setEntityResolver, setErrorHandler, setFeature, setParent, setProperty, skippedEntity, startDocument, startPrefixMapping, unparsedEntityDecl, warning
protected java.util.List<java.lang.String> tagList
protected int pseudoPageSize
protected int pseudoPageCount
protected int pseudoPageWordCount
protected boolean pseudoPageStarted
protected QueueStack<java.lang.String> divStack
protected QueueStack<org.xml.sax.Attributes> attrStack
protected java.util.Set<java.lang.String> pseudoPageContainerDivTypes
public PseudoPageAdderFilter(org.xml.sax.XMLReader reader, int pseudoPageSize, java.lang.String pageEndingDivTypes)
reader
- XML input reader to which this filter applies.
pseudoPageSize
- Number of words in a pseudopage.
pageEndingDivTypes
- div types that end a pseudopage.
public void startElement(java.lang.String uri, java.lang.String localName, java.lang.String qName, org.xml.sax.Attributes atts) throws org.xml.sax.SAXException
startElement
in interface org.xml.sax.ContentHandler
startElement
in class org.xml.sax.helpers.XMLFilterImpl
uri
- The XML element's URI.
localName
- The XML element's local name.
qName
- The XML element's qname.
atts
- The XML element's attributes.
org.xml.sax.SAXException
public void characters(char[] ch, int start, int length) throws org.xml.sax.SAXException
characters
in interface org.xml.sax.ContentHandler
characters
in class org.xml.sax.helpers.XMLFilterImpl
ch
- Array of characters.
start
- The starting position in the array.
length
- The number of characters.
org.xml.sax.SAXException
- If there is an error.
public void ignorableWhitespace(char[] ch, int start, int length) throws org.xml.sax.SAXException
ignorableWhitespace
in interface org.xml.sax.ContentHandler
ignorableWhitespace
in class org.xml.sax.helpers.XMLFilterImpl
ch
- Array of characters.
start
- The starting position in the array.
length
- The number of characters.
org.xml.sax.SAXException
- If there is an error.
public void endElement(java.lang.String uri, java.lang.String localName, java.lang.String qName) throws org.xml.sax.SAXException
endElement
in interface org.xml.sax.ContentHandler
endElement
in class org.xml.sax.helpers.XMLFilterImpl
uri
- The XML element's URI.
localName
- The XML element's local name.
qName
- The XML element's qname.
org.xml.sax.SAXException
public PendingElement createPseudoPageElement(java.lang.String uri, boolean forcedEmit, boolean start, java.lang.String path)
uri
- Element URI.
forcedEmit
- Emit the pseudo page milestone even if not enough words have
accumulated, as long as there is at least one word in the current block.
start
- true if starting milestone, false if ending.
path
- Path attribute. May be null.
public void emitPseudoPageElement(PendingElement pseudoPageElement)
pseudoPageElement
- The pseudo page element to emit.