package edu.northwestern.at.morphadorner.examples;
import java.io.*;
import java.util.*;
import edu.northwestern.at.morphadorner.tools.*;
import edu.northwestern.at.morphadorner.corpuslinguistics.sentencemelder.*;
import edu.northwestern.at.utils.*;
/**
 * Example program that reads an adorned XML file with
 * {@link AdornedXMLReader} and prints information about it to standard
 * output, encoded as UTF-8.
 *
 * <p>The program prints the number of words and sentences in the file,
 * the text of the first five sentences, the attributes of every word in
 * the third sentence (when there is one), and finally the XML generated
 * for each pair of word IDs given on the command line.</p>
 *
 * <p>Command line: {@code adornedfile.xml [firstWordID secondWordID]...}</p>
 */
public class UsingAnAdornedText
{
    /** Usage message reported when the command line is malformed. */
    private static final String USAGE =
        "Usage: UsingAnAdornedText adornedfile.xml " +
        "[firstWordID secondWordID]...";

    /** Width of the dashed separator lines and of the wrapped sentence text. */
    private static final int LINE_WIDTH = 70;

    /** Maximum number of leading sentences whose text is displayed. */
    private static final int SENTENCES_TO_SHOW = 5;

    /** Reader for the adorned XML file.  Assigned by {@link #doit}. */
    protected static AdornedXMLReader xmlReader;

    /** Word IDs obtained from the reader.  Replaced by {@link #doit}. */
    protected static List<String> wordIDs =
        ListFactory.createNewList();

    /** UTF-8 stream wrapping standard output.  Assigned by {@link #doit}. */
    protected static PrintStream printStream;

    /**
     * Program entry point.
     *
     * <p>Prints a usage message to standard error when the arguments are
     * malformed.  Any exception raised while processing is reported with
     * a stack trace rather than propagated.</p>
     *
     * @param args  Adorned XML file name followed by zero or more pairs
     *              of word IDs.
     */
    public static void main( String[] args )
    {
        if ( !argumentsAreValid( args ) )
        {
            System.err.println( USAGE );
            return;
        }

        try
        {
            doit( args );
        }
        catch ( Exception e )
        {
            e.printStackTrace();
        }
    }

    /**
     * Reads the adorned file named by the first argument and prints the
     * report described in the class comment.
     *
     * @param args  Adorned XML file name followed by zero or more pairs
     *              of word IDs.  XML is generated for each pair.
     *
     * @throws IllegalArgumentException
     *              if no file name is given or the word IDs do not come
     *              in pairs.  Checked before any file is read.
     * @throws Exception
     *              if the output stream cannot be created or reading the
     *              adorned file fails.
     */
    public static void doit( String[] args )
        throws Exception
    {
        //  Fail fast: the original indexed args[ 0 ] .. args[ 4 ]
        //  unchecked and only failed after the whole text had been read.
        if ( !argumentsAreValid( args ) )
        {
            throw new IllegalArgumentException( USAGE );
        }

        String fileName = args[ 0 ];

        //  Auto-flushing UTF-8 wrapper around standard output.
        printStream =
            new PrintStream
            (
                new BufferedOutputStream( System.out ) ,
                true ,
                "utf-8"
            );

        xmlReader = new AdornedXMLReader( fileName );
        wordIDs = xmlReader.getAdornedWordIDs();

        printStream.println
        (
            "Read " +
            StringUtils.formatNumberWithCommas( wordIDs.size() ) +
            " words from " + fileName + " ."
        );

        List<List<ExtendedAdornedWord>> sentences =
            xmlReader.getSentences();

        printStream.println
        (
            "Read " +
            StringUtils.formatNumberWithCommas( sentences.size() ) +
            " sentences from " + fileName + " ."
        );

        printLeadingSentences( sentences );

        //  The word listing uses the third sentence, so it needs at
        //  least three sentences to be present.
        if ( sentences.size() > 2 )
        {
            printWordDetails( sentences.get( 2 ) );
        }

        //  Remaining arguments are consumed two at a time as
        //  (first word ID, second word ID) pairs.
        for ( int i = 1 ; i + 1 < args.length ; i += 2 )
        {
            generateXML( args[ i ] , args[ i + 1 ] );
        }
    }

    /**
     * Prints the XML that the reader generates for a pair of word IDs,
     * framed by dashed separator lines.
     *
     * <p>Uses the static reader and output stream assigned by
     * {@link #doit}, so {@code doit} must have run first.</p>
     *
     * @param firstWordID   First word ID passed to the reader.
     * @param secondWordID  Second word ID passed to the reader.
     */
    public static void generateXML
    (
        String firstWordID ,
        String secondWordID
    )
    {
        String xml = xmlReader.generateXML( firstWordID , secondWordID );

        printStream.println();
        printStream.println( "Generated XML for words " +
            firstWordID + " through " + secondWordID + ":" );
        printStream.println();
        printSeparator();
        printStream.println( xml );
        printSeparator();
        printStream.println();
    }

    /**
     * Checks that a file name is present and that the arguments after it
     * come in pairs.
     *
     * @param args  Command line arguments.  May be null.
     *
     * @return      true if the arguments are usable by {@link #doit}.
     */
    private static boolean argumentsAreValid( String[] args )
    {
        return
            ( args != null ) &&
            ( args.length >= 1 ) &&
            ( ( ( args.length - 1 ) % 2 ) == 0 );
    }

    /**
     * Prints the reconstituted, line-wrapped text of the first few
     * sentences, numbered from one.
     *
     * @param sentences  All sentences read from the adorned file.
     */
    private static void printLeadingSentences
    (
        List<List<ExtendedAdornedWord>> sentences
    )
    {
        printStream.println();
        printStream.println
        (
            "The first five sentences are:"
        );
        printStream.println();
        printSeparator();

        SentenceMelder melder = new SentenceMelder();

        //  The file may hold fewer sentences than we want to show.
        int sentencesToShow =
            Math.min( SENTENCES_TO_SHOW , sentences.size() );

        for ( int i = 0 ; i < sentencesToShow ; i++ )
        {
            String sentenceText =
                melder.reconstituteSentence( sentences.get( i ) );

            sentenceText =
                StringUtils.wrapText(
                    sentenceText, Env.LINE_SEPARATOR , LINE_WIDTH );

            printStream.println
            (
                ( i + 1 ) + ": " +
                sentenceText
            );
        }

        printSeparator();
        printStream.println();
    }

    /**
     * Prints every attribute of each word in a sentence, one attribute
     * per line, with the words numbered from one.
     *
     * @param sentence  The words of the sentence to list.
     */
    private static void printWordDetails
    (
        List<ExtendedAdornedWord> sentence
    )
    {
        printStream.println();
        printStream.println
        (
            "Words in the third sentence:"
        );
        printStream.println();
        printSeparator();

        for ( int i = 0 ; i < sentence.size() ; i++ )
        {
            ExtendedAdornedWord adornedWord = sentence.get( i );

            printStream.println( "Word " + ( i + 1 ) );
            printStream.println(
                " Word ID : " + adornedWord.getID() );
            printStream.println(
                " Token : " + adornedWord.getToken() );
            printStream.println(
                " Spelling : " + adornedWord.getSpelling() );
            printStream.println(
                " Lemmata : " + adornedWord.getLemmata() );
            printStream.println(
                " Pos tags : " +
                adornedWord.getPartsOfSpeech() );
            printStream.println(
                " Standard spelling: " +
                adornedWord.getStandardSpelling() );
            printStream.println(
                " Sentence number : " +
                adornedWord.getSentenceNumber() );
            printStream.println(
                " Word number : " +
                adornedWord.getWordNumber() );
            printStream.println(
                " XML path : " +
                adornedWord.getPath() );
            printStream.println(
                " is EOS : " +
                adornedWord.getEOS() );
            printStream.println(
                " word part flag : " +
                adornedWord.getPart() );
            printStream.println(
                " word ordinal : " +
                adornedWord.getOrd() );
            printStream.println(
                " page number : " +
                adornedWord.getPageNumber() );
            printStream.println(
                " Main or side text: " +
                adornedWord.getMainSide() );
            printStream.println(
                " is spoken : " +
                adornedWord.getSpoken() );
            printStream.println(
                " is verse : " +
                adornedWord.getVerse() );
            printStream.println(
                " in jump tag : " +
                adornedWord.getInJumpTag() );
            printStream.println(
                " is a gap : " +
                adornedWord.getGap() );
        }

        printSeparator();
        printStream.println();
    }

    /** Prints a line of dashes used to frame each section of output. */
    private static void printSeparator()
    {
        printStream.println( StringUtils.dupl( "-" , LINE_WIDTH ) );
    }
}