public class TextTiling
extends java.lang.Object
Use of this code is free for academic, education, research and other non-profit making uses only.
Modifier and Type | Field and Description |
---|---|
protected RawText |
C
Collection for segmentation.
|
protected double[] |
depth_score
Depth scores.
|
protected static java.io.PrintStream |
printStream
Wrapper for printStream to allow utf-8 output.
|
protected int |
s
Step size.
|
protected StopWords |
S
Stopwords for noise reduction.
|
protected java.util.List<java.lang.Integer> |
segmentation
Segment boundaries.
|
protected double[] |
sim_score
Similarity scores and the corresponding locations.
|
protected int[] |
site_loc |
protected java.util.Map<java.lang.String,java.lang.String> |
stemOf
Token -> stem dictionary
|
protected int |
w
Size of sliding window.
|
Constructor and Description |
---|
TextTiling()
Create text tiler.
|
TextTiling(RawText c,
StopWords s)
Create text tiler from text collection and stop word set.
|
Modifier and Type | Method and Description |
---|---|
protected void |
blockAdd(java.lang.String term,
java.util.Map<java.lang.String,java.lang.Integer> B)
Add a term to a block
Creation date: (07/12/99 01:41:24)
|
protected double |
blockCosine(java.util.Map<java.lang.String,java.lang.Integer> B1,
java.util.Map<java.lang.String,java.lang.Integer> B2)
Compute the cosine similarity measure for two blocks
Creation date: (07/12/99 01:49:16)
|
protected void |
blockRemove(java.lang.String term,
java.util.Map<java.lang.String,java.lang.Integer> B)
Remove a term from the block.
|
void |
boundaryIdentification()
Identify the boundaries
|
void |
depthScore()
Compute depth score after applying similarityDetermination()
|
java.util.List<java.lang.Integer> |
getSegmentation()
Return segmentation list.
|
java.util.Map<java.lang.String,java.lang.String> |
getStemOf()
Return stem dictionary.
|
StopWords |
getStopWords()
Return stop words.
|
protected boolean |
include(int i)
Decide whether word i is worth using as feature for segmentation.
|
protected void |
preprocess()
Perform some preprocessing to save execution time
|
void |
setStepSize(int stepSize) |
void |
setWindowSize(int windowSize) |
void |
similarityDetermination()
Compute the similarity score.
|
protected static java.io.PrintStream printStream
protected int w
protected int s
protected RawText C
protected StopWords S
protected java.util.Map<java.lang.String,java.lang.String> stemOf
protected double[] sim_score
protected int[] site_loc
protected double[] depth_score
protected java.util.List<java.lang.Integer> segmentation
public java.util.List<java.lang.Integer> getSegmentation()
public java.util.Map<java.lang.String,java.lang.String> getStemOf()
public StopWords getStopWords()
protected void blockAdd(java.lang.String term, java.util.Map<java.lang.String,java.lang.Integer> B)
Parameters:
term - java.lang.String
B - java.util.HashMap
public void setWindowSize(int windowSize)
public void setStepSize(int stepSize)
protected double blockCosine(java.util.Map<java.lang.String,java.lang.Integer> B1, java.util.Map<java.lang.String,java.lang.Integer> B2)
Parameters:
B1 - java.util.HashMap
B2 - java.util.HashMap
protected void blockRemove(java.lang.String term, java.util.Map<java.lang.String,java.lang.Integer> B)
Parameters:
term - java.lang.String
B - java.util.HashMap
public void boundaryIdentification()
public void depthScore()
protected boolean include(int i)
Parameters:
i - int
protected void preprocess()
public void similarityDetermination()