public class CharUtils
extends java.lang.Object
This class provides various static utility methods for manipulating characters.
Modifier and Type | Field and Description |
---|---|
protected static java.util.regex.Matcher |
asteriskMatcher |
protected static java.util.regex.Pattern |
asteriskPattern
Pattern for 1 or more asterisks.
|
static char |
ASTERISM
Unicode asterism.
|
static java.lang.String |
ASTERISM_STRING |
static char |
BLACKCIRCLE
Unicode black circle/dot.
|
static java.lang.String |
BLACKCIRCLE_STRING |
static char |
BROKEN_VERTICAL_BAR
Broken vertical bar.
|
static java.lang.String |
BROKEN_VERTICAL_BAR_STRING
Broken vertical bar string.
|
static char |
CHAR_END_OF_TEXT_SECTION
End of text section marker character.
|
static java.lang.String |
CHAR_END_OF_TEXT_SECTION_STRING |
static char |
CHAR_FAKE_SOFT_HYPHEN
Substitute soft hyphen marker.
|
static java.lang.String |
CHAR_FAKE_SOFT_HYPHEN_STRING |
static char |
CHAR_GAP_MARKER
Gap marker inside words.
|
static java.lang.String |
CHAR_GAP_MARKER_STRING
Gap marker inside words as string.
|
static char |
CHAR_SUBSTITUTE_SINGLE_QUOTE
Substitute single quote character.
|
static java.lang.String |
CHAR_SUBSTITUTE_SINGLE_QUOTE_STRING |
static char |
CHAR_SUP_TEXT_MARKER
Superscript text marker.
|
static java.lang.String |
CHAR_SUP_TEXT_MARKER_STRING |
static char |
COMBINING_MACRON
Combining macron character.
|
static java.lang.String |
COMBINING_MACRON_STRING
Combining macron string.
|
static char |
DAGGER
Unicode dagger.
|
static java.lang.String |
DAGGER_STRING |
static char |
DEGREES_MARK
Degrees/Hours.
|
static java.lang.String |
DEGREES_MARK_STRING |
static java.lang.String |
digitsPattern
Digits pattern.
|
static char |
DIVIDER_VERTICAL_BAR
Divider vertical bar.
|
static java.lang.String |
DIVIDER_VERTICAL_BAR_STRING
Divider vertical bar string.
|
static char |
DOTTEDCIRCLE
Unicode dotted circle.
|
static java.lang.String |
DOTTEDCIRCLE_STRING |
static char |
DOUBLE_DAGGER
Unicode double dagger.
|
static java.lang.String |
DOUBLE_DAGGER_STRING |
static char |
ELLIPSIS
Ellipsis (...) character.
|
static java.lang.String |
ELLIPSIS_STRING |
static char |
EURO_SIGN
New Euro symbol.
|
protected static java.util.regex.Matcher |
hyphenMatcher |
protected static java.util.regex.Pattern |
hyphenPattern
Pattern for 1 or more hyphens.
|
static char |
INVISIBLE_SEPARATOR
Unicode invisible separator.
|
static java.lang.String |
INVISIBLE_SEPARATOR_STRING |
static char |
LDQUOTE
Left double curly quote.
|
static java.lang.String |
LDQUOTE_STRING |
static char |
LEFT_ANGLE_BRACKET
Unicode left angle bracket.
|
static java.lang.String |
LEFT_ANGLE_BRACKET_STRING |
protected static java.util.regex.Matcher |
lettersMatcher |
protected static java.util.regex.Pattern |
lettersPattern
Pattern for Unicode letters only.
|
static char |
LIGHT_VERTICAL_BAR
Light vertical bar.
|
static java.lang.String |
LIGHT_VERTICAL_BAR_STRING
Light vertical bar string.
|
static char |
LONG_DASH
Long dash.
|
static java.lang.String |
LONG_DASH_STRING
Long dash string.
|
static char |
LONG_S
Long s.
|
static java.lang.String |
LONG_S_STRING |
static char |
LOZENGE
Unicode lozenge.
|
static java.lang.String |
LOZENGE_STRING |
static char |
LSQUOTE
Left single curly quote.
|
static java.lang.String |
LSQUOTE_STRING |
static char |
MINUTES_MARK |
static char |
NONBREAKING_BLANK
Nonbreaking blank character.
|
static java.lang.String |
NONBREAKING_BLANK_STRING
Nonbreaking blank string.
|
static char |
NONBREAKING_HYPHEN
Nonbreaking hyphen character.
|
static java.lang.String |
NONBREAKING_HYPHEN_STRING
Nonbreaking hyphen string.
|
static char |
OLD_EURO_SIGN
Old Euro symbol.
|
static java.lang.String |
ordinalNumberPattern
Ordinal number pattern (English only!)
|
static char |
PARAGRAPH
Paragraph.
|
static java.lang.String |
PARAGRAPH_STRING |
protected static java.util.regex.Matcher |
possessiveAsteriskMatcher |
protected static java.util.regex.Pattern |
possessiveAsteriskPattern
Pattern for 1 or more asterisks followed by 's.
|
protected static java.util.regex.Matcher |
possessiveDashesMatcher |
protected static java.util.regex.Pattern |
possessiveDashesPattern
Pattern for 2 or more dashes followed by 's.
|
protected static java.util.regex.Matcher |
punctuationMatcher |
protected static java.util.regex.Pattern |
punctuationPattern
Pattern for punctuation only.
|
protected static java.util.Set<java.lang.Integer> |
punctuationSet
Set of punctuation values.
|
static char |
RDQUOTE
Right double curly quote.
|
static java.lang.String |
RDQUOTE_STRING |
static char |
RIGHT_ANGLE_BRACKET
Unicode right angle bracket.
|
static java.lang.String |
RIGHT_ANGLE_BRACKET_STRING |
static char |
RLSQUOTE
Right low single curly quote.
|
static java.lang.String |
RLSQUOTE_STRING |
static char |
RSQUOTE
Right single curly quote.
|
static java.lang.String |
RSQUOTE_STRING |
static char |
SECONDS_MARK |
static char |
SECTION_SIGN
Section sign.
|
static java.lang.String |
SECTION_SIGN_STRING |
static char |
SHORT_DASH
Short dash.
|
static java.lang.String |
SHORT_DASH_STRING
Short dash string.
|
static char |
SOLAR_CIRCLE
Solar circle character.
|
static java.lang.String |
SOLAR_CIRCLE_STRING |
static char |
SOLIDCIRCLE
Unicode filled-in circle/dot.
|
static java.lang.String |
SOLIDCIRCLE_STRING |
static java.lang.String |
SPAN_GAP_MARKER
Span gap marker.
|
protected static java.util.Set<java.lang.Integer> |
subscriptSet
Set of subscript characters.
|
protected static java.util.Set<java.lang.Integer> |
superscriptSet
Set of superscript characters.
|
protected static java.util.Set<java.lang.Integer> |
symbolSet
Set of symbol values.
|
static char |
UNKNOWN_PUNC
Unknown punctuation marker.
|
static char |
VERTICAL_BAR
Vertical bar.
|
static java.lang.String |
VERTICAL_BAR_STRING
Vertical bar string.
|
protected static java.util.regex.Matcher |
wordMatcher |
protected static java.util.regex.Pattern |
wordPattern
Pattern for Unicode word.
|
Modifier | Constructor and Description |
---|---|
protected |
CharUtils()
Don't allow instantiation, do allow overrides.
|
Modifier and Type | Method and Description |
---|---|
static boolean |
allLettersCapital(java.lang.String s)
True if all letters in a string are uppercase.
|
static java.lang.String |
capitalizeFirstLetter(java.lang.String s)
Capitalize first letter in string.
|
static boolean |
endsWithSingleQuote(java.lang.String s)
True if string ends with single quote.
|
static boolean |
endsWithSingleQuoteS(java.lang.String s)
True if string ends with "single quote + s".
|
static java.lang.String |
evictDashes(java.lang.String s)
Evict dashes from a string.
|
static java.lang.String |
getCaseOld(java.lang.String s)
Get case value for a string.
|
static int |
getLetterCase(java.lang.String s)
Get case value for a string.
|
static boolean |
hasApostrophe(java.lang.String s)
True if string contains at least one apostrophe.
|
static boolean |
hasCapitalLetter(java.lang.String s)
True if string contains at least one capital letter.
|
static boolean |
hasDash(java.lang.String s)
True if string contains a dash of some kind.
|
static boolean |
hasDigit(java.lang.String s)
True if at least one character in a string is a digit.
|
static boolean |
hasGapCharacter(java.lang.String s)
True if string contains at least one gap marker character.
|
static boolean |
hasGapMarkers(java.lang.String s)
True if any characters in a string are gap markers.
|
static boolean |
hasGreekLetters(java.lang.String s)
True if any characters in a string are Greek letters.
|
static boolean |
hasInternalCaps(java.lang.String s)
True if string contains internal capital letters.
|
static boolean |
hasPeriod(java.lang.String s)
True if string contains at least one period.
|
static boolean |
hasPunctuation(java.lang.String s)
True if any characters in a string are punctuation.
|
static boolean |
hasPunctuationNotApostrophes(java.lang.String s)
True if any characters in a string are punctuation other than apostrophes.
|
static boolean |
hasSingleQuote(java.lang.String s)
True if string contains a single quote.
|
static boolean |
hasSymbols(java.lang.String s)
True if any characters in a string are symbols.
|
static boolean |
isAllAsterisks(java.lang.String s)
True if string is all asterisks.
|
static boolean |
isAllCaps(java.lang.String s)
True if string is all caps.
|
static boolean |
isAllHyphens(java.lang.String s)
True if string is all hyphens.
|
static boolean |
isAllLowerCase(java.lang.String s)
True if string is all lower case.
|
static boolean |
isAllPeriods(java.lang.String s)
True if string is all periods.
|
static boolean |
isApostrophe(char c)
True if character is an apostrophe.
|
static boolean |
isAWord(java.lang.String s)
Check if a string is a word (Unicode letters, digits, hyphen).
|
static boolean |
isBreakingDash(char c)
True if character is a breaking dash of some kind.
|
static boolean |
isCapitalLetter(char c)
Check if character is a capital letter.
|
static boolean |
isClosingQuote(char c)
True if character is any kind of closing quote.
|
static boolean |
isCurrency(java.lang.String token)
Check for currency.
|
static boolean |
isDash(char c)
True if character is a dash of some kind.
|
static boolean |
isDigit(char c)
True if character is a digit.
|
static boolean |
isDigits(java.lang.String s)
True if all characters in a string are digits.
|
static boolean |
isEnglishVowel(char c)
Is character an English vowel?
|
static boolean |
isFirstLetterCapital(java.lang.String s)
True if first letter in a string is uppercase.
|
static boolean |
isGapMarker(char c)
True if character is a gap marker.
|
static boolean |
isGreekLetter(char c)
True if character is a Greek letter.
|
static boolean |
isLetter(char c)
Check if character is a letter.
|
static boolean |
isLetter(java.lang.String s)
Check if string is a single letter.
|
static boolean |
isLetters(java.lang.String s)
Check if string contains only letters.
|
static boolean |
isNumber(java.lang.String s)
True if a string is a number.
|
static boolean |
isOpeningQuote(char c)
True if character is any kind of opening quote.
|
static boolean |
isOrdinal(java.lang.String s)
True if a string is an ordinal number.
|
static boolean |
isPossessiveAsterisks(java.lang.String s)
True if string is asterisks followed by 's.
|
static boolean |
isPossessiveDashes(java.lang.String s)
True if string is two or more dashes followed by 's.
|
static boolean |
isPunctuation(char ch)
True if character is punctuation.
|
static boolean |
isPunctuation(java.lang.String s)
True if all characters in a string are punctuation.
|
static boolean |
isPunctuationOrSymbol(char c)
True if character is punctuation or symbol.
|
static boolean |
isPunctuationOrSymbol(java.lang.String s)
True if all characters in a string are punctuation or symbols.
|
static boolean |
isSingleOpeningQuote(char c)
True if character is a single opening quote.
|
static boolean |
isSingleQuote(char c)
True if character is single quote.
|
static boolean |
isSingleQuoteS(java.lang.String s)
True if string is "single quote + s".
|
static boolean |
isSubscript(char ch)
Is character a subscript character.
|
static boolean |
isSuperscript(char ch)
Is character a superscript character.
|
static boolean |
isSymbol(char ch)
True if character is symbol.
|
static boolean |
isSymbol(java.lang.String s)
True if all characters in a string are symbols.
|
static boolean |
isUpperCase(java.lang.String s)
True if all characters in a string are uppercase.
|
static boolean |
isUSCurrency(java.lang.String token)
Check for US currency.
|
static boolean |
isUSCurrencyCents(java.lang.String token)
Check for US currency with cents.
|
static boolean |
isWhitespace(char c)
True if character is whitespace.
|
static java.lang.String |
makeCaseMatch(java.lang.String s,
java.lang.String sCaseToMatch)
Make case of string match another string's case.
|
static java.lang.String |
normalizeString(java.lang.String s)
Convert super/subscript characters to normal equivalents.
|
protected static java.util.Set<java.lang.Integer> punctuationSet
protected static java.util.Set<java.lang.Integer> symbolSet
protected static java.util.Set<java.lang.Integer> superscriptSet
protected static java.util.Set<java.lang.Integer> subscriptSet
public static final char PARAGRAPH
public static final java.lang.String PARAGRAPH_STRING
public static final char SECTION_SIGN
public static final java.lang.String SECTION_SIGN_STRING
public static final char LSQUOTE
public static final java.lang.String LSQUOTE_STRING
public static final char RSQUOTE
public static final java.lang.String RSQUOTE_STRING
public static final char RLSQUOTE
public static final java.lang.String RLSQUOTE_STRING
public static final char LDQUOTE
public static final java.lang.String LDQUOTE_STRING
public static final char RDQUOTE
public static final java.lang.String RDQUOTE_STRING
public static final char SOLIDCIRCLE
public static final java.lang.String SOLIDCIRCLE_STRING
public static final char BLACKCIRCLE
public static final java.lang.String BLACKCIRCLE_STRING
public static final char DEGREES_MARK
public static final java.lang.String DEGREES_MARK_STRING
public static final char MINUTES_MARK
public static final char SECONDS_MARK
public static final char CHAR_GAP_MARKER
public static final java.lang.String CHAR_GAP_MARKER_STRING
public static final char CHAR_END_OF_TEXT_SECTION
public static final java.lang.String CHAR_END_OF_TEXT_SECTION_STRING
public static final char CHAR_FAKE_SOFT_HYPHEN
public static final java.lang.String CHAR_FAKE_SOFT_HYPHEN_STRING
public static final char CHAR_SUBSTITUTE_SINGLE_QUOTE
public static final java.lang.String CHAR_SUBSTITUTE_SINGLE_QUOTE_STRING
public static final char CHAR_SUP_TEXT_MARKER
public static final java.lang.String CHAR_SUP_TEXT_MARKER_STRING
public static final char LONG_DASH
public static final java.lang.String LONG_DASH_STRING
public static final char SHORT_DASH
public static final java.lang.String SHORT_DASH_STRING
public static final char OLD_EURO_SIGN
public static final char EURO_SIGN
public static final char UNKNOWN_PUNC
public static final char COMBINING_MACRON
public static final java.lang.String COMBINING_MACRON_STRING
public static final char NONBREAKING_BLANK
public static final java.lang.String NONBREAKING_BLANK_STRING
public static final char NONBREAKING_HYPHEN
public static final java.lang.String NONBREAKING_HYPHEN_STRING
public static final char VERTICAL_BAR
public static final java.lang.String VERTICAL_BAR_STRING
public static final char BROKEN_VERTICAL_BAR
public static final java.lang.String BROKEN_VERTICAL_BAR_STRING
public static final char LIGHT_VERTICAL_BAR
public static final java.lang.String LIGHT_VERTICAL_BAR_STRING
public static final char DIVIDER_VERTICAL_BAR
public static final java.lang.String DIVIDER_VERTICAL_BAR_STRING
public static final char ELLIPSIS
public static final java.lang.String ELLIPSIS_STRING
public static final char SOLAR_CIRCLE
public static final java.lang.String SOLAR_CIRCLE_STRING
public static final char DOTTEDCIRCLE
public static final java.lang.String DOTTEDCIRCLE_STRING
public static final char LEFT_ANGLE_BRACKET
public static final java.lang.String LEFT_ANGLE_BRACKET_STRING
public static final char RIGHT_ANGLE_BRACKET
public static final java.lang.String RIGHT_ANGLE_BRACKET_STRING
public static final char LOZENGE
public static final java.lang.String LOZENGE_STRING
public static final char ASTERISM
public static final java.lang.String ASTERISM_STRING
public static final char DAGGER
public static final java.lang.String DAGGER_STRING
public static final char DOUBLE_DAGGER
public static final java.lang.String DOUBLE_DAGGER_STRING
public static final char INVISIBLE_SEPARATOR
public static final java.lang.String INVISIBLE_SEPARATOR_STRING
public static final java.lang.String SPAN_GAP_MARKER
public static final char LONG_S
public static final java.lang.String LONG_S_STRING
public static java.lang.String digitsPattern
public static java.lang.String ordinalNumberPattern
protected static final java.util.regex.Pattern hyphenPattern
protected static final java.util.regex.Matcher hyphenMatcher
protected static final java.util.regex.Pattern asteriskPattern
protected static final java.util.regex.Matcher asteriskMatcher
protected static final java.util.regex.Pattern possessiveAsteriskPattern
protected static final java.util.regex.Matcher possessiveAsteriskMatcher
protected static final java.util.regex.Pattern possessiveDashesPattern
protected static final java.util.regex.Matcher possessiveDashesMatcher
protected static final java.util.regex.Pattern wordPattern
protected static final java.util.regex.Matcher wordMatcher
protected static final java.util.regex.Pattern lettersPattern
protected static final java.util.regex.Matcher lettersMatcher
protected static final java.util.regex.Pattern punctuationPattern
protected static final java.util.regex.Matcher punctuationMatcher
public static boolean isLetter(char c)
c
- Character to test.public static boolean isLetter(java.lang.String s)
s
- String to test.public static boolean isLetters(java.lang.String s)
s
- String to test.public static boolean isAWord(java.lang.String s)
s
- String to test.public static boolean isCapitalLetter(char c)
c
- Character to test.public static boolean isPunctuation(char ch)
public static boolean isPunctuation(java.lang.String s)
s
- String to check for punctuation.public static boolean isSymbol(char ch)
public static boolean isSymbol(java.lang.String s)
s
- String to check for symbols.public static boolean isPunctuationOrSymbol(char c)
c
- Character to check for punctuation or symbol.public static boolean isPunctuationOrSymbol(java.lang.String s)
s
- String to check for punctuation and symbols.public static boolean isDigit(char c)
c
- Character to check for being a digit.public static boolean isDigits(java.lang.String s)
s
- String to check for digits.public static boolean hasDigit(java.lang.String s)
s
- String to check for digits.public static boolean isDash(char c)
c
- Character to check for being a dash.public static boolean isBreakingDash(char c)
c
- Character to check for being a breaking dash.public static boolean hasDash(java.lang.String s)
s
- String to check for containing a dash.public static java.lang.String evictDashes(java.lang.String s)
s
- String from which to evict dashes.public static boolean isNumber(java.lang.String s)
s
- String to check for being a number.public static boolean isOrdinal(java.lang.String s)
s
- String to check for being an ordinal number.public static boolean allLettersCapital(java.lang.String s)
s
- String to check for upper case letters. Note: non-letters are ignored. The result is false if there are no letters in the string.
public static boolean isUpperCase(java.lang.String s)
s
- String to check for upper case letters. All characters are checked, letters and non-letters alike.
public static boolean isFirstLetterCapital(java.lang.String s)
s
- String to check for initial uppercase letter. Leading non-letters are ignored. If none of the characters in the string is a letter, false is returned.
public static boolean hasPunctuation(java.lang.String s)
s
- String to check for punctuation.public static boolean hasPunctuationNotApostrophes(java.lang.String s)
s
- String to check for punctuation.public static boolean hasSymbols(java.lang.String s)
s
- String to check for symbols.public static boolean isGapMarker(char c)
c
- Character to check for being a gap marker.public static boolean hasGapMarkers(java.lang.String s)
s
- String to check for gap markers.public static boolean isGreekLetter(char c)
c
- Character to check for being a Greek letter.public static boolean hasGreekLetters(java.lang.String s)
s
- String to check for Greek letters.public static boolean isEnglishVowel(char c)
c
- Character to check.public static boolean isSingleQuote(char c)
c
- Character to check for being a single quote.public static boolean isApostrophe(char c)
c
- Character to check for being an apostrophe.public static boolean isSingleOpeningQuote(char c)
c
- Character to check for being a single opening quote.public static boolean endsWithSingleQuoteS(java.lang.String s)
s
- String to check for ending with single quote + s.public static boolean isSingleQuoteS(java.lang.String s)
s
- String to check for being single quote + s.public static boolean endsWithSingleQuote(java.lang.String s)
s
- String to check for ending with single quote.public static boolean isOpeningQuote(char c)
c
- Character to check for being an opening quote.public static boolean isClosingQuote(char c)
c
- Character to check for being a closing quote.public static boolean hasSingleQuote(java.lang.String s)
s
- String to check for containing a single quote.public static boolean isAllCaps(java.lang.String s)
s
- String to check for being all capitals.public static boolean isAllLowerCase(java.lang.String s)
s
- String to check for being all lower case.public static boolean hasCapitalLetter(java.lang.String s)
s
- String to check for having a capital letter.public static boolean hasApostrophe(java.lang.String s)
s
- String to check for having an apostrophe.public static boolean hasPeriod(java.lang.String s)
s
- String to check for having a period.public static boolean hasGapCharacter(java.lang.String s)
s
- String to check for having a gap marker character.public static boolean hasInternalCaps(java.lang.String s)
s
- String to check for having internal capitals.public static boolean isAllPeriods(java.lang.String s)
s
- String to check for being all periods.public static boolean isAllHyphens(java.lang.String s)
s
- String to check for being all hyphens.public static boolean isAllAsterisks(java.lang.String s)
s
- String to check for being all asterisks.public static boolean isPossessiveAsterisks(java.lang.String s)
s
- String to check for being asterisks followed by 's.public static boolean isPossessiveDashes(java.lang.String s)
s
- String to check for being dashes followed by 's.public static boolean isWhitespace(char c)
c
- Character to check for being whitespace.public static java.lang.String makeCaseMatch(java.lang.String s, java.lang.String sCaseToMatch)
s
- String whose case should be changed.sCaseToMatch
- String whose case should be matched.public static java.lang.String capitalizeFirstLetter(java.lang.String s)
s
- String to capitalize.public static boolean isUSCurrency(java.lang.String token)
token
- Word to check for currency.public static boolean isUSCurrencyCents(java.lang.String token)
token
- Word to check for currency.public static boolean isCurrency(java.lang.String token)
token
- Word to check for currency.public static java.lang.String getCaseOld(java.lang.String s)
s
- The string.public static int getLetterCase(java.lang.String s)
s
- The string.public static java.lang.String normalizeString(java.lang.String s)
s
- String containing super/subscript characters.public static boolean isSuperscript(char ch)
ch
- Character to check.public static boolean isSubscript(char ch)
ch
- Character to check.