struct Char

Overview

A Char represents a Unicode code point. It occupies 32 bits.

It is created by enclosing a UTF-8 character in single quotes.

'a'
'z'
'0'
'_'
'あ'

You can use a backslash to denote some characters:

'\'' # single quote
'\\' # backslash
'\e' # escape
'\f' # form feed
'\n' # newline
'\r' # carriage return
'\t' # tab
'\v' # vertical tab

You can use a backslash followed by a u and four hexadecimal characters to denote a Unicode codepoint:

'\u0041' # == 'A'

Or you can use curly braces and specify up to six hexadecimal digits:

'\u{41}' # == 'A'

See Char literals in the language reference.

Included Modules

Defined in:

char.cr
char/reader.cr
primitives.cr

Constant Summary

MAX = 1114111.unsafe_chr

The maximum character.

MAX_CODEPOINT = 1114111

The maximum valid codepoint for a character.

REPLACEMENT = '�'

The replacement character, used on invalid UTF-8 byte sequences.

ZERO = '\0'

The character representing the end of a C string.

Instance Method Summary

Instance methods inherited from module Steppable

step(*, to limit = nil, by step, exclusive : Bool = false, &) : Nil
step(*, to limit = nil, by step, exclusive : Bool = false)
step

Instance methods inherited from module Comparable(Char)

<(other : T) : Bool <, <=(other : T) <=, <=>(other : T) <=>, ==(other : T) ==, >(other : T) : Bool >, >=(other : T) >=, clamp(min, max)
clamp(range : Range)
clamp

Instance methods inherited from struct Value

==(other : Log::Metadata::Value)
==(other : JSON::Any)
==(other : YAML::Any)
==(other)
==
, dup dup

Instance methods inherited from class Object

! : Bool !, !=(other) !=, !~(other) !~, ==(other) ==, ===(other : JSON::Any)
===(other : YAML::Any)
===(other)
===
, =~(other) =~, as(type : Class) as, as?(type : Class) as?, class class, dup dup, hash(hasher)
hash
hash
, in?(collection : Object) : Bool
in?(*values : Object) : Bool
in?
, inspect(io : IO) : Nil
inspect : String
inspect
, is_a?(type : Class) : Bool is_a?, itself itself, nil? : Bool nil?, not_nil!(message)
not_nil!
not_nil!
, pretty_inspect(width = 79, newline = "\n", indent = 0) : String pretty_inspect, pretty_print(pp : PrettyPrint) : Nil pretty_print, responds_to?(name : Symbol) : Bool responds_to?, tap(&) tap, to_json(io : IO) : Nil
to_json : String
to_json
, to_pretty_json(indent : String = " ") : String
to_pretty_json(io : IO, indent : String = " ") : Nil
to_pretty_json
, to_s(io : IO) : Nil
to_s : String
to_s
, to_yaml(io : IO) : Nil
to_yaml : String
to_yaml
, try(&) try, unsafe_as(type : T.class) forall T unsafe_as

Class methods inherited from class Object

from_json(string_or_io : String | IO, root : String)
from_json(string_or_io : String | IO)
from_json
, from_yaml(string_or_io : String | IO) from_yaml

Macros inherited from class Object

class_getter(*names, &block) class_getter, class_getter!(*names) class_getter!, class_getter?(*names, &block) class_getter?, class_property(*names, &block) class_property, class_property!(*names) class_property!, class_property?(*names, &block) class_property?, class_setter(*names) class_setter, def_clone def_clone, def_equals(*fields) def_equals, def_equals_and_hash(*fields) def_equals_and_hash, def_hash(*fields) def_hash, delegate(*methods, to object) delegate, forward_missing_to(delegate) forward_missing_to, getter(*names, &block) getter, getter!(*names) getter!, getter?(*names, &block) getter?, property(*names, &block) property, property!(*names) property!, property?(*names, &block) property?, setter(*names) setter

Instance Method Detail

def !=(other : Char) : Bool #

Returns true if self's codepoint is not equal to other's codepoint.


def +(str : String) : String #

Concatenates this char and the given string.

'f' + "oo" # => "foo"

def +(other : Int) : Char #

Returns a char that has this char's codepoint plus other.

'a' + 1 # => 'b'
'a' + 2 # => 'c'

def -(other : Char) : Int32 #

Returns the difference of the codepoint values of this char and other.

'a' - 'a' # => 0
'b' - 'a' # => 1
'c' - 'a' # => 2

def -(other : Int) : Char #

Returns a char that has this char's codepoint minus other.

'c' - 1 # => 'b'
'c' - 2 # => 'a'

def <(other : Char) : Bool #

Returns true if self's codepoint is less than other's codepoint.


def <=(other : Char) : Bool #

Returns true if self's codepoint is less than or equal to other's codepoint.


def <=>(other : Char) #

The comparison operator.

Returns the difference of the codepoint values of self and other. The result is negative, 0 or positive depending on whether self's codepoint is less than, equal to, or greater than other's codepoint.

'a' <=> 'c' # => -2
'z' <=> 'z' # => 0
'c' <=> 'a' # => 2

def ==(other : Char) : Bool #

Returns true if self's codepoint is equal to other's codepoint.


def ===(byte : Int) #

Returns true if the codepoint is equal to byte ignoring the type.

'c'.ord       # => 99
'c' === 99_u8 # => true
'c' === 99    # => true
'z' === 99    # => false

def >(other : Char) : Bool #

Returns true if self's codepoint is greater than other's codepoint.


def >=(other : Char) : Bool #

Returns true if self's codepoint is greater than or equal to other's codepoint.


def alphanumeric? : Bool #

Returns true if this char is a letter or a number according to Unicode.

'c'.alphanumeric? # => true
'8'.alphanumeric? # => true
'.'.alphanumeric? # => false

def ascii? : Bool #

Returns true if this char is an ASCII character (codepoint is in (0..127)).


def ascii_alphanumeric? : Bool #

Returns true if this char is an ASCII letter or number ('0' to '9', 'a' to 'z', 'A' to 'Z').

'c'.ascii_alphanumeric? # => true
'8'.ascii_alphanumeric? # => true
'.'.ascii_alphanumeric? # => false

def ascii_control? : Bool #

Returns true if this char is an ASCII control character.

This includes the C0 control codes (U+0000 through U+001F) and the Delete character (U+007F).

('\u0000'..'\u001F').each do |char|
  char.ascii_control? # => true
end

'\u007F'.ascii_control? # => true
'\u0080'.ascii_control? # => false

def ascii_letter? : Bool #

Returns true if this char is an ASCII letter ('a' to 'z', 'A' to 'Z').

'c'.ascii_letter? # => true
'á'.ascii_letter? # => false
'8'.ascii_letter? # => false

def ascii_lowercase? : Bool #

Returns true if this char is a lowercase ASCII letter.

'c'.ascii_lowercase? # => true
'ç'.ascii_lowercase? # => false
'G'.ascii_lowercase? # => false
'.'.ascii_lowercase? # => false

def ascii_number?(base : Int = 10) : Bool #

Returns true if this char is an ASCII number in specified base.

Base can be from 2 to 36 with digits from '0' to '9' and 'a' to 'z' or 'A' to 'Z'.

'4'.ascii_number?     # => true
'z'.ascii_number?     # => false
'z'.ascii_number?(36) # => true

def ascii_uppercase? : Bool #

Returns true if this char is an ASCII uppercase letter.

'H'.ascii_uppercase? # => true
'Á'.ascii_uppercase? # => false
'c'.ascii_uppercase? # => false
'.'.ascii_uppercase? # => false

def ascii_whitespace? : Bool #

Returns true if this char is an ASCII whitespace.

' '.ascii_whitespace?  # => true
'\t'.ascii_whitespace? # => true
'b'.ascii_whitespace?  # => false

def bytes : Array(UInt8) #

Returns this char's bytes as encoded by UTF-8, as an Array(UInt8).

'a'.bytes # => [97]
'あ'.bytes # => [227, 129, 130]

def bytesize : Int32 #

Returns the number of UTF-8 bytes in this char.

'a'.bytesize # => 1
'好'.bytesize # => 3

def clone #

def control? : Bool #

Returns true if this char is a control character according to Unicode.


def downcase(io : IO, options : Unicode::CaseOptions = :none) : Nil #

Writes the downcase equivalent of this char to the given io.

This method takes into account the possibility that a downcase version of a char might result in multiple chars, like for 'İ', which results in 'i' and a dot mark.

'Z'.downcase(STDOUT)                             # prints "z"
'ς'.downcase(STDOUT, Unicode::CaseOptions::Fold) # prints "σ"
'ẞ'.downcase(STDOUT, Unicode::CaseOptions::Fold) # prints "ss"
'ᾈ'.downcase(STDOUT, Unicode::CaseOptions::Fold) # prints "ἀι"

def downcase(options : Unicode::CaseOptions = :none) : Char #

Returns the downcase equivalent of this char.

Note that this only works for characters whose downcase equivalent yields a single codepoint. There are a few characters, like 'İ', that when downcased result in multiple characters (in this case: 'i' and the dot mark).

For more correct behavior see the overloads that receive a block or an IO.

'Z'.downcase # => 'z'
'x'.downcase # => 'x'
'.'.downcase # => '.'

If options.fold? is true, then returns the case-folded equivalent instead. Note that this will return self if a multiple-character case folding exists, even if a separate single-character transformation is also defined in Unicode.

'Z'.downcase(Unicode::CaseOptions::Fold) # => 'z'
'x'.downcase(Unicode::CaseOptions::Fold) # => 'x'
'ς'.downcase(Unicode::CaseOptions::Fold) # => 'σ'
'ꭰ'.downcase(Unicode::CaseOptions::Fold) # => 'Ꭰ'
'ẞ'.downcase(Unicode::CaseOptions::Fold) # => 'ẞ' # not U+00DF 'ß'
'ᾈ'.downcase(Unicode::CaseOptions::Fold) # => 'ᾈ' # not U+1F80 'ᾀ'

def downcase(options : Unicode::CaseOptions = :none, &) #

Yields each char for the downcase equivalent of this char.

This method takes into account the possibility that a downcase version of a char might result in multiple chars, like for 'İ', which results in 'i' and a dot mark.

'Z'.downcase { |v| puts v }                             # prints 'z'
'ς'.downcase(Unicode::CaseOptions::Fold) { |v| puts v } # prints 'σ'
'ẞ'.downcase(Unicode::CaseOptions::Fold) { |v| puts v } # prints 's', 's'
'ᾈ'.downcase(Unicode::CaseOptions::Fold) { |v| puts v } # prints 'ἀ', 'ι'

def dump(io) #

Returns a representation of self as an ASCII-compatible Crystal char literal, wrapped in single quotes.

Non-printable characters (see #printable?) and non-ASCII characters (codepoints larger than U+007F) are escaped.

'a'.dump      # => "'a'"
'\t'.dump     # => "'\\t'"
'あ'.dump      # => "'\\u3042'"
'\u0012'.dump # => "'\\u0012'"
'😀'.dump      # => "'\\u{1F600}'"

See #unicode_escape for the format used to escape characters without a special escape sequence.

  • #inspect only escapes non-printable characters.

def dump : String #

Returns a representation of self as an ASCII-compatible Crystal char literal, wrapped in single quotes.

Non-printable characters (see #printable?) and non-ASCII characters (codepoints larger than U+007F) are escaped.

'a'.dump      # => "'a'"
'\t'.dump     # => "'\\t'"
'あ'.dump      # => "'\\u3042'"
'\u0012'.dump # => "'\\u0012'"
'😀'.dump      # => "'\\u{1F600}'"

See #unicode_escape for the format used to escape characters without a special escape sequence.

  • #inspect only escapes non-printable characters.

def each_byte(&) : Nil #

Yields each of the bytes of this char as encoded by UTF-8.

puts "'a'"
'a'.each_byte do |byte|
  puts byte
end
puts

puts "'あ'"
'あ'.each_byte do |byte|
  puts byte
end

Output:

'a'
97

'あ'
227
129
130

def hash(hasher) #

def hex? : Bool #

Returns true if this char is an ASCII hex digit ('0' to '9', 'a' to 'f', 'A' to 'F').

'5'.hex? # => true
'a'.hex? # => true
'F'.hex? # => true
'g'.hex? # => false

def in_set?(*sets : String) : Bool #

Returns true if this char is matched by the given sets.

Each parameter defines a set, the character is matched against the intersection of those, in other words it needs to match all sets.

If a set starts with a ^, it is negated. The sequence c1-c2 means all characters between and including c1 and c2 and is known as a range.

The backslash character \ can be used to escape ^ or - and is otherwise ignored unless it appears at the end of a range or set.

'l'.in_set? "lo"          # => true
'l'.in_set? "lo", "o"     # => false
'l'.in_set? "hello", "^l" # => false
'l'.in_set? "j-m"         # => true

'^'.in_set? "\\^aeiou" # => true
'-'.in_set? "a\\-eo"   # => true

'\\'.in_set? "\\"    # => true
'\\'.in_set? "\\A"   # => false
'\\'.in_set? "X-\\w" # => true

def inspect(io : IO) : Nil #

Returns a representation of self as a Crystal char literal, wrapped in single quotes.

Non-printable characters (see #printable?) are escaped.

'a'.inspect      # => "'a'"
'\t'.inspect     # => "'\\t'"
'あ'.inspect      # => "'あ'"
'\u0012'.inspect # => "'\\u0012'"
'😀'.inspect      # => "'\u{1F600}'"

See #unicode_escape for the format used to escape characters without a special escape sequence.

  • #dump additionally escapes all non-ASCII characters.

def inspect : String #

Returns a representation of self as a Crystal char literal, wrapped in single quotes.

Non-printable characters (see #printable?) are escaped.

'a'.inspect      # => "'a'"
'\t'.inspect     # => "'\\t'"
'あ'.inspect      # => "'あ'"
'\u0012'.inspect # => "'\\u0012'"
'😀'.inspect      # => "'\u{1F600}'"

See #unicode_escape for the format used to escape characters without a special escape sequence.

  • #dump additionally escapes all non-ASCII characters.

def letter? : Bool #

Returns true if this char is a letter.

All codepoints in the Unicode General Category L (Letter) are considered a letter.

'c'.letter? # => true
'á'.letter? # => true
'8'.letter? # => false

def lowercase? : Bool #

Returns true if this char is a lowercase letter.

'c'.lowercase? # => true
'ç'.lowercase? # => true
'G'.lowercase? # => false
'.'.lowercase? # => false
'ǲ'.lowercase? # => false

def mark? : Bool #

Returns true if this char is a mark character according to Unicode.


def number? : Bool #

Returns true if this char is a number according to Unicode.

'1'.number? # => true
'a'.number? # => false

def ord : Int32 #

Returns the codepoint of this char.

The codepoint is the integer representation. The Universal Coded Character Set (UCS) standard, commonly known as Unicode, assigns names and meanings to numbers, these numbers are called codepoints.

For values below and including 127 this matches the ASCII codes and thus its byte representation.

'a'.ord      # => 97
'\0'.ord     # => 0
'\u007f'.ord # => 127
'☃'.ord      # => 9731

def pred : Char #

Returns the predecessor codepoint before this one.

This can be used for iterating a range of characters (see Range#each).

'b'.pred # => 'a'
'ぃ'.pred # => 'あ'

This does not always return codepoint - 1. There is a gap in the range of Unicode scalars: The surrogate codepoints U+D800 through U+DFFF.

'\uE000'.pred # => '\uD7FF'

Raises OverflowError for Char::ZERO.

  • #succ returns the successor codepoint.

def printable? #

Returns true if this char is a printable character.

There is no universal definition of printable characters in Unicode. For the purpose of this method, all characters with a visible glyph and the ASCII whitespace (' ') are considered printable.

This means characters which are #control? or #whitespace? (except for ' ') are non-printable.


def step(*, to limit = nil, exclusive : Bool = false, &) #

Performs a #step in the direction of the limit. For instance:

'd'.step(to: 'a').to_a # => ['d', 'c', 'b', 'a']
'a'.step(to: 'd').to_a # => ['a', 'b', 'c', 'd']

def step(*, to limit = nil, exclusive : Bool = false) #

Performs a #step in the direction of the limit. For instance:

'd'.step(to: 'a').to_a # => ['d', 'c', 'b', 'a']
'a'.step(to: 'd').to_a # => ['a', 'b', 'c', 'd']

def succ : Char #

Returns the successor codepoint after this one.

This can be used for iterating a range of characters (see Range#each).

'a'.succ # => 'b'
'あ'.succ # => 'ぃ'

This does not always return codepoint + 1. There is a gap in the range of Unicode scalars: The surrogate codepoints U+D800 through U+DFFF.

'\uD7FF'.succ # => '\uE000'

Raises OverflowError for Char::MAX.

  • #pred returns the predecessor codepoint.

def titlecase(io : IO, options : Unicode::CaseOptions = :none) : Nil #

Writes the titlecase equivalent of this char to the given io.

Usually this is equivalent to #upcase, but a few precomposed characters consisting of multiple letters may yield a different character sequence where only the first letter is uppercase and the rest lowercase.

This method takes into account the possibility that a titlecase version of a char might result in multiple chars, like for 'ﬄ', which results in 'F', 'f' and 'l'.

'z'.titlecase(STDOUT) # prints "Z"
'Ǳ'.titlecase(STDOUT) # prints "ǲ"
'ﬄ'.titlecase(STDOUT) # prints "Ffl"

def titlecase(options : Unicode::CaseOptions = :none) : Char #

Returns the titlecase equivalent of this char.

Usually this is equivalent to #upcase, but a few precomposed characters consisting of multiple letters may return a different character where only the first letter is uppercase and the rest lowercase.

Note that this only works for characters whose titlecase equivalent yields a single codepoint. There are a few characters, like 'ﬄ', that when titlecased result in multiple characters (in this case: 'F', 'f', 'l').

For more correct behavior see the overloads that receive a block or an IO.

'z'.titlecase # => 'Z'
'X'.titlecase # => 'X'
'.'.titlecase # => '.'
'Ǳ'.titlecase # => 'ǲ'
'ǳ'.titlecase # => 'ǲ'

def titlecase(options : Unicode::CaseOptions = :none, &) #

Yields each char for the titlecase equivalent of this char.

Usually this is equivalent to #upcase, but a few precomposed characters consisting of multiple letters may yield a different character sequence where only the first letter is uppercase and the rest lowercase.

This method takes into account the possibility that a titlecase version of a char might result in multiple chars, like for 'ﬄ', which results in 'F', 'f' and 'l'.

'z'.titlecase { |v| puts v } # prints 'Z'
'Ǳ'.titlecase { |v| puts v } # prints 'ǲ'
'ﬄ'.titlecase { |v| puts v } # prints 'F', 'f', 'l'

def titlecase? : Bool #

Returns true if this char is a titlecase character, i.e. a ligature consisting of an uppercase letter followed by lowercase characters.

'ǲ'.titlecase? # => true
'H'.titlecase? # => false
'c'.titlecase? # => false

def to_f : Float64 #

Returns the integer value of this char as a float if it's an ASCII char denoting a digit, raises otherwise.

'1'.to_f # => 1.0
'8'.to_f # => 8.0
'c'.to_f # raises ArgumentError

def to_f32 : Float32 #

See also: #to_f.


def to_f32? : Float32 | Nil #

See also: #to_f?.


def to_f64 : Float64 #

Same as #to_f.


def to_f64? : Float64 | Nil #

Same as #to_f?.


def to_f? : Float64 | Nil #

Returns the integer value of this char as a float if it's an ASCII char denoting a digit, nil otherwise.

'1'.to_f? # => 1.0
'8'.to_f? # => 8.0
'c'.to_f? # => nil

def to_i(base : Int = 10) : Int32 #

Returns the integer value of this char if it's an ASCII char denoting a digit in base, raises otherwise.

'1'.to_i     # => 1
'8'.to_i     # => 8
'c'.to_i     # raises ArgumentError
'1'.to_i(16) # => 1
'a'.to_i(16) # => 10
'f'.to_i(16) # => 15
'z'.to_i(16) # raises ArgumentError

def to_i128(base : Int = 10) #

See also: #to_i.


def to_i128?(base : Int = 10) #

See also: #to_i?.


def to_i16(base : Int = 10) #

See also: #to_i.


def to_i16?(base : Int = 10) #

See also: #to_i?.


def to_i32(base : Int = 10) : Int32 #

Same as #to_i.


def to_i32?(base : Int = 10) : Int32 | Nil #

Same as #to_i?.


def to_i64(base : Int = 10) #

See also: #to_i.


def to_i64?(base : Int = 10) #

See also: #to_i?.


def to_i8(base : Int = 10) #

See also: #to_i.


def to_i8?(base : Int = 10) #

See also: #to_i?.


def to_i?(base : Int = 10) : Int32 | Nil #

Returns the integer value of this char if it's an ASCII char denoting a digit in base, nil otherwise.

'1'.to_i?     # => 1
'8'.to_i?     # => 8
'c'.to_i?     # => nil
'1'.to_i?(16) # => 1
'a'.to_i?(16) # => 10
'f'.to_i?(16) # => 15
'z'.to_i?(16) # => nil

def to_s(io : IO) : Nil #

Appends this char to the given IO.

This appends this char's bytes as encoded by UTF-8 to the given IO.


def to_s : String #

Returns this char as a string containing this char as a single character.

'a'.to_s # => "a"
'あ'.to_s # => "あ"

def to_u128(base : Int = 10) #

See also: #to_i.


def to_u128?(base : Int = 10) #

See also: #to_i?.


def to_u16(base : Int = 10) #

See also: #to_i.


def to_u16?(base : Int = 10) #

See also: #to_i?.


def to_u32(base : Int = 10) #

See also: #to_i.


def to_u32?(base : Int = 10) #

See also: #to_i?.


def to_u64(base : Int = 10) #

See also: #to_i.


def to_u64?(base : Int = 10) #

See also: #to_i?.


def to_u8(base : Int = 10) #

See also: #to_i.


def to_u8?(base : Int = 10) #

See also: #to_i?.


def unicode_escape(io : IO) : Nil #

Returns the Unicode escape sequence representing this character.

The codepoints are expressed as hexadecimal digits with uppercase letters. Unicode escapes always use the four digit style for codepoints U+FFFF and lower, adding leading zeros when necessary. Higher codepoints have their digits wrapped in curly braces and no leading zeros.

'a'.unicode_escape      # => "\\u0061"
'\t'.unicode_escape     # => "\\u0009"
'あ'.unicode_escape      # => "\\u3042"
'\u0012'.unicode_escape # => "\\u0012"
'😀'.unicode_escape      # => "\\u{1F600}"

def unicode_escape : String #

Returns the Unicode escape sequence representing this character.

The codepoints are expressed as hexadecimal digits with uppercase letters. Unicode escapes always use the four digit style for codepoints U+FFFF and lower, adding leading zeros when necessary. Higher codepoints have their digits wrapped in curly braces and no leading zeros.

'a'.unicode_escape      # => "\\u0061"
'\t'.unicode_escape     # => "\\u0009"
'あ'.unicode_escape      # => "\\u3042"
'\u0012'.unicode_escape # => "\\u0012"
'😀'.unicode_escape      # => "\\u{1F600}"

def upcase(io : IO, options : Unicode::CaseOptions = :none) : Nil #

Writes the upcase equivalent of this char to the given io.

This method takes into account the possibility that an upcase version of a char might result in multiple chars, like for 'ﬄ', which results in 'F', 'F' and 'L'.

'z'.upcase(STDOUT) # prints "Z"
'ﬄ'.upcase(STDOUT) # prints "FFL"

def upcase(options : Unicode::CaseOptions = :none) : Char #

Returns the upcase equivalent of this char.

Note that this only works for characters whose upcase equivalent yields a single codepoint. There are a few characters, like 'ﬄ', that when upcased result in multiple characters (in this case: 'F', 'F', 'L').

For more correct behavior see the overloads that receive a block or an IO.

'z'.upcase # => 'Z'
'X'.upcase # => 'X'
'.'.upcase # => '.'

def upcase(options : Unicode::CaseOptions = :none, &) #

Yields each char for the upcase equivalent of this char.

This method takes into account the possibility that an upcase version of a char might result in multiple chars, like for 'ﬄ', which results in 'F', 'F' and 'L'.

'z'.upcase { |v| puts v } # prints 'Z'
'ﬄ'.upcase { |v| puts v } # prints 'F', 'F', 'L'

def uppercase? : Bool #

Returns true if this char is an uppercase letter.

'H'.uppercase? # => true
'Á'.uppercase? # => true
'c'.uppercase? # => false
'.'.uppercase? # => false
'ǲ'.uppercase? # => false

def whitespace? : Bool #

Returns true if this char is a whitespace according to Unicode.

' '.whitespace?  # => true
'\t'.whitespace? # => true
'b'.whitespace?  # => false