CharUnicodeInfo 类

定义

检索关于 Unicode 字符的信息。Retrieves information about a Unicode character. 此类不能被继承。This class cannot be inherited.

public ref class CharUnicodeInfo abstract sealed
public static class CharUnicodeInfo
type CharUnicodeInfo = class
Public Class CharUnicodeInfo
继承
CharUnicodeInfo

示例

下面的代码示例显示了每种方法为不同类型的字符返回的值。The following code example shows the values returned by each method for different types of characters.

using namespace System;
using namespace System::Globalization;
void PrintProperties( Char c );
int main()
{
   Console::WriteLine( "                                        c  Num   Dig   Dec   UnicodeCategory" );
   Console::Write( "U+0061 LATIN SMALL LETTER A            " );
   PrintProperties( L'a' );
   Console::Write( "U+0393 GREEK CAPITAL LETTER GAMMA      " );
   PrintProperties( L'\u0393' );
   Console::Write( "U+0039 DIGIT NINE                      " );
   PrintProperties( L'9' );
   Console::Write( "U+00B2 SUPERSCRIPT TWO                 " );
   PrintProperties( L'\u00B2' );
   Console::Write( "U+00BC VULGAR FRACTION ONE QUARTER     " );
   PrintProperties( L'\u00BC' );
   Console::Write( "U+0BEF TAMIL DIGIT NINE                " );
   PrintProperties( L'\u0BEF' );
   Console::Write( "U+0BF0 TAMIL NUMBER TEN                " );
   PrintProperties( L'\u0BF0' );
   Console::Write( "U+0F33 TIBETAN DIGIT HALF ZERO         " );
   PrintProperties( L'\u0F33' );
   Console::Write( "U+2788 CIRCLED SANS-SERIF DIGIT NINE   " );
   PrintProperties( L'\u2788' );
}

// Writes the remainder of one table row for the character c: the character
// itself, then the results of GetNumericValue, GetDigitValue and
// GetDecimalDigitValue (each left-aligned in a fixed-width column), and
// finally the result of GetUnicodeCategory, which ends the line.
void PrintProperties( Char c )
{
   double numericValue = CharUnicodeInfo::GetNumericValue( c );
   int digitValue = CharUnicodeInfo::GetDigitValue( c );
   int decimalDigitValue = CharUnicodeInfo::GetDecimalDigitValue( c );
   UnicodeCategory category = CharUnicodeInfo::GetUnicodeCategory( c );

   Console::Write( " {0,-3}", c );
   Console::Write( " {0,-5}", numericValue );
   Console::Write( " {0,-5}", digitValue );
   Console::Write( " {0,-5}", decimalDigitValue );
   Console::WriteLine( "{0}", category );
}

/*
This code produces the following output.  Some characters might not display at the console.

                                        c  Num   Dig   Dec   UnicodeCategory
U+0061 LATIN SMALL LETTER A             a   -1    -1    -1   LowercaseLetter
U+0393 GREEK CAPITAL LETTER GAMMA       Γ   -1    -1    -1   UppercaseLetter
U+0039 DIGIT NINE                       9   9     9     9    DecimalDigitNumber
U+00B2 SUPERSCRIPT TWO                  ²   2     2     2    OtherNumber
U+00BC VULGAR FRACTION ONE QUARTER      ¼   0.25  -1    -1   OtherNumber
U+0BEF TAMIL DIGIT NINE                 ௯   9     9     9    DecimalDigitNumber
U+0BF0 TAMIL NUMBER TEN                 ௰   10    -1    -1   OtherNumber
U+0F33 TIBETAN DIGIT HALF ZERO          ༳   -0.5  -1    -1   OtherNumber
U+2788 CIRCLED SANS-SERIF DIGIT NINE    ➈   9     9     -1   OtherNumber

*/
using System;
using System.Globalization;

/// <summary>
/// Console sample that prints, for a fixed set of characters, the values
/// returned by the <see cref="CharUnicodeInfo"/> methods.
/// </summary>
public class SamplesCharUnicodeInfo  {

   /// <summary>
   /// Prints the column header, then one table row per sample character.
   /// </summary>
   public static void Main()  {

      // Row captions; every caption is padded to the same width so that the
      // value columns written by PrintProperties line up under the header.
      string[] captions = {
         "U+0061 LATIN SMALL LETTER A            ",
         "U+0393 GREEK CAPITAL LETTER GAMMA      ",
         "U+0039 DIGIT NINE                      ",
         "U+00B2 SUPERSCRIPT TWO                 ",
         "U+00BC VULGAR FRACTION ONE QUARTER     ",
         "U+0BEF TAMIL DIGIT NINE                ",
         "U+0BF0 TAMIL NUMBER TEN                ",
         "U+0F33 TIBETAN DIGIT HALF ZERO         ",
         "U+2788 CIRCLED SANS-SERIF DIGIT NINE   "
      };

      // Sample characters, listed in the same order as the captions above.
      char[] samples = {
         'a', '\u0393', '9', '\u00B2', '\u00BC',
         '\u0BEF', '\u0BF0', '\u0F33', '\u2788'
      };

      Console.WriteLine( "                                        c  Num   Dig   Dec   UnicodeCategory" );

      for ( int row = 0; row < samples.Length; row++ )  {
         Console.Write( captions[row] );
         PrintProperties( samples[row] );
      }

   }

   /// <summary>
   /// Writes the remainder of one table row for <paramref name="c"/>: the
   /// character itself, its numeric, digit and decimal digit values (each
   /// left-aligned in a fixed-width column), and its Unicode category,
   /// followed by a line terminator.
   /// </summary>
   /// <param name="c">The character whose properties are printed.</param>
   public static void PrintProperties( char c )  {
      double numericValue = CharUnicodeInfo.GetNumericValue( c );
      int digitValue = CharUnicodeInfo.GetDigitValue( c );
      int decimalDigitValue = CharUnicodeInfo.GetDecimalDigitValue( c );
      UnicodeCategory category = CharUnicodeInfo.GetUnicodeCategory( c );

      // One composite format string yields the same characters as writing
      // each column with its own call.
      Console.WriteLine( " {0,-3} {1,-5} {2,-5} {3,-5}{4}",
         c, numericValue, digitValue, decimalDigitValue, category );
   }

}


/*
This code produces the following output.  Some characters might not display at the console.

                                        c  Num   Dig   Dec   UnicodeCategory
U+0061 LATIN SMALL LETTER A             a   -1    -1    -1   LowercaseLetter
U+0393 GREEK CAPITAL LETTER GAMMA       Γ   -1    -1    -1   UppercaseLetter
U+0039 DIGIT NINE                       9   9     9     9    DecimalDigitNumber
U+00B2 SUPERSCRIPT TWO                  ²   2     2     2    OtherNumber
U+00BC VULGAR FRACTION ONE QUARTER      ¼   0.25  -1    -1   OtherNumber
U+0BEF TAMIL DIGIT NINE                 ௯   9     9     9    DecimalDigitNumber
U+0BF0 TAMIL NUMBER TEN                 ௰   10    -1    -1   OtherNumber
U+0F33 TIBETAN DIGIT HALF ZERO          ༳   -0.5  -1    -1   OtherNumber
U+2788 CIRCLED SANS-SERIF DIGIT NINE    ➈   9     9     -1   OtherNumber

*/

Imports System.Globalization

''' <summary>
''' Console sample that prints, for a fixed set of characters, the values
''' returned by the CharUnicodeInfo methods.
''' </summary>
Public Class SamplesCharUnicodeInfo

   ''' <summary>
   ''' Prints the column header, then one table row per sample character.
   ''' </summary>
   Public Shared Sub Main()

      ' Row captions; every caption is padded to the same width so that the
      ' value columns written by PrintProperties line up under the header.
      Dim captions() As String = {
         "U+0061 LATIN SMALL LETTER A            ",
         "U+0393 GREEK CAPITAL LETTER GAMMA      ",
         "U+0039 DIGIT NINE                      ",
         "U+00B2 SUPERSCRIPT TWO                 ",
         "U+00BC VULGAR FRACTION ONE QUARTER     ",
         "U+0BEF TAMIL DIGIT NINE                ",
         "U+0BF0 TAMIL NUMBER TEN                ",
         "U+0F33 TIBETAN DIGIT HALF ZERO         ",
         "U+2788 CIRCLED SANS-SERIF DIGIT NINE   "
      }

      ' Sample characters, listed in the same order as the captions above.
      Dim samples() As Char = {
         "a"c, ChrW(&H0393), "9"c, ChrW(&H00B2), ChrW(&H00BC),
         ChrW(&H0BEF), ChrW(&H0BF0), ChrW(&H0F33), ChrW(&H2788)
      }

      Console.WriteLine("                                        c  Num   Dig   Dec   UnicodeCategory")

      For row As Integer = 0 To samples.Length - 1
         Console.Write(captions(row))
         PrintProperties(samples(row))
      Next

   End Sub

   ''' <summary>
   ''' Writes the remainder of one table row for the character c: the
   ''' character itself, its numeric, digit and decimal digit values (each
   ''' left-aligned in a fixed-width column), and its Unicode category,
   ''' followed by a line terminator.
   ''' </summary>
   ''' <param name="c">The character whose properties are printed.</param>
   Public Shared Sub PrintProperties(c As Char)
      Dim numericValue As Double = CharUnicodeInfo.GetNumericValue(c)
      Dim digitValue As Integer = CharUnicodeInfo.GetDigitValue(c)
      Dim decimalDigitValue As Integer = CharUnicodeInfo.GetDecimalDigitValue(c)
      Dim category As UnicodeCategory = CharUnicodeInfo.GetUnicodeCategory(c)

      ' One composite format string yields the same characters as writing
      ' each column with its own call.
      Console.WriteLine(" {0,-3} {1,-5} {2,-5} {3,-5}{4}", c, numericValue, digitValue, decimalDigitValue, category)
   End Sub

End Class


'This code produces the following output.  Some characters might not display at the console.
'
'                                        c  Num   Dig   Dec   UnicodeCategory
'U+0061 LATIN SMALL LETTER A             a   -1    -1    -1   LowercaseLetter
'U+0393 GREEK CAPITAL LETTER GAMMA       Γ   -1    -1    -1   UppercaseLetter
'U+0039 DIGIT NINE                       9   9     9     9    DecimalDigitNumber
'U+00B2 SUPERSCRIPT TWO                  ²   2     2     2    OtherNumber
'U+00BC VULGAR FRACTION ONE QUARTER      ¼   0.25  -1    -1   OtherNumber
'U+0BEF TAMIL DIGIT NINE                 ௯   9     9     9    DecimalDigitNumber
'U+0BF0 TAMIL NUMBER TEN                 ௰   10    -1    -1   OtherNumber
'U+0F33 TIBETAN DIGIT HALF ZERO          ༳   -0.5  -1    -1   OtherNumber
'U+2788 CIRCLED SANS-SERIF DIGIT NINE    ➈   9     9     -1   OtherNumber

注解

Unicode 标准定义了许多 unicode 字符类别。The Unicode Standard defines a number of Unicode character categories. 例如, 可以将一个字符分类为大写字母、小写字母、十进制数字、字母数字、段落分隔符、数学符号或货币符号。For example, a character might be categorized as an uppercase letter, a lowercase letter, a decimal digit number, a letter number, a paragraph separator, a math symbol, or a currency symbol. 应用程序可以使用字符类别来控制基于字符串的操作, 如使用正则表达式分析或提取子字符串。Your application can use the character category to govern string-based operations, such as parsing or extracting substring with regular expressions. UnicodeCategory枚举定义可能的字符类别。The UnicodeCategory enumeration defines the possible character categories.

使用 CharUnicodeInfo 类可获取特定字符的 UnicodeCategory 值。You use the CharUnicodeInfo class to obtain the UnicodeCategory value for a specific character. CharUnicodeInfo 类定义了返回以下 Unicode 字符值的方法:The CharUnicodeInfo class defines methods that return the following Unicode character values:

  • 字符或代理项对所属的特定类别。The specific category to which a character or surrogate pair belongs. 返回的值是UnicodeCategory枚举的成员。The value returned is a member of the UnicodeCategory enumeration.

  • 数值。Numeric value. 仅适用于数字字符, 包括分数、下标、上标、罗马数字、货币符号中的数字、带圈数字和特定于文字系统的数字。Applies only to numeric characters, including fractions, subscripts, superscripts, Roman numerals, currency numerators, encircled numbers, and script-specific digits.

  • 数字值。Digit value. 适用于可与其他数字字符组合在一起的数字字符, 以表示编号系统中的整数。Applies to numeric characters that can be combined with other numeric characters to represent a whole number in a numbering system.

  • 十进制数字值。Decimal digit value. 仅适用于在十进制 (以10为基数) 系统中表示十进制数字的字符。Applies only to characters that represent decimal digits in the decimal (base 10) system. 十进制数字可以是从0到9的10位数字之一。A decimal digit can be one of ten digits, from zero through nine. 这些字符是UnicodeCategory.DecimalDigitNumber类别的成员。These characters are members of the UnicodeCategory.DecimalDigitNumber category.

此外, CharUnicodeInfo 类还由多个依赖于字符分类的其他 .NET Framework 类型和方法在内部使用。In addition, the CharUnicodeInfo class is used internally by a number of other .NET Framework types and methods that rely on character classification. 其中包括:These include:

  • StringInfo类, 它适用于文本元素, 而不是字符串中的单个字符。The StringInfo class, which works with textual elements instead of single characters in a string.

  • Char.GetUnicodeCategory方法的重载, 它确定字符或代理项对所属的类别。The overloads of the Char.GetUnicodeCategory method, which determine the category to which a character or surrogate pair belongs.

  • .NET Framework 的正则表达式引擎 Regex 所识别的字符类。The character classes recognized by Regex, the .NET Framework's regular expression engine.

在应用程序中使用此类时, 请记住以下有关使用Char类型的编程注意事项。When using this class in your applications, keep in mind the following programming considerations for using the Char type. 类型可能难以使用, 字符串通常更适合用于表示语言内容。The type can be difficult to use, and strings are generally preferable for representing linguistic content.

  • Char 对象并不总是对应于单个字符。A Char object does not always correspond to a single character. 尽管 Char 类型表示单个 16 位值, 但某些字符 (如字形群集和代理项对) 包含两个或更多 UTF-16 代码单元。Although the Char type represents a single 16-bit value, some characters (such as grapheme clusters and surrogate pairs) consist of two or more UTF-16 code units. 有关详细信息, 请参阅 String 类中的 "Char 对象和 Unicode 字符"。For more information, see "Char Objects and Unicode Characters" in the String class.

  • "字符" 的概念也是灵活的。The notion of a "character" is also flexible. 字符通常被视为标志符号, 但很多标志符号需要多个码位。A character is often thought of as a glyph, but many glyphs require multiple code points. 例如, 可以通过两个码位 ("a" 加 U+0308, 即组合分音符) 或单个码位 ("ä", 即 U+00E4) 表示 ä。For example, ä can be represented either by two code points ("a" plus U+0308, which is the combining diaeresis), or by a single code point ("ä" or U+00E4). 某些语言的多个字母、字符和字形需要多个码位, 这可能会导致语言内容表示形式出现混淆。Some languages have many letters, characters, and glyphs that require multiple code points, which can cause confusion in linguistic content representation. 例如, 有一个 ΰ (U+03B0, 带 dialytika 和 tonos 的希腊文小写字母 upsilon), 但没有等效的大写字母。For example, there is a ΰ (U+03B0, Greek small letter upsilon with dialytika and tonos), but there is no equivalent capital letter. 将此类值转换为大写只会返回原始值。Uppercasing such a value simply retrieves the original value.

调用方说明

可识别的字符及其所属的特定类别由 Unicode 标准定义, 并且可能随 Unicode 标准的版本不同而变化。Recognized characters and the specific categories to which they belong are defined by the Unicode standard and can change from one version of the Unicode Standard to another. 特定版本的 .NET Framework 中的字符分类基于 Unicode 标准的单个版本, 而与运行 .NET Framework 的基础操作系统无关。Categorization of characters in a particular version of the .NET Framework is based on a single version of the Unicode Standard regardless of the underlying operating system on which the .NET Framework is running. 下表列出了自 .NET Framework 4 以来的 .NET Framework 版本, 以及用于对字符进行分类的 Unicode 标准版本。The following table lists versions of the .NET Framework since the .NET Framework 4 and the versions of the Unicode Standard used to classify characters.

.NET Framework 版本.NET Framework version Unicode 标准版本Version of the Unicode Standard
.NET Framework 4 [Unicode 标准版本 5.0.0](https://www.unicode.org/versions/Unicode5.0.0/) [The Unicode Standard, Version 5.0.0](https://www.unicode.org/versions/Unicode5.0.0/)
.NET Framework 4.5 [Unicode 标准版本 5.0.0](https://www.unicode.org/versions/Unicode5.0.0/) [The Unicode Standard, Version 5.0.0](https://www.unicode.org/versions/Unicode5.0.0/)
.NET Framework 4.5.1 [Unicode 标准版本 5.0.0](https://www.unicode.org/versions/Unicode5.0.0/) [The Unicode Standard, Version 5.0.0](https://www.unicode.org/versions/Unicode5.0.0/)
.NET Framework 4.5.2 [Unicode 标准版本 5.0.0](https://www.unicode.org/versions/Unicode5.0.0/) [The Unicode Standard, Version 5.0.0](https://www.unicode.org/versions/Unicode5.0.0/)
.NET Framework 4.6 [Unicode 标准版本 6.3.0](https://www.unicode.org/versions/Unicode6.3.0/) [The Unicode Standard, Version 6.3.0](https://www.unicode.org/versions/Unicode6.3.0/)
.NET Framework 4.6.1 [Unicode 标准版本 6.3.0](https://www.unicode.org/versions/Unicode6.3.0/) [The Unicode Standard, Version 6.3.0](https://www.unicode.org/versions/Unicode6.3.0/)
.NET Framework 4.6.2 [Unicode 标准版本 8.0.0](https://www.unicode.org/versions/Unicode8.0.0/) [The Unicode Standard, Version 8.0.0](https://www.unicode.org/versions/Unicode8.0.0/)

Unicode 标准的每个版本都包含自上一版本以来对 Unicode 字符数据库进行的更改的信息。Each version of the Unicode standard includes information on changes to the Unicode character database since the previous version. CharUnicodeInfo类使用 Unicode 字符数据库对字符进行分类。The Unicode character database is used by the CharUnicodeInfo class for categorizing characters.

方法

GetDecimalDigitValue(Char)

获取指定数值型字符的十进制数字值。Gets the decimal digit value of the specified numeric character.

GetDecimalDigitValue(String, Int32)

获取位于指定字符串的指定索引处的数值型字符的十进制数字值。Gets the decimal digit value of the numeric character at the specified index of the specified string.

GetDigitValue(Char)

获取指定数值型字符的数字值。Gets the digit value of the specified numeric character.

GetDigitValue(String, Int32)

获取位于指定字符串的指定索引处的数值型字符的数字值。Gets the digit value of the numeric character at the specified index of the specified string.

GetNumericValue(Char)

获取与指定字符关联的数值。Gets the numeric value associated with the specified character.

GetNumericValue(String, Int32)

获取与位于指定字符串的指定索引处的字符关联的数值。Gets the numeric value associated with the character at the specified index of the specified string.

GetUnicodeCategory(Char)

获取指定字符的 Unicode 类别。Gets the Unicode category of the specified character.

GetUnicodeCategory(Int32)

获取指定字符的 Unicode 类别。Gets the Unicode category of the specified character.

GetUnicodeCategory(String, Int32)

获取位于指定字符串的指定索引处的字符的 Unicode 类别。Gets the Unicode category of the character at the specified index of the specified string.

适用于

另请参阅