CharUnicodeInfo Class

定義

擷取 Unicode 字元的相關資訊。Retrieves information about a Unicode character. 這個類別無法被繼承。This class cannot be inherited.

public ref class CharUnicodeInfo abstract sealed
public static class CharUnicodeInfo
type CharUnicodeInfo = class
Public Class CharUnicodeInfo
繼承
CharUnicodeInfoCharUnicodeInfoCharUnicodeInfoCharUnicodeInfo

範例

下列程式碼範例顯示不同字元類型的每個方法所傳回的值。The following code example shows the values returned by each method for different types of characters.

using namespace System;
using namespace System::Globalization;
void PrintProperties( Char c );
int main()
{
   Console::WriteLine( "                                        c  Num   Dig   Dec   UnicodeCategory" );
   Console::Write( "U+0061 LATIN SMALL LETTER A            " );
   PrintProperties( L'a' );
   Console::Write( "U+0393 GREEK CAPITAL LETTER GAMMA      " );
   PrintProperties( L'\u0393' );
   Console::Write( "U+0039 DIGIT NINE                      " );
   PrintProperties( L'9' );
   Console::Write( "U+00B2 SUPERSCRIPT TWO                 " );
   PrintProperties( L'\u00B2' );
   Console::Write( "U+00BC VULGAR FRACTION ONE QUARTER     " );
   PrintProperties( L'\u00BC' );
   Console::Write( "U+0BEF TAMIL DIGIT NINE                " );
   PrintProperties( L'\u0BEF' );
   Console::Write( "U+0BF0 TAMIL NUMBER TEN                " );
   PrintProperties( L'\u0BF0' );
   Console::Write( "U+0F33 TIBETAN DIGIT HALF ZERO         " );
   PrintProperties( L'\u0F33' );
   Console::Write( "U+2788 CIRCLED SANS-SERIF DIGIT NINE   " );
   PrintProperties( L'\u2788' );
}

// Writes one output row for the character c: the character itself, then the
// results of CharUnicodeInfo::GetNumericValue, GetDigitValue,
// GetDecimalDigitValue and GetUnicodeCategory, in that order.
void PrintProperties( Char c )
{
   // Query everything first so the formatting code below reads as one table row.
   double numericValue = CharUnicodeInfo::GetNumericValue( c );
   int digitValue = CharUnicodeInfo::GetDigitValue( c );
   int decimalDigitValue = CharUnicodeInfo::GetDecimalDigitValue( c );
   UnicodeCategory category = CharUnicodeInfo::GetUnicodeCategory( c );

   // Negative alignment values left-justify each field within its column.
   Console::Write( " {0,-3}", c );
   Console::Write( " {0,-5}", numericValue );
   Console::Write( " {0,-5}", digitValue );
   Console::Write( " {0,-5}", decimalDigitValue );
   Console::WriteLine( "{0}", category );
}

/*
This code produces the following output.  Some characters might not display at the console.

                                        c  Num   Dig   Dec   UnicodeCategory
U+0061 LATIN SMALL LETTER A             a   -1    -1    -1   LowercaseLetter
U+0393 GREEK CAPITAL LETTER GAMMA       Γ   -1    -1    -1   UppercaseLetter
U+0039 DIGIT NINE                       9   9     9     9    DecimalDigitNumber
U+00B2 SUPERSCRIPT TWO                  ²   2     2     2    OtherNumber
U+00BC VULGAR FRACTION ONE QUARTER      ¼   0.25  -1    -1   OtherNumber
U+0BEF TAMIL DIGIT NINE                 ௯   9     9     9    DecimalDigitNumber
U+0BF0 TAMIL NUMBER TEN                 ௰   10    -1    -1   OtherNumber
U+0F33 TIBETAN DIGIT HALF ZERO          ༳   -0.5  -1    -1   OtherNumber
U+2788 CIRCLED SANS-SERIF DIGIT NINE    ➈   9     9     -1   OtherNumber

*/
using System;
using System.Globalization;

/// <summary>
/// Console sample that prints the values reported by
/// <see cref="CharUnicodeInfo"/> for a handful of characters.
/// </summary>
public class SamplesCharUnicodeInfo  {

   /// <summary>
   /// Prints a column header followed by one row per sample character.
   /// </summary>
   public static void Main()  {

      // Row labels, padded so the property columns line up under the header.
      string[] labels = {
         "U+0061 LATIN SMALL LETTER A            ",
         "U+0393 GREEK CAPITAL LETTER GAMMA      ",
         "U+0039 DIGIT NINE                      ",
         "U+00B2 SUPERSCRIPT TWO                 ",
         "U+00BC VULGAR FRACTION ONE QUARTER     ",
         "U+0BEF TAMIL DIGIT NINE                ",
         "U+0BF0 TAMIL NUMBER TEN                ",
         "U+0F33 TIBETAN DIGIT HALF ZERO         ",
         "U+2788 CIRCLED SANS-SERIF DIGIT NINE   "
      };

      // Characters to inspect, in the same order as the labels above.
      char[] samples = {
         'a',
         '\u0393',
         '9',
         '\u00B2',
         '\u00BC',
         '\u0BEF',
         '\u0BF0',
         '\u0F33',
         '\u2788'
      };

      Console.WriteLine( "                                        c  Num   Dig   Dec   UnicodeCategory" );

      for ( int row = 0; row < labels.Length; row++ )  {
         Console.Write( labels[row] );
         PrintProperties( samples[row] );
      }

   }

   /// <summary>
   /// Writes one output row for a character: the character itself, then its
   /// numeric value, digit value, decimal digit value and Unicode category.
   /// </summary>
   /// <param name="c">The character to describe.</param>
   public static void PrintProperties( char c )  {
      double numericValue = CharUnicodeInfo.GetNumericValue( c );
      int digitValue = CharUnicodeInfo.GetDigitValue( c );
      int decimalDigitValue = CharUnicodeInfo.GetDecimalDigitValue( c );
      UnicodeCategory category = CharUnicodeInfo.GetUnicodeCategory( c );

      // Negative alignment values left-justify each field; the category is
      // appended directly after the last padded column.
      Console.WriteLine( " {0,-3} {1,-5} {2,-5} {3,-5}{4}",
         c, numericValue, digitValue, decimalDigitValue, category );
   }

}


/*
This code produces the following output.  Some characters might not display at the console.

                                        c  Num   Dig   Dec   UnicodeCategory
U+0061 LATIN SMALL LETTER A             a   -1    -1    -1   LowercaseLetter
U+0393 GREEK CAPITAL LETTER GAMMA       Γ   -1    -1    -1   UppercaseLetter
U+0039 DIGIT NINE                       9   9     9     9    DecimalDigitNumber
U+00B2 SUPERSCRIPT TWO                  ²   2     2     2    OtherNumber
U+00BC VULGAR FRACTION ONE QUARTER      ¼   0.25  -1    -1   OtherNumber
U+0BEF TAMIL DIGIT NINE                 ௯   9     9     9    DecimalDigitNumber
U+0BF0 TAMIL NUMBER TEN                 ௰   10    -1    -1   OtherNumber
U+0F33 TIBETAN DIGIT HALF ZERO          ༳   -0.5  -1    -1   OtherNumber
U+2788 CIRCLED SANS-SERIF DIGIT NINE    ➈   9     9     -1   OtherNumber

*/

Imports System.Globalization

''' <summary>
''' Console sample that prints the values reported by CharUnicodeInfo
''' for a handful of characters.
''' </summary>
Public Class SamplesCharUnicodeInfo

   ''' <summary>
   ''' Prints a column header followed by one row per sample character.
   ''' </summary>
   Public Shared Sub Main()

      ' Row labels, padded so the property columns line up under the header.
      Dim labels() As String = {
         "U+0061 LATIN SMALL LETTER A            ",
         "U+0393 GREEK CAPITAL LETTER GAMMA      ",
         "U+0039 DIGIT NINE                      ",
         "U+00B2 SUPERSCRIPT TWO                 ",
         "U+00BC VULGAR FRACTION ONE QUARTER     ",
         "U+0BEF TAMIL DIGIT NINE                ",
         "U+0BF0 TAMIL NUMBER TEN                ",
         "U+0F33 TIBETAN DIGIT HALF ZERO         ",
         "U+2788 CIRCLED SANS-SERIF DIGIT NINE   "
      }

      ' Characters to inspect, in the same order as the labels above.
      Dim samples() As Char = {
         "a"c,
         ChrW(&H0393),
         "9"c,
         ChrW(&H00B2),
         ChrW(&H00BC),
         ChrW(&H0BEF),
         ChrW(&H0BF0),
         ChrW(&H0F33),
         ChrW(&H2788)
      }

      Console.WriteLine("                                        c  Num   Dig   Dec   UnicodeCategory")

      For row As Integer = 0 To labels.Length - 1
         Console.Write(labels(row))
         PrintProperties(samples(row))
      Next

   End Sub

   ''' <summary>
   ''' Writes one output row for a character: the character itself, then its
   ''' numeric value, digit value, decimal digit value and Unicode category.
   ''' </summary>
   ''' <param name="c">The character to describe.</param>
   Public Shared Sub PrintProperties(c As Char)
      ' Query everything first so the formatting code below reads as one table row.
      Dim numericValue As Double = CharUnicodeInfo.GetNumericValue(c)
      Dim digitValue As Integer = CharUnicodeInfo.GetDigitValue(c)
      Dim decimalDigitValue As Integer = CharUnicodeInfo.GetDecimalDigitValue(c)
      Dim category As UnicodeCategory = CharUnicodeInfo.GetUnicodeCategory(c)

      ' Negative alignment values left-justify each field within its column.
      Console.Write(" {0,-3}", c)
      Console.Write(" {0,-5}", numericValue)
      Console.Write(" {0,-5}", digitValue)
      Console.Write(" {0,-5}", decimalDigitValue)
      Console.WriteLine("{0}", category)
   End Sub

End Class


'This code produces the following output.  Some characters might not display at the console.
'
'                                        c  Num   Dig   Dec   UnicodeCategory
'U+0061 LATIN SMALL LETTER A             a   -1    -1    -1   LowercaseLetter
'U+0393 GREEK CAPITAL LETTER GAMMA       Γ   -1    -1    -1   UppercaseLetter
'U+0039 DIGIT NINE                       9   9     9     9    DecimalDigitNumber
'U+00B2 SUPERSCRIPT TWO                  ²   2     2     2    OtherNumber
'U+00BC VULGAR FRACTION ONE QUARTER      ¼   0.25  -1    -1   OtherNumber
'U+0BEF TAMIL DIGIT NINE                 ௯   9     9     9    DecimalDigitNumber
'U+0BF0 TAMIL NUMBER TEN                 ௰   10    -1    -1   OtherNumber
'U+0F33 TIBETAN DIGIT HALF ZERO          ༳   -0.5  -1    -1   OtherNumber
'U+2788 CIRCLED SANS-SERIF DIGIT NINE    ➈   9     9     -1   OtherNumber

備註

Unicode 標準會定義數個 unicode 字元類別目錄。The Unicode Standard defines a number of Unicode character categories. 例如, 字元可能會分類為大寫字母、小寫字母、十進位數數位、字母數位、段落分隔符號、數學符號或貨幣符號。For example, a character might be categorized as an uppercase letter, a lowercase letter, a decimal digit number, a letter number, a paragraph separator, a math symbol, or a currency symbol. 您的應用程式可以使用字元類別來管理以字串為基礎的作業, 例如使用正則運算式剖析或解壓縮子字串。Your application can use the character category to govern string-based operations, such as parsing or extracting substring with regular expressions. UnicodeCategory列舉會定義可能的字元類別目錄。The UnicodeCategory enumeration defines the possible character categories.

您可以使用CharUnicodeInfo類別來取得特定UnicodeCategory字元的值。You use the CharUnicodeInfo class to obtain the UnicodeCategory value for a specific character. CharUnicodeInfo類別會定義傳回下列 Unicode 字元值的方法:The CharUnicodeInfo class defines methods that return the following Unicode character values:

  • 字元或代理配對所屬的特定類別。The specific category to which a character or surrogate pair belongs. 傳回的值是UnicodeCategory列舉的成員。The value returned is a member of the UnicodeCategory enumeration.

  • 數值。Numeric value. 僅適用于數位字元, 包括分數、注標、上標、羅馬數字、貨幣分子、encircled 數位和腳本特定數位。Applies only to numeric characters, including fractions, subscripts, superscripts, Roman numerals, currency numerators, encircled numbers, and script-specific digits.

  • 數位值。Digit value. 適用于可與其他數位字元結合的數值字元, 以代表編號系統中的整數。Applies to numeric characters that can be combined with other numeric characters to represent a whole number in a numbering system.

  • 十進位數值。Decimal digit value. 僅適用于十進位 (基底 10) 系統中代表十進位數的字元。Applies only to characters that represent decimal digits in the decimal (base 10) system. 十進位數可以是10位數的其中一個, 從零到9。A decimal digit can be one of ten digits, from zero through nine. 這些字元是UnicodeCategory.DecimalDigitNumber類別目錄的成員。These characters are members of the UnicodeCategory.DecimalDigitNumber category.

此外, CharUnicodeInfo類別是由許多其他 .NET Framework 類型和依賴字元分類的方法在內部使用。In addition, the CharUnicodeInfo class is used internally by a number of other .NET Framework types and methods that rely on character classification. 它們包括:These include:

  • StringInfo類別, 適用于文字元素, 而不是字串中的單一字元。The StringInfo class, which works with textual elements instead of single characters in a string.

  • Char.GetUnicodeCategory方法的多載, 可決定字元或代理組所屬的分類。The overloads of the Char.GetUnicodeCategory method, which determine the category to which a character or surrogate pair belongs.

  • Regex (.NET Framework 的規則運算式引擎) 所識別的字元類別。The character classes recognized by Regex, the .NET Framework's regular expression engine.

在您的應用程式中使用這個類別時, 請記住下列使用Char類型的程式設計考量。When using this class in your applications, keep in mind the following programming considerations for using the Char type. 此類型可能不易使用, 而字串通常是表示語言內容的較佳選擇。The type can be difficult to use, and strings are generally preferable for representing linguistic content.

  • Char物件不一定會對應至單一字元。A Char object does not always correspond to a single character. Char雖然類型代表單一16位值, 但某些字元 (例如語素簇叢集和代理配對) 是由兩個或多個 utf-16 程式碼單位所組成。Although the Char type represents a single 16-bit value, some characters (such as grapheme clusters and surrogate pairs) consist of two or more UTF-16 code units. 如需詳細資訊, 請參閱類別中的String 「字元物件和 Unicode 字元」。For more information, see "Char Objects and Unicode Characters" in the String class.

  • 「字元」的概念也有彈性。The notion of a "character" is also flexible. 字元通常會被視為圖像, 但許多圖像都需要多個程式碼點。A character is often thought of as a glyph, but many glyphs require multiple code points. 例如, 您可以透過兩個程式碼點 (「a」加上 U+0308, 也就是組合用分音符號) 或單一程式碼點 ("ä" 或 U+00E4) 來表示ä。For example, ä can be represented either by two code points ("a" plus U+0308, which is the combining diaeresis), or by a single code point ("ä" or U+00E4). 某些語言有許多字母、字元和圖像需要多個程式碼點, 這可能會造成語言內容表示的混淆。Some languages have many letters, characters, and glyphs that require multiple code points, which can cause confusion in linguistic content representation. 例如, 有一個ΰ (U+03B0, 希臘文小寫字母 upsilon, 加上 dialytika 和 tonos), 但沒有對等的大寫字母。For example, there is a ΰ (U+03B0, Greek small letter upsilon with dialytika and tonos), but there is no equivalent capital letter. 將這類值轉換為大寫只會傳回原始值。Uppercasing such a value simply retrieves the original value.

給呼叫者的注意事項

已辨識的字元和它們所屬的特定類別是由 Unicode 標準所定義, 而且可以從 Unicode 標準的某個版本變更為另一個版本。Recognized characters and the specific categories to which they belong are defined by the Unicode standard and can change from one version of the Unicode Standard to another. 特定版本的 .NET Framework 中的字元分類是以 Unicode Standard 的單一版本為基礎, 而不論 .NET Framework 執行所在的基礎作業系統為何。Categorization of characters in a particular version of the .NET Framework is based on a single version of the Unicode Standard regardless of the underlying operating system on which the .NET Framework is running. 下表列出自 .NET Framework 4 起的 .NET Framework 版本, 以及用來分類字元的 Unicode 標準版本。The following table lists versions of the .NET Framework since the .NET Framework 4 and the versions of the Unicode Standard used to classify characters.

.NET Framework 版本.NET Framework version Unicode Standard 版本Version of the Unicode Standard
[!INCLUDE[net_v40_long](~/includes/net-v40-long-md.md)] [Unicode 標準, 第5.0.0 版](https://www.unicode.org/versions/Unicode5.0.0/) [The Unicode Standard, Version 5.0.0](https://www.unicode.org/versions/Unicode5.0.0/)
[!INCLUDE[net_v45](~/includes/net-v45-md.md)] [Unicode 標準, 第5.0.0 版](https://www.unicode.org/versions/Unicode5.0.0/) [The Unicode Standard, Version 5.0.0](https://www.unicode.org/versions/Unicode5.0.0/)
[!INCLUDE[net_v451](~/includes/net-v451-md.md)] [Unicode 標準, 第5.0.0 版](https://www.unicode.org/versions/Unicode5.0.0/) [The Unicode Standard, Version 5.0.0](https://www.unicode.org/versions/Unicode5.0.0/)
[!INCLUDE[net_v452](~/includes/net-v452-md.md)] [Unicode 標準, 第5.0.0 版](https://www.unicode.org/versions/Unicode5.0.0/) [The Unicode Standard, Version 5.0.0](https://www.unicode.org/versions/Unicode5.0.0/)
[!INCLUDE[net_v46](~/includes/net-v46-md.md)] [Unicode 標準, 第6.3.0 版](https://www.unicode.org/versions/Unicode6.3.0/) [The Unicode Standard, Version 6.3.0](https://www.unicode.org/versions/Unicode6.3.0/)
[!INCLUDE[net_v461](~/includes/net-v461-md.md)] [Unicode 標準, 第6.3.0 版](https://www.unicode.org/versions/Unicode6.3.0/) [The Unicode Standard, Version 6.3.0](https://www.unicode.org/versions/Unicode6.3.0/)
[!INCLUDE[net_v462](~/includes/net-v462-md.md)] [Unicode 標準, 第8.0.0 版](https://www.unicode.org/versions/Unicode8.0.0/) [The Unicode Standard, Version 8.0.0](https://www.unicode.org/versions/Unicode8.0.0/)

Unicode 標準的每個版本都包含自上一版以來 Unicode 字元資料庫變更的資訊。Each version of the Unicode standard includes information on changes to the Unicode character database since the previous version. CharUnicodeInfo類別會使用 Unicode 字元資料庫來分類字元。The Unicode character database is used by the CharUnicodeInfo class for categorizing characters.

方法

GetDecimalDigitValue(Char) GetDecimalDigitValue(Char) GetDecimalDigitValue(Char) GetDecimalDigitValue(Char)

取得指定之數字字元的十進位數值。Gets the decimal digit value of the specified numeric character.

GetDecimalDigitValue(String, Int32) GetDecimalDigitValue(String, Int32) GetDecimalDigitValue(String, Int32) GetDecimalDigitValue(String, Int32)

取得數字字元的十進位數值,其位於指定字串的指定索引處。Gets the decimal digit value of the numeric character at the specified index of the specified string.

GetDigitValue(Char) GetDigitValue(Char) GetDigitValue(Char) GetDigitValue(Char)

取得指定之數字字元的數值。Gets the digit value of the specified numeric character.

GetDigitValue(String, Int32) GetDigitValue(String, Int32) GetDigitValue(String, Int32) GetDigitValue(String, Int32)

取得數字字元的數值,其位於指定字串的指定索引處。Gets the digit value of the numeric character at the specified index of the specified string.

GetNumericValue(Char) GetNumericValue(Char) GetNumericValue(Char) GetNumericValue(Char)

取得與指定字元關聯的數值。Gets the numeric value associated with the specified character.

GetNumericValue(String, Int32) GetNumericValue(String, Int32) GetNumericValue(String, Int32) GetNumericValue(String, Int32)

取得數值,該值與指定字串之指定索引處的字元關聯。Gets the numeric value associated with the character at the specified index of the specified string.

GetUnicodeCategory(Char) GetUnicodeCategory(Char) GetUnicodeCategory(Char) GetUnicodeCategory(Char)

取得指定之字元的 Unicode 分類。Gets the Unicode category of the specified character.

GetUnicodeCategory(Int32) GetUnicodeCategory(Int32) GetUnicodeCategory(Int32) GetUnicodeCategory(Int32)
GetUnicodeCategory(String, Int32) GetUnicodeCategory(String, Int32) GetUnicodeCategory(String, Int32) GetUnicodeCategory(String, Int32)

取得字元的 Unicode 分類,其位於指定字串的指定索引處。Gets the Unicode category of the character at the specified index of the specified string.

適用於

另請參閱