CharUnicodeInfo 類別

定義

擷取 Unicode 字元的相關資訊。Retrieves information about a Unicode character. 此類別無法獲得繼承。This class cannot be inherited.

public ref class CharUnicodeInfo abstract sealed
public ref class CharUnicodeInfo sealed
public static class CharUnicodeInfo
public sealed class CharUnicodeInfo
type CharUnicodeInfo = class
Public Class CharUnicodeInfo
Public NotInheritable Class CharUnicodeInfo
繼承
CharUnicodeInfo

範例

下列程式碼範例顯示不同類型字元的每個方法所傳回的值。The following code example shows the values returned by each method for different types of characters.

using namespace System;
using namespace System::Globalization;
void PrintProperties( Char c );
int main()
{
   Console::WriteLine( "                                        c  Num   Dig   Dec   UnicodeCategory" );
   Console::Write( "U+0061 LATIN SMALL LETTER A            " );
   PrintProperties( L'a' );
   Console::Write( "U+0393 GREEK CAPITAL LETTER GAMMA      " );
   PrintProperties( L'\u0393' );
   Console::Write( "U+0039 DIGIT NINE                      " );
   PrintProperties( L'9' );
   Console::Write( "U+00B2 SUPERSCRIPT TWO                 " );
   PrintProperties( L'\u00B2' );
   Console::Write( "U+00BC VULGAR FRACTION ONE QUARTER     " );
   PrintProperties( L'\u00BC' );
   Console::Write( "U+0BEF TAMIL DIGIT NINE                " );
   PrintProperties( L'\u0BEF' );
   Console::Write( "U+0BF0 TAMIL NUMBER TEN                " );
   PrintProperties( L'\u0BF0' );
   Console::Write( "U+0F33 TIBETAN DIGIT HALF ZERO         " );
   PrintProperties( L'\u0F33' );
   Console::Write( "U+2788 CIRCLED SANS-SERIF DIGIT NINE   " );
   PrintProperties( L'\u2788' );
}

// Prints the remainder of a table row for the given character: the character
// itself, then its numeric value, digit value and decimal digit value (each
// left-aligned in a fixed-width column), and finally its Unicode category,
// which also terminates the line.
void PrintProperties( Char c )
{
   // Look everything up first so the output statements below read as a plain
   // list of columns.
   double numericValue = CharUnicodeInfo::GetNumericValue( c );
   int digitValue = CharUnicodeInfo::GetDigitValue( c );
   int decimalDigitValue = CharUnicodeInfo::GetDecimalDigitValue( c );
   UnicodeCategory category = CharUnicodeInfo::GetUnicodeCategory( c );

   Console::Write( " {0,-3}", c );
   Console::Write( " {0,-5}", numericValue );
   Console::Write( " {0,-5}", digitValue );
   Console::Write( " {0,-5}", decimalDigitValue );
   Console::WriteLine( "{0}", category );
}

/*
This code produces the following output.  Some characters might not display at the console.

                                        c  Num   Dig   Dec   UnicodeCategory
U+0061 LATIN SMALL LETTER A             a   -1    -1    -1   LowercaseLetter
U+0393 GREEK CAPITAL LETTER GAMMA       \u0393   -1    -1    -1   UppercaseLetter
U+0039 DIGIT NINE                       9   9     9     9    DecimalDigitNumber
U+00B2 SUPERSCRIPT TWO                  \u00B2   2     2     -1   OtherNumber
U+00BC VULGAR FRACTION ONE QUARTER      \u00BC   0.25  -1    -1   OtherNumber
U+0BEF TAMIL DIGIT NINE                 \u0BEF   9     9     9    DecimalDigitNumber
U+0BF0 TAMIL NUMBER TEN                 \u0BF0   10    -1    -1   OtherNumber
U+0F33 TIBETAN DIGIT HALF ZERO          \u0F33   -0.5  -1    -1   OtherNumber
U+2788 CIRCLED SANS-SERIF DIGIT NINE    \u2788   9     9     -1   OtherNumber

*/
using System;
using System.Globalization;

/// <summary>
/// Console sample that prints a table of the values reported by
/// <see cref="CharUnicodeInfo"/> for several kinds of characters.
/// </summary>
public class SamplesCharUnicodeInfo  {

   /// <summary>
   /// Prints the column header followed by one row per sample character.
   /// </summary>
   public static void Main()  {
      Console.WriteLine( "                                        c  Num   Dig   Dec   UnicodeCategory" );

      PrintRow( "U+0061 LATIN SMALL LETTER A            ", 'a' );
      PrintRow( "U+0393 GREEK CAPITAL LETTER GAMMA      ", '\u0393' );
      PrintRow( "U+0039 DIGIT NINE                      ", '9' );
      PrintRow( "U+00B2 SUPERSCRIPT TWO                 ", '\u00B2' );
      PrintRow( "U+00BC VULGAR FRACTION ONE QUARTER     ", '\u00BC' );
      PrintRow( "U+0BEF TAMIL DIGIT NINE                ", '\u0BEF' );
      PrintRow( "U+0BF0 TAMIL NUMBER TEN                ", '\u0BF0' );
      PrintRow( "U+0F33 TIBETAN DIGIT HALF ZERO         ", '\u0F33' );
      PrintRow( "U+2788 CIRCLED SANS-SERIF DIGIT NINE   ", '\u2788' );
   }

   /// <summary>
   /// Writes one table row: the fixed-width label, then the property columns
   /// for the character.
   /// </summary>
   /// <param name="label">Code point and character name, already padded to the label column width.</param>
   /// <param name="c">The character whose properties are printed.</param>
   private static void PrintRow( string label, char c )  {
      Console.Write( label );
      PrintProperties( c );
   }

   /// <summary>
   /// Prints the character followed by its numeric value, digit value and
   /// decimal digit value in fixed-width columns, then its Unicode category,
   /// which ends the line.
   /// </summary>
   /// <param name="c">The character to describe.</param>
   public static void PrintProperties( char c )  {
      // Look everything up first so the output statements read as a plain
      // list of columns.
      double numericValue = CharUnicodeInfo.GetNumericValue( c );
      int digitValue = CharUnicodeInfo.GetDigitValue( c );
      int decimalDigitValue = CharUnicodeInfo.GetDecimalDigitValue( c );
      UnicodeCategory category = CharUnicodeInfo.GetUnicodeCategory( c );

      Console.Write( " {0,-3}", c );
      Console.Write( " {0,-5}", numericValue );
      Console.Write( " {0,-5}", digitValue );
      Console.Write( " {0,-5}", decimalDigitValue );
      Console.WriteLine( "{0}", category );
   }
}


/*
This code produces the following output.  Some characters might not display at the console.

                                        c  Num   Dig   Dec   UnicodeCategory
U+0061 LATIN SMALL LETTER A             a   -1    -1    -1   LowercaseLetter
U+0393 GREEK CAPITAL LETTER GAMMA       \u0393   -1    -1    -1   UppercaseLetter
U+0039 DIGIT NINE                       9   9     9     9    DecimalDigitNumber
U+00B2 SUPERSCRIPT TWO                  \u00B2   2     2     -1   OtherNumber
U+00BC VULGAR FRACTION ONE QUARTER      \u00BC   0.25  -1    -1   OtherNumber
U+0BEF TAMIL DIGIT NINE                 \u0BEF   9     9     9    DecimalDigitNumber
U+0BF0 TAMIL NUMBER TEN                 \u0BF0   10    -1    -1   OtherNumber
U+0F33 TIBETAN DIGIT HALF ZERO          \u0F33   -0.5  -1    -1   OtherNumber
U+2788 CIRCLED SANS-SERIF DIGIT NINE    \u2788   9     9     -1   OtherNumber

*/

Imports System.Globalization

' Console sample that prints a table of the values reported by CharUnicodeInfo
' for several kinds of characters.
Public Class SamplesCharUnicodeInfo

   ' Prints the column header followed by one row per sample character.
   Public Shared Sub Main()
      Console.WriteLine("                                        c  Num   Dig   Dec   UnicodeCategory")

      PrintRow("U+0061 LATIN SMALL LETTER A            ", "a"c)
      PrintRow("U+0393 GREEK CAPITAL LETTER GAMMA      ", ChrW(&H0393))
      PrintRow("U+0039 DIGIT NINE                      ", "9"c)
      PrintRow("U+00B2 SUPERSCRIPT TWO                 ", ChrW(&H00B2))
      PrintRow("U+00BC VULGAR FRACTION ONE QUARTER     ", ChrW(&H00BC))
      PrintRow("U+0BEF TAMIL DIGIT NINE                ", ChrW(&H0BEF))
      PrintRow("U+0BF0 TAMIL NUMBER TEN                ", ChrW(&H0BF0))
      PrintRow("U+0F33 TIBETAN DIGIT HALF ZERO         ", ChrW(&H0F33))
      PrintRow("U+2788 CIRCLED SANS-SERIF DIGIT NINE   ", ChrW(&H2788))
   End Sub

   ' Writes one table row: the fixed-width label, then the property columns
   ' for the character.
   Private Shared Sub PrintRow(label As String, c As Char)
      Console.Write(label)
      PrintProperties(c)
   End Sub

   ' Prints the character followed by its numeric value, digit value and
   ' decimal digit value in fixed-width columns, then its Unicode category,
   ' which ends the line.
   Public Shared Sub PrintProperties(c As Char)
      ' Look everything up first so the output statements read as a plain
      ' list of columns.
      Dim numericValue As Double = CharUnicodeInfo.GetNumericValue(c)
      Dim digitValue As Integer = CharUnicodeInfo.GetDigitValue(c)
      Dim decimalDigitValue As Integer = CharUnicodeInfo.GetDecimalDigitValue(c)
      Dim category As UnicodeCategory = CharUnicodeInfo.GetUnicodeCategory(c)

      Console.Write(" {0,-3}", c)
      Console.Write(" {0,-5}", numericValue)
      Console.Write(" {0,-5}", digitValue)
      Console.Write(" {0,-5}", decimalDigitValue)
      Console.WriteLine("{0}", category)
   End Sub

End Class


'This code produces the following output.  Some characters might not display at the console.
'
'                                        c  Num   Dig   Dec   UnicodeCategory
'U+0061 LATIN SMALL LETTER A             a   -1    -1    -1   LowercaseLetter
'U+0393 GREEK CAPITAL LETTER GAMMA       \u0393   -1    -1    -1   UppercaseLetter
'U+0039 DIGIT NINE                       9   9     9     9    DecimalDigitNumber
'U+00B2 SUPERSCRIPT TWO                  \u00B2   2     2     -1   OtherNumber
'U+00BC VULGAR FRACTION ONE QUARTER      \u00BC   0.25  -1    -1   OtherNumber
'U+0BEF TAMIL DIGIT NINE                 \u0BEF   9     9     9    DecimalDigitNumber
'U+0BF0 TAMIL NUMBER TEN                 \u0BF0   10    -1    -1   OtherNumber
'U+0F33 TIBETAN DIGIT HALF ZERO          \u0F33   -0.5  -1    -1   OtherNumber
'U+2788 CIRCLED SANS-SERIF DIGIT NINE    \u2788   9     9     -1   OtherNumber

備註

Unicode 標準定義許多 unicode 字元類別。The Unicode Standard defines a number of Unicode character categories. 例如,字元可能會分類為大寫字母、小寫字母、十進位數、字母號、段落分隔符號、數學符號,或貨幣符號。For example, a character might be categorized as an uppercase letter, a lowercase letter, a decimal digit number, a letter number, a paragraph separator, a math symbol, or a currency symbol. 您的應用程式可以使用「字元」類別來管理以字串為基礎的作業,例如使用正則運算式剖析或解壓縮子字串。Your application can use the character category to govern string-based operations, such as parsing or extracting substring with regular expressions. UnicodeCategory列舉會定義可能的字元分類。The UnicodeCategory enumeration defines the possible character categories.

使用 CharUnicodeInfo 類別可取得特定字元的 UnicodeCategory 值。Use the CharUnicodeInfo class to obtain the UnicodeCategory value for a specific character. CharUnicodeInfo 類別會定義傳回下列 Unicode 字元值的方法:The CharUnicodeInfo class defines methods that return the following Unicode character values:

  • 字元或代理配對所屬的特定類別。The specific category to which a character or surrogate pair belongs. 傳回的值是 UnicodeCategory 列舉的成員。The value returned is a member of the UnicodeCategory enumeration.

  • 數值。Numeric value. 只適用于數值字元,包括小數、注標、上標、羅馬數字、貨幣分子、encircled 數位和腳本特定數位。Applies only to numeric characters, including fractions, subscripts, superscripts, Roman numerals, currency numerators, encircled numbers, and script-specific digits.

  • 數位值。Digit value. 適用于可與其他數位字元結合的數值字元,以代表編號系統中的整數。Applies to numeric characters that can be combined with other numeric characters to represent a whole number in a numbering system.

  • 十進位位數值。Decimal digit value. 只適用于 decimal (base 10) 系統中代表十進位數的字元。Applies only to characters that represent decimal digits in the decimal (base 10) system. 十進位數可以是10位數的其中一個,從零到九。A decimal digit can be one of ten digits, from zero through nine. 這些字元是 UnicodeCategory.DecimalDigitNumber 類別目錄的成員。These characters are members of the UnicodeCategory.DecimalDigitNumber category.

此外,此 CharUnicodeInfo 類別是由其他許多 .net 類型和依賴字元分類的方法在內部使用。In addition, the CharUnicodeInfo class is used internally by a number of other .NET types and methods that rely on character classification. 它們包括:These include:

  • StringInfo類別,它會使用文字元素,而不是字串中的單一字元。The StringInfo class, which works with textual elements instead of single characters in a string.

  • 方法的多載 Char.GetUnicodeCategory ,可判斷字元或代理組所屬的分類。The overloads of the Char.GetUnicodeCategory method, which determine the category to which a character or surrogate pair belongs.

  • Regex (.NET 的正則運算式引擎) 所辨識的字元類別。The character classes recognized by Regex, .NET's regular expression engine.

當您在應用程式中使用這個類別時,請記住下列使用 Char 型別的程式設計考量。When using this class in your applications, keep in mind the following programming considerations for using the Char type. 此型別可能不容易使用,而且通常最好使用字串來表示語言內容。The type can be difficult to use, and strings are generally preferable for representing linguistic content.

  • Char 物件不一定會對應到單一字元。A Char object does not always correspond to a single character. 雖然 Char 型別代表單一 16 位元值,但某些字元 (例如字素叢集和代理配對) 是由兩個或多個 UTF-16 程式碼單位組成。Although the Char type represents a single 16-bit value, some characters (such as grapheme clusters and surrogate pairs) consist of two or more UTF-16 code units. 如需詳細資訊,請參閱 String 類別中的「Char 物件和 Unicode 字元」。For more information, see "Char Objects and Unicode Characters" in the String class.

  • 「字元」的概念也有彈性。The notion of a "character" is also flexible. 字元通常會視為圖像,但許多圖像都需要多個程式碼點。A character is often thought of as a glyph, but many glyphs require multiple code points. 例如,ä 可以用兩個程式碼點 ("a" 加上 U+0308,也就是組合用分音符號) 來表示,也可以用單一程式碼點 ("ä" 或 U+00E4) 來表示。For example, ä can be represented either by two code points ("a" plus U+0308, which is the combining diaeresis), or by a single code point ("ä" or U+00E4). 某些語言有許多字母、字元以及需要多個程式碼點的圖像,這可能會導致語言內容表示法混淆。Some languages have many letters, characters, and glyphs that require multiple code points, which can cause confusion in linguistic content representation. 例如,有一個 ΰ (U+03B0,帶有 dialytika 和 tonos 的希臘文小寫字母 upsilon),但沒有對等的大寫字母。For example, there is a ΰ (U+03B0, Greek small letter upsilon with dialytika and tonos), but there is no equivalent capital letter. 將這類值轉換成大寫只會擷取原始值。Uppercasing such a value simply retrieves the original value.

給呼叫者的注意事項

辨識的字元以及它們所屬的特定類別是由 Unicode 標準所定義,而且可能隨 Unicode 標準的版本不同而變更。Recognized characters and the specific categories to which they belong are defined by the Unicode standard and can change from one version of the Unicode Standard to another. 在特定版本的 .NET Framework 中的字元分類是以單一版本的 Unicode 標準為基礎,而不論 .NET Framework 執行所在的基礎作業系統為何。Categorization of characters in a particular version of .NET Framework is based on a single version of the Unicode Standard, regardless of the underlying operating system on which .NET Framework is running. 下表列出自 .NET Framework 4 以來的 .NET Framework 版本,以及用來分類字元的 Unicode 標準版本。The following table lists versions of .NET Framework since .NET Framework 4 and the versions of the Unicode Standard used to classify characters.

| .NET Framework 版本 | Unicode 標準版本 |
|-|-|
| .NET Framework 4 | 5.0.0 |
| .NET Framework 4.5 | 5.0.0 |
| .NET Framework 4.5.1 | 5.0.0 |
| .NET Framework 4.5.2 | 5.0.0 |
| .NET Framework 4.6 | 6.3.0 |
| .NET Framework 4.6.1 | 6.3.0 |
| .NET Framework 4.6.2 | 8.0.0 |

| .NET Framework version | Unicode Standard version |
|-|-|
| .NET Framework 4 | 5.0.0 |
| .NET Framework 4.5 | 5.0.0 |
| .NET Framework 4.5.1 | 5.0.0 |
| .NET Framework 4.5.2 | 5.0.0 |
| .NET Framework 4.6 | 6.3.0 |
| .NET Framework 4.6.1 | 6.3.0 |
| .NET Framework 4.6.2 | 8.0.0 |

每個 Unicode 標準版本都包含自上一版以來對 Unicode 字元資料庫所做之變更的資訊。Each version of the Unicode standard includes information on changes to the Unicode character database since the previous version. CharUnicodeInfo 類別會使用 Unicode 字元資料庫來將字元分類。The Unicode character database is used by the CharUnicodeInfo class for categorizing characters.

方法

GetDecimalDigitValue(Char)

取得指定之數字字元的十進位數值。Gets the decimal digit value of the specified numeric character.

GetDecimalDigitValue(String, Int32)

取得數字字元的十進位數值,其位於指定字串的指定索引處。Gets the decimal digit value of the numeric character at the specified index of the specified string.

GetDigitValue(Char)

取得指定之數字字元的數值。Gets the digit value of the specified numeric character.

GetDigitValue(String, Int32)

取得數字字元的數值,其位於指定字串的指定索引處。Gets the digit value of the numeric character at the specified index of the specified string.

GetNumericValue(Char)

取得與指定字元關聯的數值。Gets the numeric value associated with the specified character.

GetNumericValue(String, Int32)

取得數值,該值與指定字串之指定索引處的字元關聯。Gets the numeric value associated with the character at the specified index of the specified string.

GetUnicodeCategory(Char)

取得指定之字元的 Unicode 分類。Gets the Unicode category of the specified character.

GetUnicodeCategory(Int32)

取得指定之字元的 Unicode 分類。Gets the Unicode category of the specified character.

GetUnicodeCategory(String, Int32)

取得字元的 Unicode 分類,其位於指定字串的指定索引處。Gets the Unicode category of the character at the specified index of the specified string.

適用於

另請參閱