Lazy Diary @ Hatena Blog

PowerShell / Java / miscellaneous things about software development, Tips & Gotchas. CC BY-SA 4.0/Apache License 2.0

ASCIIとLatin-1の各文字の文字タイプ

以下のスクリプトを実行し、ASCIIおよびLatin-1の各文字の文字タイプがUnicodeでどう定義されているか確認した。

# Compile a helper type with P/Invoke declarations for enabling / disabling VT support.
# Note: This incurs a once-per-session performance penalty.
# The namespace embeds $PID, so the generated type name differs from one PowerShell process to the next.
$consoleHelper = Add-Type -PassThru -Namespace "NS$PID" -Name ConsoleHelper -MemberDefinition @'
  [DllImport("kernel32.dll", SetLastError=true)]
  static extern bool SetConsoleMode(IntPtr hConsoleHandle, int mode);
  [DllImport("kernel32.dll", SetLastError=true)]
  static extern bool GetConsoleMode(IntPtr handle, out int mode);
  [DllImport("kernel32.dll", SetLastError=true)]
  static extern IntPtr GetStdHandle(int handle);

  // Sets (enable == true) or clears (enable == false) the
  // ENABLE_VIRTUAL_TERMINAL_PROCESSING bit in the mode of the standard output handle.
  // All other mode bits are written back unchanged.
  public static void EnableVtSupport(bool enable = true) {
    IntPtr outHandle = GetStdHandle(-11); // -11 == STD_OUTPUT_HANDLE
    int mode;
    if (!GetConsoleMode(outHandle, out mode)) {
      // The current mode could not be read, so there is no valid value to
      // modify and write back; leave the handle untouched.
      return;
    }
    SetConsoleMode(outHandle, enable ? (mode | 0x4) : (mode & ~0x4)); // 0x4 == ENABLE_VIRTUAL_TERMINAL_PROCESSING
  }
'@

# Returns a copy of $inputString in which every C0 control character
# (U+0000..U+001F), DEL (U+007F) and every C1 control character
# (U+0080..U+009F) is replaced by a printable stand-in. All other
# characters, including U+0020 SPACE, are copied through unchanged.
#
# Parameters:
#   inputString - the text to make printable.
# Returns:
#   The converted string.
function Show-ControlCharacters {
    param (
        [string]$inputString
    )

    # Keyed by [char] so lookups are exact code-point matches; no regex or
    # string-comparison rules are involved.
    $visualMap = New-Object 'System.Collections.Generic.Dictionary[char,string]'

    # ASCII C0 control codes (ANSI X3.4): the stand-in for code point N is the
    # Unicode "Control Pictures" character U+2400 + N (U+2400 '␀' .. U+241F '␟').
    foreach ($code in 0..31) {
        $visualMap[[char]$code] = [string][char](0x2400 + $code)
    }
    # U+0020 SPACE is deliberately left unmapped (its picture would be U+2420 '␠').
    $visualMap[[char]127] = [string][char]0x2421   # DEL -> '␡'

    # ISO/IEC 6429 C1 control codes, listed in code-point order starting at U+0080.
    $c1Names = @(
        'PAD', 'HOP', 'BPH', 'NBH', 'IND', 'NEL', 'SSA', 'ESA',
        'HTS', 'HTJ', 'VTS', 'PLD', 'PLU', 'RI',  'SS2', 'SS3',
        'DCS', 'PU1', 'PU2', 'STS', 'CCH', 'MW',  'SPA', 'EPA',
        'SOS', 'SGC', 'SCI', 'CSI', 'ST',  'OSC', 'PM',  'APC'
    )
    for ($i = 0; $i -lt $c1Names.Count; $i++) {
        $visualMap[[char](0x80 + $i)] = '<' + $c1Names[$i] + '>'
    }

    # Add more mappings as needed

    # Single pass over the input: each character is either swapped for its
    # stand-in or appended as-is.
    $result = New-Object System.Text.StringBuilder
    foreach ($ch in $inputString.ToCharArray()) {
        if ($visualMap.ContainsKey($ch)) {
            [void]$result.Append($visualMap[$ch])
        }
        else {
            [void]$result.Append($ch)
        }
    }

    return $result.ToString()
}

# Temporarily turn off VT support while the table is printed.
$consoleHelper::EnableVtSupport($false)

try {
    # For every code point in the ASCII and Latin-1 range, emit one line of the form
    #   [<hex code>],[<character or its printable stand-in>], [<Unicode general category>]
    0..255 | ForEach-Object {
        $c = [Char]$_
        $type = [System.Globalization.CharUnicodeInfo]::GetUnicodeCategory($c)
        "[{0:x}],[{1}], [{2}]" -F $_, (Show-ControlCharacters $c), $type
    }
}
finally {
    # Turn VT support back on, even if printing the table failed part-way.
    $consoleHelper::EnableVtSupport($true)
}

結果は以下。

[0],[␀], [Control]
[1],[␁], [Control]
[2],[␂], [Control]
[3],[␃], [Control]
[4],[␄], [Control]
[5],[␅], [Control]
[6],[␆], [Control]
[7],[␇], [Control]
[8],[␈], [Control]
[9],[␉], [Control]
[a],[␊], [Control]
[b],[␋], [Control]
[c],[␌], [Control]
[d],[␍], [Control]
[e],[␎], [Control]
[f],[␏], [Control]
[10],[␐], [Control]
[11],[␑], [Control]
[12],[␒], [Control]
[13],[␓], [Control]
[14],[␔], [Control]
[15],[␕], [Control]
[16],[␖], [Control]
[17],[␗], [Control]
[18],[␘], [Control]
[19],[␙], [Control]
[1a],[␚], [Control]
[1b],[␛], [Control]
[1c],[␜], [Control]
[1d],[␝], [Control]
[1e],[␞], [Control]
[1f],[␟], [Control]
[20],[ ], [SpaceSeparator]
[21],[!], [OtherPunctuation]
[22],["], [OtherPunctuation]
[23],[#], [OtherPunctuation]
[24],[$], [CurrencySymbol]
[25],[%], [OtherPunctuation]
[26],[&], [OtherPunctuation]
[27],['], [OtherPunctuation]
[28],[(], [OpenPunctuation]
[29],[)], [ClosePunctuation]
[2a],[*], [OtherPunctuation]
[2b],[+], [MathSymbol]
[2c],[,], [OtherPunctuation]
[2d],[-], [DashPunctuation]
[2e],[.], [OtherPunctuation]
[2f],[/], [OtherPunctuation]
[30],[0], [DecimalDigitNumber]
[31],[1], [DecimalDigitNumber]
[32],[2], [DecimalDigitNumber]
[33],[3], [DecimalDigitNumber]
[34],[4], [DecimalDigitNumber]
[35],[5], [DecimalDigitNumber]
[36],[6], [DecimalDigitNumber]
[37],[7], [DecimalDigitNumber]
[38],[8], [DecimalDigitNumber]
[39],[9], [DecimalDigitNumber]
[3a],[:], [OtherPunctuation]
[3b],[;], [OtherPunctuation]
[3c],[<], [MathSymbol]
[3d],[=], [MathSymbol]
[3e],[>], [MathSymbol]
[3f],[?], [OtherPunctuation]
[40],[@], [OtherPunctuation]
[41],[A], [UppercaseLetter]
[42],[B], [UppercaseLetter]
[43],[C], [UppercaseLetter]
[44],[D], [UppercaseLetter]
[45],[E], [UppercaseLetter]
[46],[F], [UppercaseLetter]
[47],[G], [UppercaseLetter]
[48],[H], [UppercaseLetter]
[49],[I], [UppercaseLetter]
[4a],[J], [UppercaseLetter]
[4b],[K], [UppercaseLetter]
[4c],[L], [UppercaseLetter]
[4d],[M], [UppercaseLetter]
[4e],[N], [UppercaseLetter]
[4f],[O], [UppercaseLetter]
[50],[P], [UppercaseLetter]
[51],[Q], [UppercaseLetter]
[52],[R], [UppercaseLetter]
[53],[S], [UppercaseLetter]
[54],[T], [UppercaseLetter]
[55],[U], [UppercaseLetter]
[56],[V], [UppercaseLetter]
[57],[W], [UppercaseLetter]
[58],[X], [UppercaseLetter]
[59],[Y], [UppercaseLetter]
[5a],[Z], [UppercaseLetter]
[5b],[[], [OpenPunctuation]
[5c],[\], [OtherPunctuation]
[5d],[]], [ClosePunctuation]
[5e],[^], [ModifierSymbol]
[5f],[_], [ConnectorPunctuation]
[60],[`], [ModifierSymbol]
[61],[a], [LowercaseLetter]
[62],[b], [LowercaseLetter]
[63],[c], [LowercaseLetter]
[64],[d], [LowercaseLetter]
[65],[e], [LowercaseLetter]
[66],[f], [LowercaseLetter]
[67],[g], [LowercaseLetter]
[68],[h], [LowercaseLetter]
[69],[i], [LowercaseLetter]
[6a],[j], [LowercaseLetter]
[6b],[k], [LowercaseLetter]
[6c],[l], [LowercaseLetter]
[6d],[m], [LowercaseLetter]
[6e],[n], [LowercaseLetter]
[6f],[o], [LowercaseLetter]
[70],[p], [LowercaseLetter]
[71],[q], [LowercaseLetter]
[72],[r], [LowercaseLetter]
[73],[s], [LowercaseLetter]
[74],[t], [LowercaseLetter]
[75],[u], [LowercaseLetter]
[76],[v], [LowercaseLetter]
[77],[w], [LowercaseLetter]
[78],[x], [LowercaseLetter]
[79],[y], [LowercaseLetter]
[7a],[z], [LowercaseLetter]
[7b],[{], [OpenPunctuation]
[7c],[|], [MathSymbol]
[7d],[}], [ClosePunctuation]
[7e],[~], [MathSymbol]
[7f],[␡], [Control]
[80],[<PAD>], [Control]
[81],[<HOP>], [Control]
[82],[<BPH>], [Control]
[83],[<NBH>], [Control]
[84],[<IND>], [Control]
[85],[<NEL>], [Control]
[86],[<SSA>], [Control]
[87],[<ESA>], [Control]
[88],[<HTS>], [Control]
[89],[<HTJ>], [Control]
[8a],[<VTS>], [Control]
[8b],[<PLD>], [Control]
[8c],[<PLU>], [Control]
[8d],[<RI>], [Control]
[8e],[<SS2>], [Control]
[8f],[<SS3>], [Control]
[90],[<DCS>], [Control]
[91],[<PU1>], [Control]
[92],[<PU2>], [Control]
[93],[<STS>], [Control]
[94],[<CCH>], [Control]
[95],[<MW>], [Control]
[96],[<SPA>], [Control]
[97],[<EPA>], [Control]
[98],[<SOS>], [Control]
[99],[<SGC>], [Control]
[9a],[<SCI>], [Control]
[9b],[<CSI>], [Control]
[9c],[<ST>], [Control]
[9d],[<OSC>], [Control]
[9e],[<PM>], [Control]
[9f],[<APC>], [Control]
[a0],[ ], [SpaceSeparator]
[a1],[¡], [OtherPunctuation]
[a2],[¢], [CurrencySymbol]
[a3],[£], [CurrencySymbol]
[a4],[¤], [CurrencySymbol]
[a5],[¥], [CurrencySymbol]
[a6],[¦], [OtherSymbol]
[a7],[§], [OtherPunctuation]
[a8],[¨], [ModifierSymbol]
[a9],[©], [OtherSymbol]
[aa],[ª], [OtherLetter]
[ab],[«], [InitialQuotePunctuation]
[ac],[¬], [MathSymbol]
[ad],[­], [Format]
[ae],[®], [OtherSymbol]
[af],[¯], [ModifierSymbol]
[b0],[°], [OtherSymbol]
[b1],[±], [MathSymbol]
[b2],[²], [OtherNumber]
[b3],[³], [OtherNumber]
[b4],[´], [ModifierSymbol]
[b5],[µ], [LowercaseLetter]
[b6],[¶], [OtherPunctuation]
[b7],[·], [OtherPunctuation]
[b8],[¸], [ModifierSymbol]
[b9],[¹], [OtherNumber]
[ba],[º], [OtherLetter]
[bb],[»], [FinalQuotePunctuation]
[bc],[¼], [OtherNumber]
[bd],[½], [OtherNumber]
[be],[¾], [OtherNumber]
[bf],[¿], [OtherPunctuation]
[c0],[À], [UppercaseLetter]
[c1],[Á], [UppercaseLetter]
[c2],[Â], [UppercaseLetter]
[c3],[Ã], [UppercaseLetter]
[c4],[Ä], [UppercaseLetter]
[c5],[Å], [UppercaseLetter]
[c6],[Æ], [UppercaseLetter]
[c7],[Ç], [UppercaseLetter]
[c8],[È], [UppercaseLetter]
[c9],[É], [UppercaseLetter]
[ca],[Ê], [UppercaseLetter]
[cb],[Ë], [UppercaseLetter]
[cc],[Ì], [UppercaseLetter]
[cd],[Í], [UppercaseLetter]
[ce],[Î], [UppercaseLetter]
[cf],[Ï], [UppercaseLetter]
[d0],[Ð], [UppercaseLetter]
[d1],[Ñ], [UppercaseLetter]
[d2],[Ò], [UppercaseLetter]
[d3],[Ó], [UppercaseLetter]
[d4],[Ô], [UppercaseLetter]
[d5],[Õ], [UppercaseLetter]
[d6],[Ö], [UppercaseLetter]
[d7],[×], [MathSymbol]
[d8],[Ø], [UppercaseLetter]
[d9],[Ù], [UppercaseLetter]
[da],[Ú], [UppercaseLetter]
[db],[Û], [UppercaseLetter]
[dc],[Ü], [UppercaseLetter]
[dd],[Ý], [UppercaseLetter]
[de],[Þ], [UppercaseLetter]
[df],[ß], [LowercaseLetter]
[e0],[à], [LowercaseLetter]
[e1],[á], [LowercaseLetter]
[e2],[â], [LowercaseLetter]
[e3],[ã], [LowercaseLetter]
[e4],[ä], [LowercaseLetter]
[e5],[å], [LowercaseLetter]
[e6],[æ], [LowercaseLetter]
[e7],[ç], [LowercaseLetter]
[e8],[è], [LowercaseLetter]
[e9],[é], [LowercaseLetter]
[ea],[ê], [LowercaseLetter]
[eb],[ë], [LowercaseLetter]
[ec],[ì], [LowercaseLetter]
[ed],[í], [LowercaseLetter]
[ee],[î], [LowercaseLetter]
[ef],[ï], [LowercaseLetter]
[f0],[ð], [LowercaseLetter]
[f1],[ñ], [LowercaseLetter]
[f2],[ò], [LowercaseLetter]
[f3],[ó], [LowercaseLetter]
[f4],[ô], [LowercaseLetter]
[f5],[õ], [LowercaseLetter]
[f6],[ö], [LowercaseLetter]
[f7],[÷], [MathSymbol]
[f8],[ø], [LowercaseLetter]
[f9],[ù], [LowercaseLetter]
[fa],[ú], [LowercaseLetter]
[fb],[û], [LowercaseLetter]
[fc],[ü], [LowercaseLetter]
[fd],[ý], [LowercaseLetter]
[fe],[þ], [LowercaseLetter]
[ff],[ÿ], [LowercaseLetter]