Accept non-ASCII characters

Consider this program:

#include <stdio.h>
/*
 * Prints the first command-line argument followed by a newline.
 *
 * Returns 0 on success, or 1 (after printing a usage line to stderr)
 * when no argument was supplied.
 */
int main(int argc, char* argv[]) {
  /* argv[1] is NULL when no argument is given, and handing NULL to %s
     is undefined behavior, so refuse to continue in that case. */
  if (argc < 2) {
    fprintf(stderr, "usage: %s <text>\n", argc > 0 ? argv[0] : "alpha");
    return 1;
  }
  printf("%s\n", argv[1]);
  return 0;
}

I will compile it as follows:

x86_64-w64-mingw32-gcc -o alpha alpha.c

The problem appears when I give it an argument containing non-ASCII characters:

$ ./alpha róisín
r is n

How can I write and / or compile this program so that it accepts non-ASCII characters?

To answer alk : no, the program does not print correctly. See this example:

$ echo Ω | od -tx1c
0000000  ce  a9  0a
        316 251  \n
0000003

$ ./alpha Ω | od -tx1c
0000000  4f  0d  0a
          O  \r  \n
0000003
+4
source share
3 answers

The easiest way to do this is wmain:

#include <fcntl.h>
#include <stdio.h>

/*
 * Wide-character entry point: prints the first command-line argument,
 * received as a wide string, followed by a newline.
 *
 * Returns 0 on success, or 1 when no argument was supplied.
 */
int wmain (int argc, wchar_t** argv) {
  /* Without an argument argv[1] is NULL; do not pass it to wprintf. */
  if (argc < 2) {
    fputs("usage: alpha <text>\n", stderr);
    return 1;
  }
  /* Put stdout into wide-character text mode before any wide output. */
  _setmode(_fileno(stdout), _O_WTEXT);
  /* %ls names a wide string in both standard C and the Microsoft runtime;
     a bare %s only means "wide" in the Microsoft wprintf family. */
  wprintf(L"%ls\n", argv[1]);
  return 0;
}

This can also be done with GetCommandLineW; here is a simplified version of the code found in the HandBrake repository:

#include <stdio.h>
#include <stdlib.h>
#include <windows.h>

/*
 * Rebuilds the process's argument vector as UTF-8 strings.
 *
 * The wide command line is split with CommandLineToArgvW and every
 * argument is converted with WideCharToMultiByte(CP_UTF8, ...).
 *
 * argc_ptr  receives the number of arguments.
 * argv_ptr  receives a NULL-terminated vector of UTF-8 strings. The
 *           vector and the strings share one malloc() block, so a single
 *           free(*argv_ptr) releases everything.
 *
 * Returns 0 on success. Returns -1 on failure, in which case *argc_ptr
 * and *argv_ptr are left untouched, so a caller that passed in main's
 * own argc/argv still holds usable values.
 */
int get_argv_utf8(int* argc_ptr, char*** argv_ptr) {
  int argc = 0;
  char** argv;
  int i;
  int offset;
  int size;
  wchar_t** argv_utf16 = CommandLineToArgvW(GetCommandLineW(), &argc);

  if (argv_utf16 == NULL)
    return -1;

  /* Block layout: argc + 1 pointers (the extra one is the NULL
     terminator), followed by the bytes of every converted string. */
  offset = (argc + 1) * sizeof(char*);
  size = offset;
  for (i = 0; i < argc; i++) {
    /* A zero-sized output buffer asks only for the required byte count,
       terminating NUL included. */
    int needed = WideCharToMultiByte(CP_UTF8, 0, argv_utf16[i], -1,
      NULL, 0, NULL, NULL);
    if (needed <= 0) {
      LocalFree(argv_utf16);
      return -1;
    }
    size += needed;
  }

  argv = malloc((size_t) size);
  if (argv == NULL) {
    LocalFree(argv_utf16);
    return -1;
  }

  for (i = 0; i < argc; i++) {
    int written;
    argv[i] = (char*) argv + offset;
    written = WideCharToMultiByte(CP_UTF8, 0, argv_utf16[i], -1,
      argv[i], size - offset, NULL, NULL);
    if (written <= 0) {
      free(argv);
      LocalFree(argv_utf16);
      return -1;
    }
    offset += written;
  }
  /* Fill the slot reserved above so argv[argc] == NULL, as with main(). */
  argv[argc] = NULL;

  /* The wide vector came from CommandLineToArgvW and must be released
     with LocalFree once it is no longer needed. */
  LocalFree(argv_utf16);

  *argc_ptr = argc;
  *argv_ptr = argv;
  return 0;
}

/*
 * Replaces the incoming argument vector with the one produced by
 * get_argv_utf8 and prints the first argument followed by a newline.
 *
 * Returns 0 on success, 1 if the arguments could not be obtained or
 * no argument was supplied.
 */
int main(int argc, char** argv) {
  if (get_argv_utf8(&argc, &argv) != 0) {
    fputs("error: could not read the command line\n", stderr);
    return 1;
  }
  /* Only touch argv[1] when an argument was actually given. */
  if (argc < 2) {
    fputs("usage: alpha <text>\n", stderr);
    return 1;
  }
  printf("%s\n", argv[1]);
  return 0;
}
+4
source

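Since you are using MinGW (or MinGW-w64), you have access to the Windows API, so you can fetch the wide-character command line directly. The following should give you the idea: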

#define _WIN32_WINNT 0x0600
#include <errno.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <wchar.h>

#include <windows.h>

int main (void)
{
    int       argc;
    int       i;
    LPWSTR    *argv;

    argv = CommandLineToArgvW(GetCommandLineW(), &argc);
    if (argv == NULL)
    {
        FormatMessageA(
            (
                FORMAT_MESSAGE_ALLOCATE_BUFFER |
                FORMAT_MESSAGE_FROM_SYSTEM |
                FORMAT_MESSAGE_IGNORE_INSERTS),
            NULL,
            GetLastError(),
            0,
            (LPWSTR)&error, 0,
            NULL);

        fprintf(stderr, error);
        fprintf(stderr, "\n");
        LocalFree(error);
        return EXIT_FAILURE;
    }

    for (i = 0; i < argc; ++i)
        wprintf(L"argv[%d]: %ls\n", i, argv[i]);

    // You must free argv using LocalFree!
    LocalFree(argv);

    return 0;
}

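One thing to be aware of: Windows does not normalize the command line for you. For example, if the following command line is typed with a combining accent (a base letter followed by a combining mark):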

example -o àlf

On Windows, the output is:

argv[0]: example
argv[1]: -o
argv[2]: a\u0300lf

Here a\u0300 is U+0061 (LATIN SMALL LETTER A) followed by a representation of the Unicode code point U+0300 (COMBINING GRAVE ACCENT).

example -o àlf

If the command line instead uses the precomposed character U+00E0 (LATIN SMALL LETTER A WITH GRAVE), the output is:

argv[0]: example
argv[1]: -o
argv[2]: \u00E0lf

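Here \u00E0 is a representation of à, Unicode code point U+00E0. Which form you receive depends on how the text was entered; for example, input typed under code page 1258 may arrive in the decomposed form. The two forms look the same on screen but do not compare equal.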

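If you need to compare or look up such strings, normalize them first, for example with NormalizeString. Otherwise, two arguments that look identical may be treated as different.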

+1

Try this program:

#include <stdio.h>

/*
 * Prints one line per value from 0 through 255, showing the number and
 * the character printf's %c produces for it, then a final newline.
 */
int main()
{
    int code = 0;

    /* Walk the whole single-byte range, lowest value first. */
    while (code < 256) {
        printf("\nASCII Character #%d:%c ", code, code);
        ++code;
    }

    /* Finish the last entry's line. */
    fputs("\n", stdout);

    return 0;
}

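For me, the characters from 128 upward are not displayed correctly. FYI, I am running Ubuntu with the default terminal (GNOME Terminal), which uses UTF-8.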

However, if I go to Terminal> Set character encoding ... and select Western (WINDOWS-1252) rather than Unicode (UTF-8) and restart the program, the extended ASCII characters will display correctly.

I do not know the exact steps for Windows / MinGW, but, in short, changing the character encoding of the terminal should fix your problem.

-1
source

All Articles