Convert hexadecimal string to byte string

I need to convert a (potentially very long) string, for example char * s = "2f0a3f" , to the actual bytes that it represents when decoding from a hexadecimal representation. I am currently doing this, but it feels awkward and wrong.

  /* Question's original attempt: decodes the hex pairs in s into a freshly malloc'ed buffer of strlen(s) / 2 bytes. */
  /* NOTE(review): the digit test is `< '9'`, so the character '9' itself takes the letter branch ('9' - 'a' + 10). */
  /* NOTE(review): the malloc() result is written through without a NULL check, and i is a long compared against a size_t. */
  size_t hexlength = strlen(s); size_t binlength = hexlength / 2; unsigned char * buffer = malloc(binlength); long i = 0; char a, b; for (; i < hexlength; i += 2) { a = s[i + 0]; b = s[i + 1]; buffer[i / 2] = ((a < '9' ? a - '0' : a - 'a' + 10) << 4) + (b < '9' ? b - '0' : b - 'a' + 10); } 

Two things strike me as particularly ugly:

  • The way I divide twice each time I go into the buffer
  • Conditional logic for decimal value of hexadecimal digits

Is there a better way? Preferably one that does not force me to add a dependency (since I want to ship this code with minimal cross-platform problems). My bit math is terrible ;)

NOTE. The data was pre-checked so that everything was lowercase and was the correct string of hexadecimal pairs.

+6
source share
6 answers
 /* allocate the buffer */ char * buffer = malloc((strlen(s) / 2) + 1); char *h = s; /* this will walk through the hex string */ char *b = buffer; /* point inside the buffer */ /* offset into this string is the numeric value */ char xlate[] = "0123456789abcdef"; for ( ; *h; h += 2, ++b) /* go by twos through the hex string */ *b = ((strchr(xlate, *h) - xlate) * 16) /* multiply leading digit by 16 */ + ((strchr(xlate, *(h+1)) - xlate)); 

Edited to add

In 80x86 assembly language, the heart of strchr() is basically one instruction — it doesn't loop.

Also: this does no bounds checking, will not work with Unicode console input, and will fail if it is passed an invalid character.

Also: thanks to those who pointed out some serious typos.

+6
source

Not that it matters a lot, but I would go with multiplication rather than division. It is also worth splitting out the digit-conversion code, in case you ever port it to a platform where a–f are not adjacent in the character set (just joking!)

  inline int digittoint(char d) { return ((d) <= '9' ? (d) - '0' : (d) - 'a' + 10); } #define digittoint(d) ((d) <= '9' ? (d) - '0' : (d) - 'a' + 10) size_t hexlength = strlen(s); size_t binlength = hexlength / 2; unsigned char * buffer = malloc(binlength); long i = 0; char a, b; for (; i < binlength; ++i) { a = s[2 * i + 0]; b = s[2 * i + 1]; buffer[i] = (digittoint(a) << 4) | digittoint(b); } 

I fixed a bug in your digit-to-int conversion (the digit test has to be <= '9'; with < '9' the character '9' itself is treated as a letter) and replaced + with bitwise OR, on the grounds that it better expresses your intent.

Then you can experiment to find the best implementation of digittoint - conditional arithmetic as above, strspn or a lookup table.

Here is a possible branchless implementation which — as a bonus — also works on uppercase letters:

 /*
  * Branch-free conversion of one ASCII hex digit to its value (0..15).
  *
  * How it works (ASCII only):
  *   - '0'..'9' are 0x30..0x39: the low five bits are 0x10..0x19 and
  *     d >> 6 is 0, so subtracting 0x10 leaves 0..9.
  *   - 'A'..'F' (0x41..) and 'a'..'f' (0x61..): the low five bits are 1..6
  *     and d >> 6 is 1, which adds 0x19; 1 + 0x19 - 0x10 == 10 for 'a'/'A'.
  *
  * No validation is done: a non-hex character yields a meaningless value.
  * static inline is used because a plain C99 `inline` definition provides
  * no external definition, so a non-inlined call could fail to link.
  */
 static inline int digittoint(char d)
 {
     return (d & 0x1f) + ((d >> 6) * 0x19) - 0x10;
 }
+4
source

Try something like this:

 /*
  * ASCII code -> hex digit value; -1 marks every character that is not a
  * hex digit. The element type is signed so that the -1 sentinel survives
  * and can actually be compared against -1 later.
  */
 const signed char bin[128] = {
     /* 0x00 */ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
     /* 0x10 */ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
     /* 0x20 */ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
     /* 0x30 */  0,  1,  2,  3,  4,  5,  6,  7,  8,  9, -1, -1, -1, -1, -1, -1,
     /* 0x40 */ -1, 10, 11, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1, -1,
     /* 0x50 */ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
     /* 0x60 */ -1, 10, 11, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1, -1,
     /* 0x70 */ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1
 };

 size_t hexlength = strlen(s);
 size_t binlength = hexlength / 2;
 unsigned char *buffer = malloc(binlength);

 if (buffer) {
     const char *hex = s;
     unsigned char *buf = buffer;
     int ok = 1;

     for (size_t i = 0; i < hexlength; i += 2) {
         /* Go through unsigned char so the table index is never negative. */
         unsigned char hi_ch = (unsigned char)*hex++;
         unsigned char lo_ch = (unsigned char)*hex++;

         /* Codes outside the 128-entry table are invalid by definition.
          * For an odd-length string the second character is the
          * terminating NUL, which the table also maps to -1. */
         int b = (hi_ch < 128) ? bin[hi_ch] : -1;
         int c = (lo_ch < 128) ? bin[lo_ch] : -1;

         if ((b == -1) || (c == -1)) {
             ok = 0;
             break;
         }
         *buf++ = (unsigned char)((b << 4) | c);
     }

     if (ok == 1) {
         // use buffer as needed, up to binlength number of bytes...
     }

     free(buffer);
 }
+1
source

If you need your number (in a string) converted from hexadecimal to decimal, you can use atol() with sprintf()

If you need to do this by byte, you can buffer every byte, and as each buffer fills, pass it through sprintf as such:

 /* Sketch only (the "..." stands for omitted code): parse hexRep with atol(), then print the value into decRep with sprintf(). */
 /* NOTE(review): atol() parses base-10 text, so parsing stops at the first a-f digit; strtol(hexRep, NULL, 16) is the base-16 counterpart. */
 /* NOTE(review): "%u" expects an unsigned int but decVal is a long int ("%ld" matches); decRep must point to writable storage before sprintf(). */
 char *hexRep; char *decRep; long int decVal; ... decVal = atol(hexRep); sprintf(decRep, "%u", decVal); 

Both of them are in the standard C library. After you get a string representation of each byte, you can simply combine them together with strcat() .

0
source

Here is a version with some small improvements that should satisfy MISRA. The original names were confusing.

 /* Value (0..15) of one hex digit in either case, or 16 if c is not a hex digit. */
 static inline uint8_t HexcharToInt(char c)
 {
     uint8_t result = 16U;

     if (('0' <= c) && (c <= '9')) {
         result = (uint8_t)(c - '0');
     } else if (('a' <= c) && (c <= 'f')) {
         result = (uint8_t)(c - 'a' + 10);
     } else if (('A' <= c) && (c <= 'F')) {
         result = (uint8_t)(c - 'A' + 10);
     }
     return result;
 }

 /*
  * Result of the most recent hexstringToArray() call. The next call frees
  * it, so code that frees it itself must also reset it to NULL.
  */
 uint8_t *array = NULL;

 /*
  * Decode the NUL-terminated hex string into the global `array`.
  *
  * Returns the number of decoded bytes, or 0 on failure: empty string,
  * allocation failure, a non-hex character, or an odd number of digits
  * (the missing final digit is read as the terminating NUL and rejected).
  * After a failure the contents of `array` must not be used.
  */
 size_t hexstringToArray(const char *hexstring)
 {
     size_t len = (strlen(hexstring) + 1) / 2; /* round up */

     /* Drop the previous result; free(NULL) is a no-op. */
     free(array);
     array = malloc(len);
     if (array == NULL) {
         return 0;
     }

     for (size_t i = 0; i < len; i++) {
         /* Stop at the first bad digit so we never read past the NUL. */
         uint8_t hi = HexcharToInt(hexstring[2 * i]);
         if (hi >= 16U) {
             return 0;
         }
         uint8_t lo = HexcharToInt(hexstring[2 * i + 1]);
         if (lo >= 16U) {
             return 0;
         }
         array[i] = (uint8_t)((hi << 4) | lo);
     }
     return len;
 }
0
source
 /*
  * Value (0..15) of one hex digit in either case, or -1 if c is not a hex
  * digit. Returns int rather than char so the -1 sentinel stays negative
  * on platforms where plain char is unsigned. static inline is used because
  * a plain C99 `inline` definition provides no external definition.
  */
 static inline int HexToChar(char c)
 {
     if ('0' <= c && c <= '9') {
         return c - '0';
     }
     if ('a' <= c && c <= 'f') {
         return c + 10 - 'a';
     }
     if ('A' <= c && c <= 'F') {
         return c + 10 - 'A';
     }
     return -1;
 }

 /*
  * Decode `length` hex characters from `hex` into `binrary`.
  *
  * hex         - hex digits; only the first `length` characters are read
  * length      - number of hex characters; must be even
  * binrary     - output buffer
  * binrary_cap - capacity of `binrary` in bytes; must be >= length / 2
  *
  * Returns the number of bytes written, or 0 on error (odd length, buffer
  * too small, or a non-hex character). A length of 0 also returns 0.
  * Unless the length/capacity check fails, the whole output buffer is
  * zeroed before decoding starts.
  */
 size_t HexToBinrary(const char *hex, size_t length, char *binrary, size_t binrary_cap)
 {
     if (length % 2 != 0 || binrary_cap < length / 2) {
         return 0;
     }

     memset(binrary, 0, binrary_cap);

     size_t n = 0;
     for (size_t i = 0; i < length; i += 2, ++n) {
         int high = HexToChar(hex[i]);
         if (high < 0) {
             return 0;
         }
         int low = HexToChar(hex[i + 1]);
         if (low < 0) {
             return 0;
         }
         binrary[n] = (char)((high << 4) | low);
     }
     return n;
 }
-1
source

Source: https://habr.com/ru/post/925944/


All Articles