ASSEMBLY: binary search in a sorted array of strings

After several days of working on this binary search problem, which needs to be done completely in Assembly, I'm not quite sure where my logic breaks when it comes to finding name matches [case sensitive] from a sorted array.

Any help would be greatly appreciated:

CC PROGRAM

/*

int b_search (char list[100][20], int count, char* token);

list – the starting address of the list of names to be searched

count – total number of names in the list

token – name to be searched in the list

*/

This is a list of names: Arturo Brian Chris David John Mark Shane SIMON Thomas Toney

Listed below are all the bullets, names to search in the list:

// checks elements with an exact match, for example: "Jon", "shane", "TONY"

// checks case insensitivity, for example: "Chris", "BryAN"

//, , : "" [ ], ""

// , : "DAVe", "Jona"

- , 0 , " ".

, ; .

//============================================================================================ ====================================

:

int b_search (char list[100][20], int count, char* token)
{
__asm
{
// Function returns the positionmove of the token in the list, starting with 1.

// If name is NOT found, return 0.

// Registers used:

// EAX: logical OR value

// EBX: toLowercase() loop counter

// ECX: total number of names

// EDI: name to be searched

// ESI: list pointer

mov eax, 0                      ; // zero out the result
mov ebx, 0                      ; // zero out EBX for use
mov esi, list                   ; // move the list pointer to ESI
mov edi, token                  ; // the name to be searched to EDI

// YOUR CODE HERE
// list - the starting address of the list of names to be searched
// count - total number of names in the list
// token - name to be searched in the list

// ==================================================================================================================
// CHANGE TOKEN LETTERS TO LOWERCASE
// ==================================================================================================================

TOKEN_LOWERCASE: // Cycles through every char in token and converts them to lowercase
    mov eax, [edi + ebx]        ; // move char of token into EAX
    or eax, 0x20                ; // convert to lowercase by logical OR with 0010 0000
    mov [edi + ebx], eax        ; // move new char back into EAX 
    inc ebx                     ; // increments loop counter
    cmp [edi + ebx], 0x00       ; // checks if the next char is a null terminator
jnz TOKEN_LOWERCASE             ; // exit loop in the presence of a null terminator

// ==================================================================================================================
// BINARY SEARCH RECURSION - FIRST ITERATION LOCATION
// All registers are now open except for EDI and ESI
// ==================================================================================================================

mov eax, 0                      ; // set the minimum value to be index first [0]
mov ecx, count                  ; // set the maximum value to be index last [index.length]
mov edx, 0                      ; // zero out EDX for use
push eax                        ; // push minimum value EAX back onto stack
push ecx                        ; // push maximum value ECX back onto stack

BEGIN_BINARY_SEARCH: // return here for recursion
    mov eax, 0                  ; // zero out EAX for use
    //mov ebx, 0                ; // zero out EBX for use
    mov ecx, 0                  ; // zero out ECX for use
    mov edx, 0                  ; // zero out EDX for use

    // FIRST IN, LAST OUT
    pop ecx                     ; // maximum value; first in, last out
    pop eax                     ; // minimum value; first in, last out
    cmp ecx, eax                ; // compares the maximum and minimum values
    jl DONE_EXIT                ; // all operations completed, goto DONE_EXIT [KNOWN ISSUE]
    mov edx, eax                ; // move EAX into EDX
    add edx, ecx                ; // add EAX and ECX, store it into EDX
    sar edx, 0x01               ; // shifts arithmetic right, dividing EDX by 2

    // FIRST IN, LAST OUT
    push eax                    ; // push minimum value EAX back onto stack
    push ecx                    ; // push maximum value ECX back onto stack
    mov eax, 0                  ; // move EAX to 0 for use *****
    mov ebx, 0                  ; // move EBX to 0 for use [external counter, see "RECURSION CONCLUDES"]
    mov ecx, 0                  ; // move ECX to 0 for use

    // ==============================================================================================================
    // INNER RECURSIVE LOOP
    // Registers to keep track of:
    // ECX = token[i]
    // EAX = element[i]
    // ==============================================================================================================
    GO_LOWER: // loop to check if cursor needs to go lower
        mov ecx, edx            ; // move EDX and copy it into ECX; SEE BELOW:
        imul ecx, 0x14          ; // OFFSET_TOTAL = COUNT * 20[Decimal]
        add ecx, ebx            ; // adds offset to EBX
        mov eax, [esi + ecx]    ; // moves element[i] into EAX, where list + 20 * externalCount + internalCount
        // ECX held the offset; it has been moved to EAX, so ECX can be reset
        mov ecx, 0              ; // reset ECX with every iteration to prepare for another address contents
        mov ecx, [edi + ebx]    ; // move token element into ECX
        cmp eax, 0x00           ; // compares EAX to zero; checks for null terminator; SEE BELOW:
        jz NULL_TERM_CHECK      ; // if IS zero, then jump to IS_NULL
        jnz NOT_NULL            ; // if NOT zero, then jump to NOT_NULL

        // ==========================================================================================================
        NULL_TERM_CHECK: // procedure to check contents of ECX are a null terminator at this point
            //cmp ecx, 0x00     ; // checks for null terminator
            cmp ecx, eax        ; // compares token and element
        jz IS_MATCH             ; // if IS null terminator, then reached end of String
        jl DONE_GO_LOWER        ; // if token.length() is shorter then element.length()
        jg DONE_GO_HIGHER       ; // if token.length() is longer than element.length()
        //jnz DONE_EXIT         ; // if NOT null terminator, function is not yet finished; proceed:
        // ==========================================================================================================

        NOT_NULL: // proceed with the rest of the function
            or eax, 0x20        ; // logical OR with EAX will return the letter in lowercase
            sub ecx, eax        ; // -32 -> 0 -> +32; result indicates need to jump DONE_GO_LOWER or DONE_GO_HIGHER
        jl DONE_GO_LOWER        ; // jump to GO_LOWER if less than zero; 
        jg DONE_GO_HIGHER       ; // jump to GO_HIGHER if greater than zero
        inc ebx                 ; // increments loop counter if slips through
    jmp GO_LOWER                ; // return to GO_LOWER for recursion
    // ==============================================================================================================

// ==================================================================================================================
// RECURSION CONCLUDES - END ITERATION LOCATION
// Registers EAX, EBX and ECX are now open
// Register EDX is reserved for being the external loop counter
// ==================================================================================================================

// ==================================================================================================================
DONE_GO_LOWER:

    // FIRST IN, LAST OUT
    pop ecx                     ; // pop maximum value back into ECX from stack
    pop eax                     ; // pop minimum value back into EAX from stack
    mov ecx, edx                ; // move EDX into ECX, copying the value
    sub ecx, 0x01               ; // subtracts 1 from current makes the maximum
    push eax                    ; // push minimum value EAX back onto stack
    push ecx                    ; // push maximum value ECX back onto stack
jmp BEGIN_BINARY_SEARCH         ; // jump back to beginning of recursion

// ==================================================================================================================

// ==================================================================================================================
DONE_GO_HIGHER:

    // FIRST IN, LAST OUT
    pop ecx                     ; // pop maximum value back into ECX from stack
    pop eax                     ; // pop minimum value back into EAX from stack
    mov eax, edx                ; // move EDX into EAX, updating the minimum
    add eax, 0x01               ; // adds 1 to current makes the minimum
    push eax                    ; // push minimum value EAX back onto stack
    push ecx                    ; // push maximum value ECX back onto stack
jmp BEGIN_BINARY_SEARCH         ; // jump back to beginning of recursion

// ==================================================================================================================

DONE_EXIT:
    mov eax, 0                  ; // move eax back to 0 to finish up
    jmp DONE                    ; // jump to default done location

// ==================================================================================================================
IS_MATCH:
    mov eax, edx                ; // move ESP contents into EAX
jmp DONE                        ; // done with everything

// END PROCEDURE: DEFAULT TO HERE WHEN FINISHED

DONE: // ALL OPERATIONS FINISHED
}

}
+4
2

@Edward . C-, . 39 .

#include <stdio.h>

int bsearch(char a[][20], int count, char *key)
{
  // Answer lies in a[lo .. hi-1].
  int lo = 0, hi = count;

  while (lo < hi) {

    // Midpoint of range where answer must lie.    
    int mid = (lo + hi) / 2;

    // This simulates condition codes for key comparison.
    int cmp;

    // Pointers and character values from key and midpoint strings.
    char *p_key = key, *p_mid = a[mid], ch_key, ch_mid;

    // Pointers advance together through key and midpoint strings, stopping at '\0'.
    for (;;) {

      // Fetch characters from key and array.
      ch_key = *p_key, ch_mid = *p_mid;

      // Break if null is found;
      if (ch_key == 0 || ch_mid == 0) break;

      // Convert to lower case if necessary.
      if ('A' <= ch_key && ch_key <= 'Z') ch_key += 'a' - 'A';
      if ('A' <= ch_mid && ch_mid <= 'Z') ch_mid += 'a' - 'A';

      // Break if inequality is found.
      if (ch_key != ch_mid) break;

      // Move to next character pair.
      p_key++;
      p_mid++;
    }
    // Set the condition codes based on character difference.
    cmp = ch_key - ch_mid; 

    // If equal, we're done.
    if (cmp == 0) return mid;

    // Shrink the range based on comparison result.
    if (cmp < 0) hi = mid;
    else         lo = mid + 1;
  }
  return -1;
}

int main(void) {
  static char a[][20] = {
    "Arturo", "Bryan", "chris", "David", "Jon", "Mark", "shane", "SIMON", "Thomas", "TONY"
  };
  static char keys[][20] = {
    "ARTURo", "brYAn", "cHRiS", "dAvID", "jON", "MaRk", "sHAne", "sImON", "THOmas", "TonY" , "AAAA", "foo", "ZZZZZ"
  };

  #define COUNT(A) (sizeof A / sizeof A[0])

  int i;

  for (i = 0; i < COUNT(keys); i++) {
    printf("%s: %d\n", keys[i], bsearch(a, COUNT(a), keys[i]));
  }

  return 0;
}
+2

. , :

, , , NUL or 0x20 .

, .

, :

mov edx, eax                ; // move EAX into EDX

. , EAX EDX. , .

mov ecx, 0 pop ecx, ( !), . , ; ( ), , . , . . - , .

GO_LOWER eax ecx, .

:

    cmp eax, 0x00           
    jz NULL_TERM_CHECK     
    jnz NOT_NULL          
NULL_TERM_CHECK: 

:

    cmp eax, 0x00           
    jnz NOT_NULL          

, , , .

, , , , , . , , , , .

:

C, . , .

+1

All Articles