Here is the 64 bit version of CreateStub. Kudos to Andrei Gruzdev, who provided the code.
type ICallbackStub = interface(IInterface) function GetStubPointer: Pointer; property StubPointer : Pointer read GetStubPointer; end; TCallbackStub = class(TInterfacedObject, ICallbackStub) private fStubPointer : Pointer; fCodeSize : integer; function GetStubPointer: Pointer; public constructor Create(Obj : TObject; MethodPtr: Pointer; NumArgs : integer); destructor Destroy; override; end; constructor TCallBackStub.Create(Obj: TObject; MethodPtr: Pointer; NumArgs: integer); {$IFNDEF CPUX64} // as before {$ELSE CPUX64} const RegParamCount = 4; ShadowParamCount = 4; Size32Bit = 4; Size64Bit = 8; ShadowStack = ShadowParamCount * Size64Bit; SkipParamCount = RegParamCount - ShadowParamCount; StackSrsOffset = 3; c64stack: array[0..14] of byte = ( $48, $81, $ec, 00, 00, 00, 00,// sub rsp,$0 $4c, $89, $8c, $24, ShadowStack, 00, 00, 00// mov [rsp+$20],r9 ); CopySrcOffset=4; CopyDstOffset=4; c64copy: array[0..15] of byte = ( $4c, $8b, $8c, $24, 00, 00, 00, 00,// mov r9,[rsp+0] $4c, $89, $8c, $24, 00, 00, 00, 00// mov [rsp+0],r9 ); RegMethodOffset = 10; RegSelfOffset = 11; c64regs: array[0..28] of byte = ( $4d, $89, $c1, // mov r9,r8 $49, $89, $d0, // mov r8,rdx $48, $89, $ca, // mov rdx,rcx $48, $b9, 00, 00, 00, 00, 00, 00, 00, 00, // mov rcx, Obj $48, $b8, 00, 00, 00, 00, 00, 00, 00, 00 // mov rax, MethodPtr ); c64jump: array[0..2] of byte = ( $48, $ff, $e0 // jump rax ); CallOffset = 6; c64call: array[0..10] of byte = ( $48, $ff, $d0, // call rax $48, $81,$c4, 00, 00, 00, 00, // add rsp,$0 $c3// ret ); var i: Integer; P,PP,Q: PByte; lCount : integer; lSize : integer; lOffset : integer; begin lCount := SizeOf(c64regs); if NumArgs>=RegParamCount then Inc(lCount,sizeof(c64stack)+(NumArgs-RegParamCount)*sizeof(c64copy)+sizeof(c64call)) else Inc(lCount,sizeof(c64jump)); Q := VirtualAlloc(nil, lCount, MEM_COMMIT, PAGE_EXECUTE_READWRITE); P := Q; lSize := 0; if NumArgs>=RegParamCount then begin lSize := ( 1+ ((NumArgs + 1 - SkipParamCount) div 2) * 2 )* Size64Bit; // 16 byte stack align pp := p; move(c64stack,P^,SizeOf(c64stack)); Inc(P,StackSrsOffset); move(lSize,P^,Size32Bit); p := pp; Inc(P,SizeOf(c64stack)); for I := 0 to NumArgs - RegParamCount -1 do begin pp := p; move(c64copy,P^,SizeOf(c64copy)); Inc(P,CopySrcOffset); lOffset := lSize + (i+ShadowParamCount+1)*Size64Bit; move(lOffset,P^,Size32Bit); Inc(P,CopyDstOffset+Size32Bit); lOffset := (i+ShadowParamCount+1)*Size64Bit; move(lOffset,P^,Size32Bit); p := pp; Inc(P,SizeOf(c64copy)); end; end; pp := p; move(c64regs,P^,SizeOf(c64regs)); Inc(P,RegSelfOffset); move(Obj,P^,SizeOf(Obj)); Inc(P,RegMethodOffset); move(MethodPtr,P^,SizeOf(MethodPtr)); p := pp; Inc(P,SizeOf(c64regs)); if NumArgs<RegParamCount then move(c64jump,P^,SizeOf(c64jump)) else begin move(c64call,P^,SizeOf(c64call)); Inc(P,CallOffset); move(lSize,P^,Size32Bit); end; fCodeSize := lcount; fStubPointer := Q; {$ENDIF CPUX64} end; destructor TCallBackStub.Destroy; begin VirtualFree(fStubPointer, fCodeSize, MEM_DECOMMIT); inherited; end; function TCallBackStub.GetStubPointer: Pointer; begin Result := fStubPointer; end;