Why is non-blocking TCP connect() sometimes so slow on Linux?

I tried to measure the speed of the TCP server that I am writing, and I noticed that there might be a fundamental problem with measuring the speed of connect() calls: if I connect in a non-blocking way, connect() becomes very slow after a few seconds. Here is some sample code in Python:

#! /usr/bin/env python3
"""Reproduce occasional slow non-blocking TCP connect() calls on Linux.

The script binds a listening socket on 127.0.0.1:45454 and forks.  The
parent accepts connections and closes them immediately; the child connects
to the server in an endless loop using non-blocking sockets.

Progress markers are written to stderr (file descriptor 2):
  '.'  one connection was established and closed by the client
  '^'  connect() reported EINPROGRESS or EALREADY
  'P'  select() waited 0.5 s without the socket becoming writable

The syntax used here is valid on Python 2.6+ and Python 3.
"""

import errno
import os
import select
import socket
import sys
import time


def NonBlockingConnect(sock, addr):
    """Connect the non-blocking socket `sock` to `addr`, waiting as needed.

    connect() is retried until it stops reporting EINPROGRESS / EALREADY.
    Between attempts the function waits up to 0.5 s for the socket to
    become writable, writing '^' before each wait and 'P' after each wait
    that timed out.

    Returns:
      Whatever sock.connect(addr) returns once it succeeds.

    Raises:
      socket.error: for any connect() error other than EINPROGRESS or
        EALREADY.
    """
    #time.sleep(0.0001)  # Fixes the problem.
    while True:
        try:
            return sock.connect(addr)
        except socket.error as e:
            if e.args[0] not in (errno.EINPROGRESS, errno.EALREADY):
                raise
            # os.write() needs bytes on Python 3; b'' literals also work on
            # Python 2.6+.
            os.write(2, b'^')
            if not select.select((), (sock,), (), 0.5)[1]:
                os.write(2, b'P')


def InfiniteClient(addr):
    """Connect to `addr` and close the connection again, forever.

    Writes '.' to stderr after each connection has been closed.
    """
    while True:
        sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM, 0)
        sock.setblocking(0)
        sock.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
        # sock.connect(addr)
        NonBlockingConnect(sock, addr)
        sock.close()
        os.write(2, b'.')


def InfiniteServer(server_socket):
    """Accept connections on `server_socket` and close each one, forever."""
    while True:
        sock, addr = server_socket.accept()
        sock.close()


def main():
    """Start the listening socket, then fork into server and client."""
    server_socket = socket.socket(socket.AF_INET, socket.SOCK_STREAM, 0)
    server_socket.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
    server_socket.bind(('127.0.0.1', 45454))
    server_socket.listen(128)
    if os.fork():  # Parent.
        InfiniteServer(server_socket)
    else:
        # Child: it only needs the address, not the listening socket.
        addr = server_socket.getsockname()
        server_socket.close()
        InfiniteClient(addr)


# Guarded so that importing the module does not fork and loop forever;
# running the file as a script behaves as before.
if __name__ == '__main__':
    main()

With NonBlockingConnect, most connect() operations are quick, but every few seconds there is one connect() operation that takes at least 2 seconds (as indicated by 5 consecutive P letters in the output). When using sock.connect instead of NonBlockingConnect, all connect operations seem fast.

How can I get rid of these slow connect() calls?

I run the Ubuntu Karmic desktop with the standard PAE kernel:

 Linux narancs 2.6.31-20-generic-pae #57-Ubuntu SMP Mon Feb 8 10:23:59 UTC 2010 i686 GNU/Linux 

It is strange that there are no delays with strace -f ./conn.py .

It is strange that there are no delays if I uncomment the very fast time.sleep .

At first it seemed strange that there were no delays on my Ubuntu Hardy system, but that observation did not hold up:

All of these systems are affected (running Ubuntu Karmic, Ubuntu Hardy, Debian Etch):

 Linux narancs 2.6.31-20-generic-pae #57-Ubuntu SMP Mon Feb 8 10:23:59 UTC 2010 i686 GNU/Linux Linux t 2.6.24-grsec #1 SMP Thu Apr 24 14:15:58 CEST 2008 x86_64 GNU/Linux Linux geekpad 2.6.24-24-generic #1 SMP Fri Sep 18 16:49:39 UTC 2009 i686 GNU/Linux 

It is strange that the following Debian Lenny system is not affected:

 Linux t 2.6.31.5 #2 SMP Thu Nov 5 15:33:05 CET 2009 i686 GNU/Linux 

FYI There are no delays if I use the AF_UNIX socket.

FYI I get the same behavior if I implement a client in C:

 /* by pts@fazekas.hu at Sun Apr 25 20:47:24 CEST 2010 */
/*
 * Client that repeatedly makes non-blocking TCP connections to
 * 127.0.0.1:45454 and reports progress on stderr (file descriptor 2):
 *   '.'  one work() call finished with a result <= 0
 *   'P'  printed on every fifth select() call within one wait loop
 *   'R'  SO_ERROR reported ECONNRESET
 */
#include <arpa/inet.h>
#include <errno.h>
#include <fcntl.h>
#include <netinet/in.h>
#include <stdio.h>
#include <string.h>
#include <sys/select.h>
#include <sys/socket.h>
#include <unistd.h>

/*
 * Makes one non-blocking connection attempt and closes the socket.
 *
 * Returns 0 once connect() succeeds, -3 if SO_ERROR reports ECONNRESET,
 * and 2 on any other failure (after printing a diagnostic to stderr).
 */
static int work(void) {
  fd_set rset;
  fd_set wset;
  fd_set eset;
  socklen_t sl;
  struct timeval timeout;
  struct sockaddr_in sa;
  int sd, i, j;
  long l;

  sd = socket(AF_INET, SOCK_STREAM, 0);
  if (sd < 0) {
    perror("socket");
    return 2;
  }
  /* Switch the socket to non-blocking mode, keeping its other flags. */
  l = fcntl(sd, F_GETFL, 0);
  if (l < 0) {
    perror("fcntl-getfl");
    close(sd);
    return 2;
  }
  if (0 != fcntl(sd, F_SETFL, l | O_NONBLOCK)) {
    perror("fcntl-setfl");
    close(sd);
    return 2;
  }
  memset(&sa, '\0', sizeof(sa));
  sa.sin_family = AF_INET;
  sa.sin_port = htons(45454);
  sa.sin_addr.s_addr = inet_addr("127.0.0.1");
  /* Retry connect() until it returns 0, waiting with select() in between. */
  while (0 != connect(sd, (struct sockaddr*)&sa, sizeof sa)) {
    if (errno != EAGAIN && errno != EINPROGRESS && errno != EALREADY) {
      perror("connect");
      close(sd);
      return 2;
    }
    FD_ZERO(&rset);
    FD_ZERO(&wset);
    FD_ZERO(&eset);
    j = 0;
    do {
      /* The timeout and the fd sets are set up again before each call. */
      timeout.tv_sec = 0;
      timeout.tv_usec = 100 * 1000;  /* 0.1 sec */
      FD_SET(sd, &wset);
      FD_SET(sd, &eset);
      i = select(sd + 1, &rset, &wset, &eset, &timeout);
      if (i < 0) {
        perror("select");
        close(sd);
        return 2;
      }
      if (++j == 5) {
        (void)write(2, "P", 1);
        j = 0;
      }
    } while (i == 0);
    /* select() reported the socket ready: fetch the pending socket error. */
    sl = sizeof i;
    if (0 != getsockopt(sd, SOL_SOCKET, SO_ERROR, &i, &sl)) {
      perror("getsockopt");
      close(sd);
      return 2;
    }
    if (i != 0) {
      if (i == ECONNRESET) {
        (void)write(2, "R", 1);
        close(sd);
        return -3;
      }
      fprintf(stderr, "connect-SO_ERROR: %s\n", strerror(i));
      close(sd);
      return 2;
    }
  }
  close(sd);
  return 0;
}

/*
 * Calls work() for as long as it returns a value <= 0, writing '.' after
 * each such call; exits with the first positive return value.
 */
int main(int argc, char**argv) {
  int i;
  (void)argc;
  (void)argv;
  while ((i = work()) <= 0) (void)write(2, ".", 1);
  return i;
}
+7
linux nonblocking sockets tcp
source share
2 answers

Given that both sleeping and running under strace make the problem go away, this looks like some kind of scheduling problem, where the server process does not get scheduled to accept the connection. That said, not scheduling the server for 2 seconds is an awfully long time.

Perhaps a tool like latencytop can help identify what is happening. You can probably only run this on Karmic (2.6.31), since the other kernels are too old, I think.

+1
source share

Are you sure it is the connect() call that is slow? In most libraries, DNS resolution is always blocking. Check whether you are always using IP addresses.

+1
source share

All Articles