I tried to measure the speed of the TCP server that I am writing, and I noticed that there might be a fundamental problem in measuring the speed of connect() calls: if I connect in a non-blocking way, connect() becomes very slow after a few seconds. Here is some sample code in Python:
#! /usr/bin/python2.4
"""Reproduce occasional multi-second stalls of non-blocking TCP connect().

When run as a script, the process forks: the parent accepts and immediately
closes connections on 127.0.0.1:45454, while the child connects to that
address in an endless loop.  Progress is reported on stderr, one character
per event:

  .  a connection was established and closed again
  ^  connect() reported that the connection is not established yet
  P  select() waited 0.5 seconds without the socket becoming writable
"""

import errno
import os
import select
import socket
import sys
import time


def NonBlockingConnect(sock, addr):
    """Connect the non-blocking socket `sock` to `addr`, waiting as needed.

    connect() is called repeatedly; between attempts the function waits up to
    0.5 seconds for the socket to become writable.  A '^' is written to
    stderr for every attempt that is still pending and a 'P' for every wait
    that timed out.

    Args:
      sock: A socket object that has been put into non-blocking mode.
      addr: The address to connect to, as accepted by sock.connect().

    Returns:
      None, once the connection is established.

    Raises:
      socket.error: connect() failed with an error other than EINPROGRESS,
        EALREADY or EISCONN.
    """
    #time.sleep(0.0001)  # Fixes the problem.
    while True:
        try:
            return sock.connect(addr)
        except socket.error:
            # sys.exc_info() rather than `except socket.error, e` keeps the
            # syntax valid on every Python version.
            e = sys.exc_info()[1]
            code = e.args[0]
            if code == errno.EISCONN:
                # A repeated connect() on a socket whose connection has
                # already completed may report EISCONN instead of success;
                # that is not a failure.
                return None
            if code not in (errno.EINPROGRESS, errno.EALREADY):
                raise
        os.write(2, '^')
        writable = select.select((), (sock,), (), 0.5)[1]
        if not writable:
            os.write(2, 'P')


def InfiniteClient(addr):
    """Connect to `addr` and disconnect again, forever.

    Writes a '.' to stderr after each connection has been closed.

    Args:
      addr: The server address to connect to.
    """
    while True:
        sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM, 0)
        try:
            sock.setblocking(0)
            sock.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
            # sock.connect(addr)
            NonBlockingConnect(sock, addr)
        finally:
            # Close the socket even if the connect raised, so that a failure
            # does not leak the file descriptor.
            sock.close()
        os.write(2, '.')


def InfiniteServer(server_socket):
    """Accept connections on `server_socket` and close each one immediately.

    Args:
      server_socket: A bound, listening socket.
    """
    while True:
        connection = server_socket.accept()[0]
        connection.close()


def main():
    """Start the accept loop in the parent and the connect loop in the child."""
    server_socket = socket.socket(socket.AF_INET, socket.SOCK_STREAM, 0)
    server_socket.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
    server_socket.bind(('127.0.0.1', 45454))
    server_socket.listen(128)
    if os.fork():  # Parent.
        InfiniteServer(server_socket)
    else:
        # The child only needs the address; it drops its copy of the
        # listening socket before it starts connecting.
        addr = server_socket.getsockname()
        server_socket.close()
        InfiniteClient(addr)


if __name__ == '__main__':
    main()
With NonBlockingConnect, most connect() operations are quick, but every few seconds there is one connect() operation that takes at least 2 seconds (as indicated by 5 consecutive P letters in the output). Using sock.connect instead of NonBlockingConnect, all connection operations seem fast.
How can I get rid of these slow connect() calls?
I run the Ubuntu Karmic desktop with the standard PAE kernel:
Linux narancs 2.6.31-20-generic-pae
It is strange that there are no delays with strace -f ./conn.py.
It is strange that there are no delays if I uncomment the very fast time.sleep .
It is strange that there are no delays on my Ubuntu Hardy system:
All of these systems are affected (running Ubuntu Karmic, Ubuntu Hardy, Debian Etch):
Linux narancs 2.6.31-20-generic-pae
It is strange that the following Debian Lenny system is not affected:
Linux t 2.6.31.5
FYI, there are no delays if I use an AF_UNIX socket.
FYI, I get the same behavior if I implement the client in C:
#include <arpa/inet.h>
#include <errno.h>
#include <fcntl.h>
#include <netinet/in.h>
#include <stdio.h>
#include <string.h>
#include <sys/select.h>
#include <sys/socket.h>
#include <unistd.h>

/* Reports the failed call `what` with perror(), closes `sd`, returns 2. */
static int fail(int sd, const char *what) {
  perror(what);
  close(sd);
  return 2;
}

/*
 * Opens one non-blocking TCP connection to 127.0.0.1:45454 and closes it.
 *
 * While the connection is pending, select() is called with a 0.1 second
 * timeout until the socket is reported ready; a "P" is written to stderr on
 * every fifth select() call of a wait.
 *
 * Returns 0 on success, -3 if the connection was reset (an "R" is written to
 * stderr), and 2 on any other failure (a diagnostic is printed to stderr).
 */
static int work(void) {
  fd_set rset;
  fd_set wset;
  fd_set eset;
  socklen_t sl;
  struct timeval timeout;
  struct sockaddr_in sa;
  int sd, i, j;
  long l;

  sd = socket(AF_INET, SOCK_STREAM, 0);
  if (sd < 0) {
    perror("socket");
    return 2;
  }
  l = fcntl(sd, F_GETFL, 0);
  if (l < 0) return fail(sd, "fcntl-getfl");
  if (0 != fcntl(sd, F_SETFL, l | O_NONBLOCK)) return fail(sd, "fcntl-setfl");

  memset(&sa, '\0', sizeof(sa));
  sa.sin_family = AF_INET;
  sa.sin_port = htons(45454);
  sa.sin_addr.s_addr = inet_addr("127.0.0.1");

  while (0 != connect(sd, (struct sockaddr*)&sa, sizeof sa)) {
    /*
     * A repeated connect() on a socket whose connection has already
     * completed may report EISCONN instead of returning 0; treat that as
     * success rather than as a fatal error.
     */
    if (errno == EISCONN) break;
    if (errno != EAGAIN && errno != EINPROGRESS && errno != EALREADY) {
      return fail(sd, "connect");
    }
    FD_ZERO(&rset);
    FD_ZERO(&wset);
    FD_ZERO(&eset);
    j = 0;
    do {
      /* select() may modify both the timeout and the sets: refill them. */
      timeout.tv_sec = 0;
      timeout.tv_usec = 100 * 1000;  /* 0.1 sec */
      FD_SET(sd, &wset);
      FD_SET(sd, &eset);
      i = select(sd + 1, &rset, &wset, &eset, &timeout);
      if (i < 0) return fail(sd, "select");
      if (++j == 5) {
        (void)write(2, "P", 1);
        j = 0;
      }
    } while (i == 0);

    /* Fetch the outcome of the asynchronous connect attempt. */
    sl = sizeof i;
    if (0 != getsockopt(sd, SOL_SOCKET, SO_ERROR, &i, &sl)) {
      return fail(sd, "getsockopt");
    }
    if (i != 0) {
      if (i == ECONNRESET) {
        (void)write(2, "R", 1);
        close(sd);
        return -3;
      }
      fprintf(stderr, "connect-SO_ERROR: %s\n", strerror(i));
      close(sd);
      return 2;
    }
  }
  close(sd);
  return 0;
}

/*
 * Calls work() in a loop, writing a "." to stderr after every call that
 * returned 0 or a negative value; exits with the first positive result.
 */
int main(int argc, char**argv) {
  int i;
  (void)argc;
  (void)argv;
  while ((i = work()) <= 0) (void)write(2, ".", 1);
  return i;
}