From 3f0a442799955f56b2c77aabd6bc7aa4458718b4 Mon Sep 17 00:00:00 2001 From: Ludovic Pouzenc Date: Sun, 17 Jul 2016 14:21:26 +0200 Subject: API changes, pedandic fixes, dgrambuf stats & info field, recvmmsg() with alarm(), partial writev() support. --- mcastseed/src/dgrambuf.c | 425 ++++++++++++++++++++++++++++++++++------------- 1 file changed, 305 insertions(+), 120 deletions(-) (limited to 'mcastseed/src/dgrambuf.c') diff --git a/mcastseed/src/dgrambuf.c b/mcastseed/src/dgrambuf.c index 41ebc8a..75b82a6 100644 --- a/mcastseed/src/dgrambuf.c +++ b/mcastseed/src/dgrambuf.c @@ -10,67 +10,107 @@ #include /* recvmmsg() _GNU_SOURCE */ #include /* calloc(), free(), qsort() */ #include /* perror() */ +#include /* errno */ #include /* memset() */ #include /* writev() */ -#include /* MIN() */ +#include /* uint8_t, uint64_t */ +#include /* sigaction() */ +#include /* alarm() */ +#include /* SSIZE_MAX */ struct uint_pair { unsigned int index; unsigned int value; }; +struct dgrambuf_stats_t { + uint64_t dgrambuf_read_on_full; + uint64_t recvmmsg_calls, recv_dgrams, recv_byte; + uint64_t dgram_invalid, dgram_past, dgram_future, dgram_dup, dgram_end_marker; + uint64_t qsort_calls; + uint64_t writev_calls, write_partial, write_byte; +}; + struct dgrambuf_t { + /* dgram validation after receive, takes dgram len and a pointer to the start of dgram data + Must returns dgram seq number or 0 if invalid dgram */ + int (*validate_func)(unsigned int, void *, unsigned int*); + + struct dgrambuf_stats_t stats; + struct sigaction sa_sigalrm; + size_t dgram_slots; size_t dgram_free_count; size_t dgram_max_size; size_t dgram_header_size; size_t iovec_slots; - struct iovec *iov_recv; - struct iovec *iov_write; struct mmsghdr *msgs; + struct iovec *iov_recv; + struct iovec *iov_write; /* malloc'ed array */ + + struct iovec *partial_write_iov; /* Pointer to an item of iov_write[] */ + size_t partial_write_remaining_iovcnt; + size_t partial_write_remaining_bytes; + unsigned int dgram_seq_last; unsigned int dgram_seq_base; unsigned int *dgram_len; - unsigned int *dgram_seq_numbers; /* Stores the decoded datagram sequence number for each dgram slot of buf */ + unsigned int *dgram_seq_numbers; /* Decoded datagram sequence number for each dgram_slot of buf */ int dgram_ordered_seq_numbers_is_dirty; - struct uint_pair *dgram_ordered_seq_numbers; - - void *buf; + struct uint_pair *dgram_ordered_seq_numbers; /* Pairs to track original items ranks after qsort() */ - unsigned int (*validate_func)(unsigned int, void *); - //TODO pthread_mutex_lock + uint8_t *buf; /* malloc-ed 2d byte array : buf[dgram_slots][dgram_max_size] */ }; -int _compare_uint_pair(const void *pa, const void *pb); +void _sigalrm_handler(int signum); +int _compare_uint_pair(const void *pa, const void *pb); void _update_ordered_seq_numbers(dgrambuf_t dbuf); -void dgrambuf_set_validate_func(dgrambuf_t dbuf, unsigned int (*func)(unsigned int, void *) ) { - dbuf->validate_func = func; +#ifndef HAVE_MIN_SIZE_T +size_t min_size_t(size_t a, size_t b) { return (avalidate_func = validate_func; } -size_t dgrambuf_free_count(const dgrambuf_t dbuf) { +size_t dgrambuf_get_free_count(const dgrambuf_t dbuf) { return dbuf->dgram_free_count; } -int dgrambuf_recvmmsg(dgrambuf_t dbuf, int sockfd) { - void *dgram_base; - size_t vlen, i, dgram_index; - int recv_msg_count, res; +int dgrambuf_everything_was_received(dgrambuf_t dbuf) { + /*TODO really implement this */ + return dbuf->dgram_seq_last && ( dbuf->dgram_seq_base - 1 == dbuf->dgram_seq_last ); +} + +ssize_t dgrambuf_recvmmsg(dgrambuf_t dbuf, int sockfd, int timeout, int *info) { + uint8_t *dgram_base; + ssize_t recv_byte; + size_t i, vlen, dgram_index, recv_msg_count; + int res; unsigned int seq, dgram_len; + struct sigaction sa_old; - /* Buffer is full, can't receive */ - if ( dbuf->dgram_free_count == 0 ) { - return -1; - } + /* Info ptr is mandatory */ + *info = 0; /* Validate function is mandatory */ if ( !dbuf->validate_func ) { - return -2; + return -3; + } + + /* Buffer is full, can't receive */ + if ( dbuf->dgram_free_count == 0 ) { + dbuf->stats.dgrambuf_read_on_full++; + *info |= DGRAMBUF_RECV_OVERWRITE; + /*FIXME : this locks everything if buf full + next seq missing*/ + return 0; } /* Initialize recvmmsg() syscall arguments */ for (i=0, vlen=0; i < dbuf->dgram_slots; i++) { + /*XXX linear search is not optimal, notably if is_dirty == 0*/ if ( dbuf->dgram_seq_numbers[i] == 0 ) { dbuf->iov_recv[vlen].iov_base = dbuf->buf + i*dbuf->dgram_max_size; dbuf->iov_recv[vlen].iov_len = dbuf->dgram_max_size; @@ -83,48 +123,92 @@ int dgrambuf_recvmmsg(dgrambuf_t dbuf, int sockfd) { } } - /* Do the syscall */ - recv_msg_count = recvmmsg(sockfd, dbuf->msgs, vlen, MSG_WAITFORONE, NULL); - if (recv_msg_count < 0) { - perror("recvmmsg()"); - return recv_msg_count; + /* Do the syscall with alarm() to circumvent bad behavior in recvmmsg(2) timeout */ + if (timeout) { + sigaction(SIGALRM, &(dbuf->sa_sigalrm), &sa_old); + alarm(timeout); + } + res = recvmmsg(sockfd, dbuf->msgs, vlen, MSG_WAITFORONE, NULL); + if (timeout) { + alarm(0); + sigaction(SIGALRM, &sa_old, NULL); + } + dbuf->stats.recvmmsg_calls++; + + if (res < 0) { + if ( errno == EINTR ) { + recv_msg_count = 0; + *info |= DGRAMBUF_RECV_EINTR; + } else { + perror("recvmmsg()"); + return -1; + } + } else { + recv_msg_count = res; } + if (recv_msg_count > 0) { dbuf->dgram_ordered_seq_numbers_is_dirty = 1; + dbuf->stats.recv_dgrams += recv_msg_count; + if ( recv_msg_count == vlen ) { /* XXX -Wsigncompare hints problems here and above */ + *info |= DGRAMBUF_RECV_IOVEC_FULL; + } } /* Check all received messages */ - res = 1; - for (i=0; iiov_recv[i].iov_base; dgram_index = (dgram_base - dbuf->buf) / dbuf->dgram_max_size; dgram_len = dbuf->msgs[i].msg_len; - seq = dbuf->validate_func(dgram_len, dgram_base); - - // TODO better feedback - if ( seq == -1 ) { - fprintf(stderr, "dgrambuf_recvmmsg(): #%zi end\n", i); - res = 0; - } else if ( seq == 0 ) { - fprintf(stderr, "dgrambuf_recvmmsg(): #%zi invalid (%u)\n", i, seq); - } else if ( seq < dbuf->dgram_seq_base ) { - fprintf(stderr, "dgrambuf_recvmmsg(): #%zi past (%u)\n", i, seq); - } else if ( seq >= dbuf->dgram_seq_base + dbuf->dgram_slots ) { - fprintf(stderr, "dgrambuf_recvmmsg(): #%zi future (%u)\n", i, seq); - } else { - //fprintf(stderr, "dgrambuf_recvmmsg(): #%zi valid (%u)\n", i, seq); - dbuf->dgram_seq_numbers[dgram_index] = seq; - dbuf->dgram_ordered_seq_numbers_is_dirty = 1; - dbuf->dgram_len[dgram_index] = dgram_len; - dbuf->dgram_free_count--; + + /* dgrambuf_new() adjust iovec_len to prevent overflows on ssize_t*/ + recv_byte += dgram_len; + + res = dbuf->validate_func(dgram_len, dgram_base, &seq); + switch (res) { + case 1: + if ( seq < dbuf->dgram_seq_base ) { + fprintf(stderr, "dgrambuf_recvmmsg(): #%zu past (%u)\n", i, seq); + dbuf->stats.dgram_past++; + } else if ( seq >= dbuf->dgram_seq_base + dbuf->dgram_slots ) { + fprintf(stderr, "dgrambuf_recvmmsg(): #%zu future (%u)\n", i, seq); + dbuf->stats.dgram_future++; + *info |= DGRAMBUF_RECV_FUTURE_DGRAM; + } else { + /*fprintf(stderr, "dgrambuf_recvmmsg(): #%zu valid (%u)\n", i, seq);*/ + *info |= DGRAMBUF_RECV_VALID_DGRAM; + dbuf->dgram_seq_numbers[dgram_index] = seq; + dbuf->dgram_ordered_seq_numbers_is_dirty = 1; + dbuf->dgram_len[dgram_index] = dgram_len; + dbuf->dgram_free_count--; + } + break; + case 2: + fprintf(stderr, "dgrambuf_recvmmsg(): #%zu finalize (%u)\n", i, seq); + dbuf->stats.dgram_end_marker++; + dbuf->dgram_seq_last = seq; + *info |= DGRAMBUF_RECV_FINALIZE; + break; + default: + fprintf(stderr, "dgrambuf_recvmmsg(): #%zu invalid\n", i); + dbuf->stats.dgram_invalid++; + break; } } - return res; + dbuf->stats.recv_byte += recv_byte; + + return recv_byte; } int dgrambuf_have_data_ready_to_write(dgrambuf_t dbuf) { unsigned int next_dgram_seq; + + /* Last write was partial, so there is more to write */ + if ( dbuf->partial_write_remaining_bytes > 0 ) { + return 1; + } + /* Buffer is empty, nothing to write */ if ( dbuf->dgram_free_count == dbuf->dgram_slots ) { return 0; @@ -144,80 +228,158 @@ int dgrambuf_have_data_ready_to_write(dgrambuf_t dbuf) { return 1; } -ssize_t dgrambuf_write(dgrambuf_t dbuf, int fd) { +int dgrambuf_have_received_everything(dgrambuf_t dbuf) { + if (dbuf) {}; + return 0; /*FIXME to be implemented*/ +} + +ssize_t dgrambuf_write(dgrambuf_t dbuf, int fd, int *info) { size_t dgram_index, i, vlen; unsigned int curr_seq, prev_seq, dgram_len; - ssize_t nwrite, total; - - /* Write needs up to date ordered_seq_numbers */ - if ( dbuf->dgram_ordered_seq_numbers_is_dirty ) { - _update_ordered_seq_numbers(dbuf); - } - /* Initialize iovecs for writev, take dgram payloads following the sequence numbers */ - prev_seq=0, total=0; - for (i=dbuf->dgram_free_count, vlen=0; i < dbuf->dgram_slots && vlen < dbuf->iovec_slots; i++) { - curr_seq = dbuf->dgram_ordered_seq_numbers[i].value; - - /* Skip empty dgram slot */ - if ( curr_seq == 0 ) { - fprintf(stderr, "Oops : found empty slot (i==%zi)\n", i); - continue; - } - /* Skip if current dgram is a dup of the previous */ - if ( curr_seq == prev_seq ) { - goto mark_empty; - } - /* Skip dgram comming from the past */ - if ( curr_seq < dbuf->dgram_seq_base ) { - fprintf(stderr, "Oops : found dgram from past in buffer (%u)\n", curr_seq); - goto mark_empty; - } - /* Stop if first dgram to write is not in buffer at all */ - if ( ( vlen==0 ) && (curr_seq != dbuf->dgram_seq_base) ) { - fprintf(stderr, "Oops : nothing to write, missing %u seq\n", dbuf->dgram_seq_base); - break; - } - /* Stop if current seq dgram is missing */ - if ( ( vlen > 0 ) && (curr_seq > prev_seq+1 ) ) { - break; + ssize_t nwrite, total, remain, len; + struct iovec *iov; + + /* FIXME Info ptr is mandatory */ + *info = 0; + + if ( dbuf->partial_write_remaining_bytes > 0 ) { + /* Previous writev() was partial, continue it */ + iov = dbuf->partial_write_iov; + vlen = dbuf->partial_write_remaining_iovcnt; + total = dbuf->partial_write_remaining_bytes; + } else if ( ! dgrambuf_have_data_ready_to_write(dbuf) ) { + return 0; /* XXX Inline code ? */ + } else { + /* Prepare a write batch, buffer state is in dgram_seq_numbers */ + iov = dbuf->iov_write; + vlen = 0; + total = 0; + /* Write needs up to date ordered_seq_numbers (dgrams could be unsorted or some are lost)*/ + if ( dbuf->dgram_ordered_seq_numbers_is_dirty ) { + _update_ordered_seq_numbers(dbuf); } + /* Initialize iovecs for writev, take dgram payloads following the sequence numbers */ + prev_seq = 0; + for (i = dbuf->dgram_free_count; i < dbuf->dgram_slots && vlen < dbuf->iovec_slots; i++) { + curr_seq = dbuf->dgram_ordered_seq_numbers[i].value; + + /* Skip empty dgram slot */ + if ( curr_seq == 0 ) { + fprintf(stderr, "Oops : found empty slot (i==%zu)\n", i); + continue; + } + /* Skip if current dgram is a dup of the previous */ + if ( curr_seq == prev_seq ) { + dbuf->stats.dgram_dup++; + goto mark_empty; + } + /* Skip dgram comming from the past */ + if ( curr_seq < dbuf->dgram_seq_base ) { + fprintf(stderr, "Oops : found dgram from past in buffer (%u)\n", curr_seq); + goto mark_empty; + } + /* Stop if first dgram to write is not in buffer at all */ + if ( ( vlen==0 ) && (curr_seq != dbuf->dgram_seq_base) ) { + fprintf(stderr, "Oops : nothing to write, missing %u seq\n", dbuf->dgram_seq_base); + break; + } + /* Stop if current seq dgram is missing */ + if ( ( vlen > 0 ) && (curr_seq > prev_seq+1 ) ) { + break; + } - /* Normal case : curr_seq is the next dgram to write */ - dgram_index = dbuf->dgram_ordered_seq_numbers[i].index; - dgram_len = dbuf->dgram_len[dgram_index] - dbuf->dgram_header_size; + /* Normal case : curr_seq is the next dgram to write */ + dgram_index = dbuf->dgram_ordered_seq_numbers[i].index; + dgram_len = dbuf->dgram_len[dgram_index] - dbuf->dgram_header_size; - /* Setup iovecs */ - dbuf->iov_write[vlen].iov_len = dgram_len; - dbuf->iov_write[vlen].iov_base = dbuf->buf + dgram_index*dbuf->dgram_max_size + dbuf->dgram_header_size; + /* Setup iovecs */ + dbuf->iov_write[vlen].iov_len = dgram_len; + dbuf->iov_write[vlen].iov_base = dbuf->buf + + dgram_index*dbuf->dgram_max_size + dbuf->dgram_header_size; - /* Update counters */ - total += dgram_len; - dbuf->dgram_seq_base = curr_seq + 1; - prev_seq = curr_seq; - vlen++; + /* Update counters */ + total += dgram_len; + prev_seq = curr_seq; + vlen++; - /* Mark dgram slot about to be written out as empty for next read */ - //XXX These cause harm if writev() is incomplete + /* Mark dgram slot about to be written out as empty for next read */ + /*FIXME These cause harm if writev() is incomplete*/ + dbuf->dgram_seq_base = curr_seq + 1; mark_empty: - /* Mark slot as empty */ - dgram_index = dbuf->dgram_ordered_seq_numbers[i].index; - dbuf->dgram_seq_numbers[dgram_index] = 0; - dbuf->dgram_free_count++; - } + /* Mark slot as empty */ + dgram_index = dbuf->dgram_ordered_seq_numbers[i].index; + dbuf->dgram_seq_numbers[dgram_index] = 0; + dbuf->dgram_free_count++; + } - /* Nothing valid to write out (but buffer not empty, missing the next dgram) */ - if ( vlen == 0 ) { - fprintf(stderr, "Oops : nothing to write at all\n"); - return -1; + /* Nothing valid to write out (but buffer not empty, missing the next dgram) */ + if ( vlen == 0 ) { + fprintf(stderr, "Oops : nothing to write at all\n"); + return -2; + } + + if ( vlen == dbuf->iovec_slots ) { + *info |= DGRAMBUF_WRITE_IOVEC_FULL; + } } - nwrite = writev(fd, dbuf->iov_write, vlen); + nwrite = writev(fd, iov, vlen); + dbuf->stats.writev_calls++; if ( nwrite < 0 ) { + /* Treat non fatal errors */ + if ( errno == EAGAIN || errno == EWOULDBLOCK || errno == EINTR) { + /* Keeps some state informations for retry */ + dbuf->partial_write_remaining_bytes = total; + dbuf->partial_write_remaining_iovcnt = vlen; + dbuf->partial_write_iov = iov; + *info |= DGRAMBUF_WRITE_EWOULDBLOCK_OR_EINTR; + return 0; + } + /* Print fatal errors and bail out */ perror("writev()"); - } else if ( nwrite != total ) { - //FIXME : everything break if there because all non writed data will be overwritted at next read - // Make a loop here could make dgrambuf_writev() unbounded in run time - fprintf(stderr, "writev() short\n"); + return -1; + } + + /* XXX Remove me when code is correct */ + if ( nwrite > total ) { + fprintf(stderr, "Fatal bug : nwrite > total\n"); + return -3; + } + if ( nwrite > 0 ) { + dbuf->stats.write_byte += nwrite; + *info |= DGRAMBUF_WRITE_SUCCESS; + } + + /* Check if the write was partially done */ + dbuf->partial_write_remaining_bytes = total - nwrite; + if ( dbuf->partial_write_remaining_bytes > 0 ) { + *info |= DGRAMBUF_WRITE_PARTIAL; + dbuf->stats.write_partial++; + /* Find the partially written iov and update it */ + remain = nwrite; + for (i=0; iiov_write[i].iov_len; + if ( remain < len ) { + dbuf->partial_write_remaining_iovcnt = vlen - i; + dbuf->partial_write_iov = dbuf->iov_write + i; + + dbuf->iov_write[i].iov_base = + (uint8_t *) dbuf->iov_write[i].iov_base + remain; + dbuf->iov_write[i].iov_len -= remain; + break; + } + remain -= len; + } + if ( i == vlen ) { + /* FIXME : this happens */ + fprintf(stderr, "Fatal bug, failed to find partial iov after partial write\n"); + return -3; + } + + } else { + /* Wipe outdated values for clarity in debug mode (only _bytes is use on branching) */ + dbuf->partial_write_iov = NULL; + dbuf->partial_write_remaining_iovcnt = 0; } return nwrite; @@ -228,21 +390,38 @@ dgrambuf_t dgrambuf_new(size_t dgram_slots, size_t dgram_max_size, size_t dgram_ dgrambuf_t dbuf = calloc(1, sizeof(struct dgrambuf_t)); if (!dbuf) goto fail0; + dbuf->validate_func = NULL; + /* Implicit with dbuf = calloc(...) + memset(&(dbuf->stats), 0, sizeof(struct dgrambuf_stats_t)); + memset(&(dbuf->sa_sigalrm), 0, sizeof(struct sigaction)); + */ + dbuf->sa_sigalrm.sa_handler = _sigalrm_handler; + dbuf->dgram_slots = dgram_slots; dbuf->dgram_free_count = dgram_slots; dbuf->dgram_max_size = dgram_max_size; dbuf->dgram_header_size = dgram_header_size; - dbuf->iovec_slots = MIN(iovec_slots,dgram_slots); + + /* writev() and dgrambuf_recvmmsg accumulates read/write bytes in ssize_t */ + iovec_slots = min_size_t(iovec_slots, SSIZE_MAX/dgram_max_size); + dbuf->iovec_slots = iovec_slots; + + dbuf->msgs = calloc(iovec_slots, sizeof(struct mmsghdr)); + if (!dbuf->msgs) goto fail1; dbuf->iov_recv = calloc(iovec_slots, sizeof(struct iovec)); - if (!dbuf->iov_recv) goto fail1; + if (!dbuf->iov_recv) goto fail2; dbuf->iov_write = calloc(iovec_slots, sizeof(struct iovec)); - if (!dbuf->iov_write) goto fail2; + if (!dbuf->iov_write) goto fail3; - dbuf->msgs = calloc(iovec_slots, sizeof(struct mmsghdr)); - if (!dbuf->msgs) goto fail3; + /* Implicit with dbuf = calloc(...) + dbuf->partial_write_iov = NULL; + dbuf->partial_write_remaining_iovcnt = 0; + dbuf->partial_write_remaining_bytes = 0; + dbuf->dgram_seq_last = 0; + */ dbuf->dgram_seq_base = 1; dbuf->dgram_len = calloc(dgram_slots, sizeof(unsigned int)); if (!dbuf->dgram_len) goto fail4; @@ -262,9 +441,9 @@ dgrambuf_t dgrambuf_new(size_t dgram_slots, size_t dgram_max_size, size_t dgram_ fail7: free(dbuf->dgram_ordered_seq_numbers); fail6: free(dbuf->dgram_seq_numbers); fail5: free(dbuf->dgram_len); -fail4: free(dbuf->msgs); -fail3: free(dbuf->iov_write); -fail2: free(dbuf->iov_recv); +fail4: free(dbuf->iov_write); +fail3: free(dbuf->iov_recv); +fail2: free(dbuf->msgs); fail1: free(dbuf); fail0: return NULL; } @@ -275,12 +454,17 @@ void dgrambuf_free(dgrambuf_t *dbuf) { free((*dbuf)->dgram_ordered_seq_numbers); free((*dbuf)->dgram_seq_numbers); free((*dbuf)->dgram_len); - free((*dbuf)->msgs); free((*dbuf)->iov_write); free((*dbuf)->iov_recv); + free((*dbuf)->msgs); free(*dbuf); + *dbuf = NULL; } - *dbuf = NULL; +} + +void _sigalrm_handler(int signum) { + /* Nothing to do except interrupting the pending syscall */ + if (signum) {} /* Avoid compiler warning */ } int _compare_uint_pair(const void *pa, const void *pb) { @@ -295,7 +479,7 @@ int _compare_uint_pair(const void *pa, const void *pb) { } void _update_ordered_seq_numbers(dgrambuf_t dbuf) { - ssize_t i; + size_t i; /* Initialize dgram_ordered_seq_numbers from dgram_seq_numbers */ for (i=0; i < dbuf->dgram_slots; i++) { dbuf->dgram_ordered_seq_numbers[i].index = i; @@ -303,6 +487,7 @@ void _update_ordered_seq_numbers(dgrambuf_t dbuf) { } /* Inplace sorting of dgram_ordered_seq_numbers */ qsort(dbuf->dgram_ordered_seq_numbers, dbuf->dgram_slots, sizeof(struct uint_pair), _compare_uint_pair); + dbuf->stats.qsort_calls++; dbuf->dgram_ordered_seq_numbers_is_dirty = 0; } -- cgit v1.2.3