Return Styles: Pseud0ch, Terminal, Valhalla, NES, Geocities, Blue Moon. Entire thread

C and goto

Name: Anonymous 2012-06-19 6:00

Is it okay to use goto in C for functions like this? (It's actually C++, but I'm doing C-style development.) Is this function too long, or is it okay due to the nature of null-terminated string processing? It's been a long time since I've done C-style development. Any other recommendations (except "use Lisp or XYZ language instead")?

ssize_t normalize_path(char* restrict dest, size_t dest_max, char const* restrict p, size_t n) noexcept {
    size_t i, m, length;
    ssize_t retval;
    char const* q;
    char* buffer, * state;
    char const** parts, **new_parts;
    size_t parts_size, new_parts_size;
    char const* default_parts[32];

    // validate arguments
    if ((dest_max > (SSIZE_MAX + 1)) || (!dest && dest_max) || (!p && n)) {
        errno = EINVAL;
        return -1;
    }

    length = 0;

    if (!n) {
        // empty path, normalize to current directory
        goto normalize_curdir;
    }
       
    if (*p == '/') {
        // POSIX paths may begin with one or two slashes, but three or more
        // are treated as a single slash
        ++length; ++p; --n;
        if (n && (*p == '/')) {
            ++p; --n;
            q = strncchr(p, '/', n);
            if (!q) {
                q = p + n;
            }

            if (q == p) {
                ++length;
            }
            else {
                n -= q - p;
                p = q;
            }
        }

        // copy one or two slashes into destination buffer
        for (i = 0, m = (length < dest_max) ? length : dest_max - 1; i < m; ++i) {
            dest[i] = '/';
        }

        if (!n) {
            goto null_terminate;
        }
    }

    // create a local copy of input path for tokenization
    buffer = static_cast<char*>(malloca(n + 1));
    if (!buffer) {
        return -1;
    }

    *static_cast<char*>(mempcpy(buffer, p, n)) = '\0';

    // tokenize the local path and normalize parts into stack
    i = 0;
    retval = 0;
    state = nullptr;
    parts = default_parts;
    parts_size = sizeof(default_parts) / sizeof(default_parts[0]);

    for (q = strtok_r(buffer, path<char>::sepset, &state); q; q = strtok_r(nullptr, path<char>::sepset, &state)) {
        // skip part if curdir, pop top part off stack if pardir, and if
        // the path is absolute (length is non-zero), eat all of the
        // redundant pardir parts
        if (*q == '.') {
            if (q[1] == '\0') {
                continue;
            }
            else if ((q[1] == '.') && (q[2] == '\0')) {
                if (i > 0) {
                    --i;
                    continue;
                }
                else if (length > 0) {
                    continue;
                }
            }
        }

        // resize the path parts stack if space is exhausted
        if (i >= parts_size) {
            if (parts_size >= (SIZE_MAX / (3 * sizeof(char const*)))) {
                errno = ENOMEM;
                retval = -1;
                goto cleanup;
            }

            new_parts_size = (parts_size * 3) / 2;
            new_parts = static_cast<char const**>(malloc(new_parts_size * sizeof(char const*)));
            if (!new_parts) {
                retval = -1;
                goto cleanup;
            }

            memcpy(new_parts, parts, parts_size * sizeof(char const*));
            if (parts != default_parts) {
                free(parts);
            }

            parts = new_parts;
            parts_size = new_parts_size;
        }

        // push part onto stack
        parts[i++] = q;
    }

    // rejoin the path parts in normalized form
    if (length < dest_max) {
        retval = join_path(dest + length, dest_max - length, parts, i);
    }
    else {
        retval = join_path(nullptr, 0, parts, i);
    }

cleanup:

    // free temporary buffers
    if (parts != default_parts) {
        free(parts);
    }

    freea(buffer);

    // update length with full length from join
    if (retval < 0) {
        return -1;
    }

    length += static_cast<size_t>(retval);
    if (!length) {
        goto normalize_curdir;
    }
    else if (length > SSIZE_MAX) {
        errno = EOVERFLOW;
        return -1;
    }

    return static_cast<ssize_t>(length);

normalize_curdir:

    // empty path, normalize to current directory
    UP_ASSERT(!length);
    ++length;
    if (dest_max > 0) {
        dest[0] = '.';
    }

null_terminate:

    // null terminate the destination buffer
    if (length < dest_max) {
        dest[length] = '\0';
    }
    else if (dest_max > 0) {
        dest[dest_max - 1] = '\0';
    }

    return static_cast<ssize_t>(length);
}

Name: Anonymous 2012-06-19 13:08

>>5,7
Thanks for this. I tried it out, and it failed a few of my test cases, so when modifying it to fit my function signature, I made a few changes to fix them (I can't assume the input string is null-terminated, nor can I assume the destination buffer is large enough to accommodate the output). But it cuts out the buffer allocation for use with strtok and the join_path call, thus speeding things up quite a bit which is awesome, so thanks. I debated about whether to cap the path depth and just return EOVERFLOW and not reallocate on the heap for additional space like in your code, but decided to keep that in to make it more robust for edge cases. I also didn't need to worry about Windows style paths, as I have another set of path manipulation functions for that platform which handle it, similar to how Python does it.

// Normalizes the POSIX-style path `p` (`n` bytes, not required to be
// null-terminated) into `dest`, a buffer of `dest_max` bytes.
//
// Leading slashes are reduced to one, except that exactly two leading slashes
// are preserved. Repeated separators and "." parts are dropped, and ".." parts
// cancel the preceding part; in an absolute path, ".." parts that would climb
// above the root are discarded, while in a relative path they are kept. An
// empty result becomes ".".
//
// The output is truncated to fit and is null-terminated whenever
// `dest_max > 0`. `dest` may be null when `dest_max` is zero, which computes
// the required length only.
//
// Returns the full length of the normalized path, excluding the terminator
// and regardless of truncation. Returns -1 on failure, with errno set to
// EINVAL for bad arguments, ENOMEM when the part stack cannot grow any
// further, or EOVERFLOW when the length does not fit in ssize_t. A failed
// malloc returns -1 without this function touching errno.
LIBUPCOREAPI
ssize_t normalize_path(char* restrict dest, size_t dest_max, char const* restrict p, size_t n) noexcept {
    char const* default_parts[64];
    char const** parts, **new_parts;
    char const* p_end, * part, * q;
    size_t parts_depth, pardir_depth, parts_size, new_parts_size, length, part_length, i;

    // validate arguments; the limit is computed in size_t because
    // `SSIZE_MAX + 1` overflows the signed type
    if ((dest_max > (static_cast<size_t>(SSIZE_MAX) + 1)) || (!dest && dest_max) || (!p && n)) {
        errno = EINVAL;
        return -1;
    }

    // computed only after validation, so a null `p` is never offset by a
    // non-zero `n`
    p_end = p + n;

    // POSIX paths may begin with one or two slashes, but three or more
    // are treated as a single slash
    length = 0;
    for ( ; (p < p_end) && (*p == '/'); ++p) {
        ++length;
    }

    if (length > 2) {
        length = 1;
    }

    for (i = 0; (i < length) && (i < dest_max); ++i) {
        dest[i] = '/';
    }

    // split the path into parts and normalize them onto a stack
    parts = default_parts;
    parts_size = sizeof(default_parts) / sizeof(default_parts[0]);
    parts_depth = 0;
    pardir_depth = 0;

    while (p < p_end) {
        // skip separators between parts
        if (*p == '/') {
            ++p;
            continue;
        }

        // measure the part; its extent is bounded by p_end, never by a
        // null terminator, so nothing past the input is ever read
        part = p;
        for ( ; (p < p_end) && (*p != '/'); ++p) ;
        part_length = static_cast<size_t>(p - part);

        // skip part if curdir, pop top part off stack if pardir, and if
        // the path is absolute (length is non-zero), eat all of the
        // redundant pardir parts
        if (part[0] == '.') {
            if (part_length == 1) {
                continue;
            }

            if ((part_length == 2) && (part[1] == '.')) {
                // the bottom `pardir_depth` entries of the stack are ".."
                // parts kept for a relative path; they must never be popped,
                // or "../.." would collapse to "."
                if (parts_depth > pardir_depth) {
                    --parts_depth;
                    continue;
                }

                if (length) {
                    continue;
                }

                // relative path with nothing left to cancel: keep this ".."
                // by letting it fall through to the push below
                ++pardir_depth;
            }
        }

        // resize the path parts stack if space is exhausted
        if (parts_depth >= parts_size) {
            if (parts_size >= (SIZE_MAX / (3 * sizeof(char const*)))) {
                errno = ENOMEM;
                length = SIZE_MAX;
                goto cleanup;
            }

            new_parts_size = (parts_size * 3) / 2;
            new_parts = static_cast<char const**>(malloc(new_parts_size * sizeof(char const*)));
            if (!new_parts) {
                length = SIZE_MAX;
                goto cleanup;
            }

            memcpy(new_parts, parts, parts_size * sizeof(char const*));
            if (parts != default_parts) {
                free(parts);
            }

            parts = new_parts;
            parts_size = new_parts_size;
        }

        // push part onto stack
        parts[parts_depth] = part;
        ++parts_depth;
    }

    // rejoin the path parts in normalized form; length keeps counting even
    // when the destination is full so the caller learns the required size
    for (i = 0; i < parts_depth; ++i) {
        for (q = parts[i]; (q < p_end) && (*q != '/'); ++length, ++q) {
            if (length < dest_max) {
                dest[length] = *q;
            }
        }

        if (i != (parts_depth - 1)) {
            if (length < dest_max) {
                dest[length] = '/';
            }
            ++length;
        }
    }

cleanup:

    // free temporary buffers
    if (parts != default_parts) {
        free(parts);
    }

    // check for error conditions; SIZE_MAX is the in-band failure marker set
    // by the stack growth code above
    if (length == SIZE_MAX) {
        return -1;
    }
    else if (length > SSIZE_MAX) {
        errno = EOVERFLOW;
        return -1;
    }

    // check for empty path, normalize to current directory
    if (!length) {
        ++length;
        if (dest_max > 0) {
            dest[0] = '.';
        }
    }

    // null terminate the destination buffer
    if (length < dest_max) {
        dest[length] = '\0';
    }
    else if (dest_max > 0) {
        dest[dest_max - 1] = '\0';
    }

    return static_cast<ssize_t>(length);
}

// Convenience overload for null-terminated input: measures `p` with strlen
// (a null `p` counts as an empty path) and forwards to the (pointer, length)
// overload, returning whatever that overload returns.
LIBUPCOREAPI UPALLOC UPWARNRESULT
char* normalize_path(char const* p) noexcept {
    size_t n = 0;
    if (p) {
        n = strlen(p);
    }

    return normalize_path(p, n);
}

// Normalizes the path `p` of `n` bytes and returns the result as a newly
// allocated string (obtained from strndup or malloc), or nullptr when
// normalization or allocation fails.
//
// A first pass normalizes into a fixed stack buffer. If the reported length
// fits there, the result is duplicated; otherwise a heap buffer of exactly
// the reported size is allocated and normalization is run a second time.
LIBUPCOREAPI UPALLOC UPWARNRESULT
char* normalize_path(char const* p, size_t n) noexcept {
    char stack_buffer[1024];

    ssize_t const first_pass = normalize_path(stack_buffer, sizeof(stack_buffer), p, n);
    if (first_pass < 0) {
        return nullptr;
    }

    size_t const needed = static_cast<size_t>(first_pass);
    if (needed >= sizeof(stack_buffer)) {
        // the stack buffer only holds a truncated result, so redo the work
        // in a buffer that is large enough
        char* const heap_copy = static_cast<char*>(malloc(needed + 1));
        if (!heap_copy) {
            return nullptr;
        }

        // the second pass must report the same length as the first
        UP_VERIFY(first_pass == normalize_path(heap_copy, needed + 1, p, n));
        return heap_copy;
    }

    return strndup(stack_buffer, needed);
}


And my unit test...

UP_TEST_CASE(posix_normalize_path) {
    char* path;
       
    path = up::filesystem::posix::normalize_path((char const*)nullptr);
    require(path && !up::strcmp(path, "."));
    free(path);

    path = up::filesystem::posix::normalize_path("");
    require(path && !up::strcmp(path, "."));
    free(path);

    path = up::filesystem::posix::normalize_path(".");
    require(path && !up::strcmp(path, "."));
    free(path);

    path = up::filesystem::posix::normalize_path("hello");
    require(path && !up::strcmp(path, "hello"));
    free(path);
       
    path = up::filesystem::posix::normalize_path("/hello");
    require(path && !up::strcmp(path, "/hello"));
    free(path);

    path = up::filesystem::posix::normalize_path("//hello");
    require(path && !up::strcmp(path, "//hello"));
    free(path);

    path = up::filesystem::posix::normalize_path("///hello");
    require(path && !up::strcmp(path, "/hello"));
    free(path);

    path = up::filesystem::posix::normalize_path("hello/sussman/../../");
    require(path && !up::strcmp(path, "."));
    free(path);

    path = up::filesystem::posix::normalize_path("hello/sussman/../../..");
    require(path && !up::strcmp(path, ".."));
    free(path);

    path = up::filesystem::posix::normalize_path("/hello/sussman/../../../");
    require(path && !up::strcmp(path, "/"));
    free(path);

    path = up::filesystem::posix::normalize_path(".//hello//sussman//");
    require(path && !up::strcmp(path, "hello/sussman"));
    free(path);

    path = up::filesystem::posix::normalize_path("hello/sussman/sicp.pdf");
    require(path && !up::strcmp(path, "hello/sussman/sicp.pdf"));
    free(path);

    path = up::filesystem::posix::normalize_path("hello/../sussman/sicp.pdf");
    require(path && !up::strcmp(path, "sussman/sicp.pdf"));
    free(path);

    path = up::filesystem::posix::normalize_path("hello/../../sussman/sicp.pdf");
    require(path && !up::strcmp(path, "../sussman/sicp.pdf"));
    free(path);

    path = up::filesystem::posix::normalize_path("/hello/../../sussman/sicp.pdf");
    require(path && !up::strcmp(path, "/sussman/sicp.pdf"));
    free(path);

    path = up::filesystem::posix::normalize_path("./hello//sussman/sicp.pdf");
    require(path && !up::strcmp(path, "hello/sussman/sicp.pdf"));
    free(path);

    path = up::filesystem::posix::normalize_path("//hello/./sussman/..///sicp.pdf");
    require(path && !up::strcmp(path, "//hello/sicp.pdf"));
    free(path);
}

Newer Posts
Don't change these.
Name: Email:
Entire Thread Thread List