/prog/ - C and goto

Name: Anonymous 2012-06-19 6:00

Is it okay to use goto in C for functions like this? (It's actually C++, but I'm doing C-style development.) Is this function too long, or is it okay due to the nature of null-terminated string processing? It's been a long time since I've done C-style development. Any other recommendations (except "use Lisp or XYZ language instead")?



ssize_t normalize_path(char* restrict dest, size_t dest_max, char const* restrict p, size_t n) noexcept {

    size_t i, m, length;

    ssize_t retval;

    char const* q;

    char* buffer, * state;

    char const** parts, **new_parts;

    size_t parts_size, new_parts_size;

    char const* default_parts[32];



    // validate arguments

    if ((dest_max > (SSIZE_MAX + 1)) || (!dest && dest_max) || (!p && n)) {

        errno = EINVAL;

        return -1;

    }



    length = 0;



    if (!n) {

        // empty path, normalize to current directory

        goto normalize_curdir;

    }

        

    if (*p == '/') {

        // POSIX paths may begin with one or two slashes, but three or more

        // are treated as a single slash

        ++length; ++p; --n;

        if (n && (*p == '/')) {

            ++p; --n;

            q = strncchr(p, '/', n);

            if (!q) {

                q = p + n;

            }



            if (q == p) {

                ++length;

            }

            else {

                n -= q - p;

                p = q;

            }

        }



        // copy one or two slashes into destination buffer

        for (i = 0, m = (length < dest_max) ? length : dest_max - 1; i < m; ++i) {

            dest[i] = '/';

        }



        if (!n) {

            goto null_terminate;

        }

    }



    // create a local copy of input path for tokenization

    buffer = static_cast<char*>(malloca(n + 1));

    if (!buffer) {

        return -1;

    }



    *static_cast<char*>(mempcpy(buffer, p, n)) = '\0';



    // tokenize the local path and normalize parts into stack

    i = 0;

    retval = 0;

    state = nullptr;

    parts = default_parts;

    parts_size = sizeof(default_parts) / sizeof(default_parts[0]);



    for (q = strtok_r(buffer, path<char>::sepset, &state); q; q = strtok_r(nullptr, path<char>::sepset, &state)) {

        // skip part if curdir, pop top part off stack if pardir, and if

        // the path is absolute (length is non-zero), eat all of the 

        // redundant pardir parts

        if (*q == '.') {

            if (q[1] == '\0') {

                continue;

            }

            else if ((q[1] == '.') && (q[2] == '\0')) {

                if (i > 0) {

                    --i;

                    continue;

                }

                else if (length > 0) {

                    continue;

                }

            }

        }



        // resize the path parts stack if space is exhausted

        if (i >= parts_size) {

            if (parts_size >= (SIZE_MAX / (3 * sizeof(char const*)))) {

                errno = ENOMEM;

                retval = -1;

                goto cleanup;

            }



            new_parts_size = (parts_size * 3) / 2;

            new_parts = static_cast<char const**>(malloc(new_parts_size * sizeof(char const*)));

            if (!new_parts) {

                retval = -1;

                goto cleanup;

            }



            memcpy(new_parts, parts, parts_size * sizeof(char const*));

            if (parts != default_parts) {

                free(parts);

            }



            parts = new_parts;

            parts_size = new_parts_size;

        }



        // push part onto stack

        parts[i++] = q;

    }



    // rejoin the path parts in normalized form

    if (length < dest_max) {

        retval = join_path(dest + length, dest_max - length, parts, i);

    }

    else {

        retval = join_path(nullptr, 0, parts, i);

    }



cleanup:



    // free temporary buffers

    if (parts != default_parts) {

        free(parts);

    }



    freea(buffer);



    // update length with full length from join

    if (retval < 0) {

        return -1;

    }



    length += static_cast<size_t>(retval);

    if (!length) {

        goto normalize_curdir;

    }

    else if (length > SSIZE_MAX) {

        errno = EOVERFLOW;

        return -1;

    }



    return static_cast<ssize_t>(length);



normalize_curdir:



    // empty path, normalize to current directory

    UP_ASSERT(!length);

    ++length;

    if (dest_max > 0) {

        dest[0] = '.';

    }



null_terminate:



    // null terminate the destination buffer

    if (length < dest_max) {

        dest[length] = '\0';

    }

    else if (dest_max > 0) {

        dest[dest_max - 1] = '\0';

    }



    return static_cast<ssize_t>(length);

}

Name: Anonymous 2012-06-19 6:17

Is there any reason why you can't just move that goto code to where it was called from?

Name: Cudder !MhMRSATORI!FBeUS42x4uM+kgp 2012-06-19 6:31

That looks like an overly complicated way to do it, if all you want to do is remove duplicate '/', strip '.', and have '..' strip the previous component.

As long as both have length remaining, copy characters from source to dest. In the case of "./", don't advance the dest and the next copy will overwrite it naturally. In the case of "../", backup to immediately after the 2nd-last "/" seen (e.g. /parent/../ -> /). In the case of "/", update the last and 2nd-last "/" position for the parent normalisation. No need to even allocate memory, this can be done in-place.

Name: Anonymous 2012-06-19 7:01

Yes, it is perfectly ok to use GOTO. As a side note, there are many language purists that hold a different opinion. It might be because they assume you will goto to any wild location labelling it spaghetti. "continue" and "break" in loops can also be considered a goto, but to a well defined location.

GOTO to a well defined location -- that is a location that will not create variables with undefined values, or deeper into hierarchy -- and you have enough ammo to defend your call.

Name: Anonymous 2012-06-19 7:29

I made this in 25 minutes. It's ugly but it works and is efficient.

#include <stdio.h>

#include <stdlib.h>



#define MAX_DEPTH 64

/*
 * Normalises the null-terminated `path` into `dst` and returns `dst`.
 *
 * Components are separated by '/' or '\\'. "./" is skipped, "../" removes
 * the previously collected component (or is dropped when there is none), and
 * the remaining components are written to `dst` joined with '/'.
 *
 * `dst` must have room for at least 256 bytes: output stops after 255
 * characters and is then terminated.
 *
 * Limitations kept exactly as posted:
 *  - only one separator is skipped per component, so runs of separators
 *    are mishandled;
 *  - "." and ".." are only recognised when followed by a separator;
 *  - scanning stops once MAX_DEPTH components have been collected;
 *  - a leading separator is not reproduced in the output.
 */
char* normalise(char* dst, const char* path)
{
        const char* path_stack[MAX_DEPTH];
        size_t path_depth = 0;
        size_t di = 0;
        size_t si = 0;

        /* pass 1: collect the start of every surviving component */
        while (*path && (path_depth < MAX_DEPTH)) {
                if ((*path == '/') || (*path == '\\'))
                        path++;
                if (!(('.' == path[0]) &&
                      (('/' == path[1]) || ('\\' == path[1])))) {
                        if (('.' == path[0]) &&
                            ('.' == path[1]) &&
                            (('/' == path[2]) || ('\\' == path[2]))) {
                                /* "../": forget the previous component */
                                if (path_depth)
                                        path_depth--;
                                path += 3;
                        } else {
                                /* ordinary component: remember it, then
                                 * advance to the next separator or the end */
                                path_stack[path_depth] = path;
                                path_depth++;
                                do {
                                        if (*path == 0)
                                                break;
                                        path++;
                                } while ((*path != '/') && (*path != '\\'));
                        }
                } else {
                        /* "./": nothing to record */
                        path += 2;
                }
        }

        /* pass 2: copy the components out, joined by '/' */
        for (si = 0; si < path_depth; si++) {
                const char* c;
                for (c = path_stack[si];
                     (*c != '\\') && (*c != '/') && *c;
                     c++) {
                        dst[di++] = *c;
                        if (di == 255) goto out;
                }
                if (si != (path_depth - 1))
                        dst[di++] = '/';
                if (di == 255) goto out;
        }
out:
        dst[di] = 0;
        /* the function is declared to return char*; flowing off the end
         * without a value is undefined behaviour in C++ */
        return dst;
}



void test(const char* path)

{

        static char norm[256];

        normalise(norm, path);

        printf("%s -> %s\n", path, norm);

}



int main()

{

        test("/foo/bar/../../../../bar/foo/./");

        test("./bar/../foo");

        test("c:\\cygwin\\home\\zun\\prog\\th11\\src\\core\\../core/zunlib.h");

        return 0;

}

Name: >>5 2012-06-19 7:33

Bugfix.

13c13

<         if ((*path == '/') || (*path == '\\'))

---

        while ((*path == '/') || (*path == '\\'))

Name: >>5 2012-06-19 7:43

More bugfixes.

#include <stdio.h>

#include <stdlib.h>



#define MAX_DEPTH 64

/*
 * Normalises the null-terminated `path` into `dst`.
 *
 * Components are separated by any run of '/' or '\\'. "." components are
 * skipped, ".." removes the previously collected component (or is dropped
 * when there is none), and the surviving components are written to `dst`
 * joined with a single '/'. Trailing separators do not produce a trailing
 * '/' in the output.
 *
 * `dst` must have room for at least 256 bytes: output stops after 255
 * characters and is then terminated.
 *
 * Limitations kept as posted: leading separators are not reproduced (an
 * absolute path comes out relative), and scanning stops once MAX_DEPTH
 * components have been collected.
 */
void normalise(char* dst, const char* path)
{
    const char* path_stack[MAX_DEPTH];
    size_t path_depth = 0;
    size_t di = 0;
    size_t si = 0;

    /* pass 1: collect the start of every surviving component */
    while (*path && (path_depth < MAX_DEPTH)) {
        while ((*path == '/') || (*path == '\\'))
            path++;

        /* only separators were left; without this check an empty component
         * would be pushed and "a/" would be emitted as "a/" */
        if (!*path)
            break;

        if (!(('.' == path[0]) &&
              (('/' == path[1]) || ('\\' == path[1]) || !path[1]))) {
            if (('.' == path[0]) &&
                ('.' == path[1]) &&
                (('/' == path[2]) || ('\\' == path[2]) || !path[2])) {
                /* "..": forget the previous component; the separator (if
                 * any) is consumed at the top of the next iteration */
                if (path_depth)
                    path_depth--;
                path += 2;
            } else {
                /* ordinary component: remember it, then advance to the
                 * next separator or the end of the string */
                path_stack[path_depth] = path;
                path_depth++;
                do {
                    if (*path == 0)
                        break;
                    path++;
                } while ((*path != '/') && (*path != '\\'));
            }
        } else {
            /* ".": nothing to record */
            path++;
        }
    }

    /* pass 2: copy the components out, joined by '/' */
    for (si = 0; si < path_depth; si++) {
        const char* c;
        for (c = path_stack[si];
             (*c != '\\') && (*c != '/') && *c;
             c++) {
            dst[di++] = *c;
            if (di == 255) goto out;
        }
        if (si != (path_depth - 1))
            dst[di++] = '/';
        if (di == 255) goto out;
    }
out:
    dst[di] = 0;
}

 

void test(const char* path)

{

     static char norm[256];

    normalise(norm, path);

    printf("`%s` -> `%s`\n", path, norm);

}

 

int main()

{

     test("");

    test("/");

    test("..");

    test("/foo/bar/../../../../bar/foo/./");

    test("./bar/../foo");

    test("//c:\\cygwin\\home\\zun\\prog\\th11\\src\\core\\../core/zunlib.h");

    return 0;

}

Name: Anonymous 2012-06-19 8:12

I like to use goto instead of blocks. Curly braces are so ugly!

For example, instead of:



/* Recursive Fibonacci, brace-block form: returns n itself for n < 2,
 * otherwise fib(n-1) + fib(n-2). */
int fib(int n) 

{

  if (n < 2) {

    return n;

  } else {

    return fib(n-1) + fib(n-2);

  }

}

I use this instead:



/* Recursive Fibonacci, goto form: same result as the brace-block version.
 * Each labelled statement returns, so control never falls from L1 into L2. */
int fib(int n) 

{

  if (n < 2) goto L1;

  else goto L2;

L1: return n;

L2: return fib(n-1) + fib(n-2);

}

Name: Anonymous 2012-06-19 8:32

>>8
Yar Retoided.

Name: Anonymous 2012-06-19 9:55

I hate it when, in C, code execution implicitly goes to the next line, so I always explicitly write where the execution should go.

I also never create functions other than `main`, since calling them is so slow. I've developed a naming convention where every line has a label like [PROC NAME]_[LOCAL LINE NUMBER]. This is very efficient in keeping the code simple (yeah, I think following the KISS principle is good).

In my opinion, this results in code that is extremely easy to read. There is never any need for guessing where the execution goes next. Always explicit goto.

Here's an example.



/* Illustrative fragment: one labelled statement per line, each ending in an
 * explicit goto. It is not a complete function as posted. */
/* NOTE(review): the gotos target L2..L6, but the labels defined here are
 * greet_L2..greet_L6, so the jump targets do not exist in this fragment. */
/* NOTE(review): gets() performs no bounds check against the 16-byte buf, and
 * printf(buf) uses the input as the format string. */
greet_L1: char buf[16]; goto L2;

greet_L2: puts("Who are you?"); goto L3;

greet_L3: gets(buf); goto L4;

greet_L4: printf("Hello "); goto L5;

greet_L5: printf(buf); goto L6;

greet_L6: goto exit_L1;

exit_L1: return 0;

Name: Anonymous 2012-06-19 9:58

>>10
That's what ZUN's scripting language looks like. Confirmed for Japanese quality.

Name: Anonymous 2012-06-19 11:11

Actually I think we could create some kind of convention on how to use goto in a good way...
For example, goto is usually criticized for making code unreadable because it's hard to know where the goto actually goes, but we could fix that by adopting the convention that endb goes to the end of the block:



if (..) {

  if (..) {

    goto endb1;

  else

    ..

  }

}endb1:

A Vim script could manage very easily the number of the block.

My conclusion is that people hate the goto statement because they don't have imagination.

Name: Anonymous 2012-06-19 11:58

吥ʒ醔⤥㊈ᆂ䜧礡▀猐┱刄㤹儰ᐱ⥨䤅葁捣鄨ᚅ隕醈Ŧ频㦙夅斐᎖卑ሹ蔦ㄳ垉啅ف㌖ㅰ͙ᙉ鍰销葒Ɔv戡䤈⥧䒉瑦䡑搤ا䈅堘䙐䞔ᄧ䦇斃䁳ॆᝂ顦ᢙ⦉⡇՘hᙇ褀襷億㢘栈ↁ吤椹ᅤࡕ暙剕ቃ咖镴坹頢劉̅ᥩ牲Ź蚕⍇硖憈嘦甡㌉₉ņ熐ᙩᐅ怴鈁餁堈ن㈘煠霰愃撐⡗䐳醕蒘䥩啕堇遆☘⍠Đ⒅┑ख़㕰嘵┨ኔ挠斀䡑慦䐹荥ȅ䌦ፗ䄆饸饨ࠩ蔒䜆杷腅蠈啐奘䡓䁡衉瞖ᜆ♩⑤▆䔨慩ᅶ夡陦匣֗ᙙ霣䥡ᥰ᎔㜃ن᠙】ঁ鐖琠㔔灢蘳饱㜆大ㄘᑡ掙䥖斖㜓ဘ撁ƒ聕㤤փ甃⁗栆㖈怰ᐶ打ᘵ҃剆ؖ㌳⑂␷祡杒❓ᅷ㌇煓呥鉧≘䕔ፒᦆ戆䁢暑ᐸ嘷錷鄖炙㌓霸餐ᜃ饤⤒葧捉⦂堶☳ᦈ䠁䠱⁤⅀煱℗袃癹䌠萆➄喃畃〩蝆ᕁ♣钖ᑱ咇楔‧㐕≠炘熐脑䝣瀩ↂ獒ٴ硇ⅴ顦䙱挳鍅〓ㅩᅄ礐݄Ā㉔椂م聑扐v瘕ؕ−隂㑙⡣

Name: Anonymous 2012-06-19 13:08

>>5,7
Thanks for this. I tried it out, and it failed a few of my test cases, so when modifying it to fit my function signature, I made a few changes to fix them (I can't assume the input string is null-terminated, nor can I assume the destination buffer is large enough to accommodate the output). But it cuts out the buffer allocation for use with strtok and the join_path call, thus speeding things up quite a bit which is awesome, so thanks. I debated about whether to cap the path depth and just return EOVERFLOW and not reallocate on the heap for additional space like in your code, but decided to keep that in to make it more robust for edge cases. I also didn't need to worry about Windows style paths, as I have another set of path manipulation functions for that platform which handle it, similar to how Python does it.



LIBUPCOREAPI

ssize_t normalize_path(char* restrict dest, size_t dest_max, char const* restrict p, size_t n) noexcept {

    char const* default_parts[64];

    char const** parts, **new_parts;

    char const* const p_end = p + n;

    size_t parts_depth, parts_size, new_parts_size, length, i;

        

    // validate arguments

    if ((dest_max > (SSIZE_MAX + 1)) || (!dest && dest_max) || (!p && n)) {

        errno = EINVAL;

        return -1;

    }



    length = 0;



    // POSIX paths may begin with one or two slashes, but three or more

    // are treated as a single slash

    for ( ; (p < p_end) && (*p == '/'); ++p) {

        if (length < dest_max) {

            dest[length] = '/';

        }

        if (++length > 2) {

            length = 1;

            break;

        }

    }



    // tokenize the local path and normalize parts into stack

    parts = default_parts;

    parts_size = sizeof(default_parts) / sizeof(default_parts[0]);

    parts_depth = 0;



    for ( ; p < p_end; ++p) {

        // skip part if curdir, pop top part off stack if pardir, and if

        // the path is absolute (length is non-zero), eat all of the 

        // redundant pardir parts

        if (*p == '/') {

            continue;

        }

        else if ((p[0] == '.') && ((p[1] == '/') || (p[1] == '\0'))) {

            ++p;

            continue;

        }

        else if ((p[0] == '.') && (p[1] == '.') && ((p[2] == '/') || (p[2] == '\0'))) {

            if (parts_depth) {

                --parts_depth;

                p += 2;

                continue;

            }

            else if (length) {

                p += 2;

                continue;

            }

        }



        // resize the path parts stack if space is exhausted

        if (parts_depth >= parts_size) {

            if (parts_size >= (SIZE_MAX / (3 * sizeof(char const*)))) {

                errno = ENOMEM;

                length = SIZE_MAX;

                goto cleanup;

            }



            new_parts_size = (parts_size * 3) / 2;

            new_parts = static_cast<char const**>(malloc(new_parts_size * sizeof(char const*)));

            if (!new_parts) {

                length = SIZE_MAX;

                goto cleanup;

            }



            memcpy(new_parts, parts, parts_size * sizeof(char const*));

            if (parts != default_parts) {

                free(parts);

            }



            parts = new_parts;

            parts_size = new_parts_size;

        }



        // push part onto stack

        parts[parts_depth] = p;

        ++parts_depth;



        // find start of next part

        for ( ; (p < p_end) && (*p != '/'); ++p) ;

    }



    // rejoin the path parts in normalized form

    for (i = 0; i < parts_depth; ++i) {

        for (p = parts[i]; (p < p_end) && (*p != '/'); ++length, ++p) {

            if (length < dest_max) {

                dest[length] = *p;

            }

        }



        if (i != (parts_depth - 1)) {

            if (length < dest_max) {

                dest[length] = '/';

            }

            ++length;

        }

    }



cleanup:



    // free temporary buffers

    if (parts != default_parts) {

        free(parts);

    }



    // check for error conditions

    if (length == SIZE_MAX) {

        return -1;

    }

    else if (length > SSIZE_MAX) {

        errno = EOVERFLOW;

        return -1;

    }



    // check for empty path, normalize to current directory

    if (!length) {

        ++length;

        if (dest_max > 0) {

            dest[0] = '.';

        }

    }



    // null terminate the destination buffer

    if (length < dest_max) {

        dest[length] = '\0';

    }

    else if (dest_max > 0) {

        dest[dest_max - 1] = '\0';

    }



    return static_cast<ssize_t>(length);

}



// Convenience overload for null-terminated paths: measures p with strlen and
// forwards to the (pointer, length) overload. A null p is forwarded with a
// length of zero.
LIBUPCOREAPI UPALLOC UPWARNRESULT
char* normalize_path(char const* p) noexcept {
    size_t path_length = 0;
    if (p) {
        path_length = strlen(p);
    }

    return normalize_path(p, path_length);
}



// Normalizes the n bytes at p and returns the result as a newly allocated
// string (strndup/malloc) that the caller releases with free(). Returns
// nullptr when normalization or allocation fails.
LIBUPCOREAPI UPALLOC UPWARNRESULT
char* normalize_path(char const* p, size_t n) noexcept {
    char stack_buffer[1024];

    // first attempt goes into the stack buffer; the returned length is the
    // full normalized length even when the output had to be truncated
    ssize_t const first_pass = normalize_path(stack_buffer, sizeof(stack_buffer), p, n);
    if (first_pass < 0) {
        return nullptr;
    }

    size_t const needed = static_cast<size_t>(first_pass);
    if (needed >= sizeof(stack_buffer)) {
        // result did not fit: allocate the exact size and normalize again
        char* const heap_copy = static_cast<char*>(malloc(needed + 1));
        if (heap_copy) {
            UP_VERIFY(first_pass == normalize_path(heap_copy, needed + 1, p, n));
        }
        return heap_copy;
    }

    // result fit completely, hand back a heap copy of it
    return strndup(stack_buffer, needed);
}

And my unit test...



UP_TEST_CASE(posix_normalize_path) {

    char* path;

        

    path = up::filesystem::posix::normalize_path((char const*)nullptr);

    require(path && !up::strcmp(path, "."));

    free(path);



    path = up::filesystem::posix::normalize_path("");

    require(path && !up::strcmp(path, "."));

    free(path);



    path = up::filesystem::posix::normalize_path(".");

    require(path && !up::strcmp(path, "."));

    free(path);



    path = up::filesystem::posix::normalize_path("hello");

    require(path && !up::strcmp(path, "hello"));

    free(path);

        

    path = up::filesystem::posix::normalize_path("/hello");

    require(path && !up::strcmp(path, "/hello"));

    free(path);



    path = up::filesystem::posix::normalize_path("//hello");

    require(path && !up::strcmp(path, "//hello"));

    free(path);



    path = up::filesystem::posix::normalize_path("///hello");

    require(path && !up::strcmp(path, "/hello"));

    free(path);



    path = up::filesystem::posix::normalize_path("hello/sussman/../../");

    require(path && !up::strcmp(path, "."));

    free(path);



    path = up::filesystem::posix::normalize_path("hello/sussman/../../..");

    require(path && !up::strcmp(path, ".."));

    free(path);



    path = up::filesystem::posix::normalize_path("/hello/sussman/../../../");

    require(path && !up::strcmp(path, "/"));

    free(path);



    path = up::filesystem::posix::normalize_path(".//hello//sussman//");

    require(path && !up::strcmp(path, "hello/sussman"));

    free(path);



    path = up::filesystem::posix::normalize_path("hello/sussman/sicp.pdf");

    require(path && !up::strcmp(path, "hello/sussman/sicp.pdf"));

    free(path);



    path = up::filesystem::posix::normalize_path("hello/../sussman/sicp.pdf");

    require(path && !up::strcmp(path, "sussman/sicp.pdf"));

    free(path);



    path = up::filesystem::posix::normalize_path("hello/../../sussman/sicp.pdf");

    require(path && !up::strcmp(path, "../sussman/sicp.pdf"));

    free(path);



    path = up::filesystem::posix::normalize_path("/hello/../../sussman/sicp.pdf");

    require(path && !up::strcmp(path, "/sussman/sicp.pdf"));

    free(path);



    path = up::filesystem::posix::normalize_path("./hello//sussman/sicp.pdf");

    require(path && !up::strcmp(path, "hello/sussman/sicp.pdf"));

    free(path);



    path = up::filesystem::posix::normalize_path("//hello/./sussman/..///sicp.pdf");

    require(path && !up::strcmp(path, "//hello/sicp.pdf"));

    free(path);

}

Name: Anonymous 2012-06-19 13:38

strtok()

Please don't do this. Use Boost.Tokenizer.

Name: Anonymous 2012-06-19 13:39

>>10
You realize that adds overhead too?

Name: Anonymous 2012-06-19 13:48

>>15
Use Boost.Tokenizer.
And import nearly 17 million lines of half-assed code into your project and quadruple compile-times? No thanks. And I don't use strtok, but rather the safer reentrant version, strtok_r.

Name: Anonymous 2012-06-19 13:58

>>17
not using pre-compiled headers

Name: Anonymous 2012-06-19 14:04

>>18
Boost also increases executable bloat. I'm staying away from most of the Standard C++ library and OOP language features for a reason.

Name: Anonymous 2012-06-19 14:48

C++ is a good language. It is not a perfect language because it inherits from C. C is a flawed language where many things are left undefined. C is an ancient artifact that serves no purpose outside of the domain of kernel design. Because of the improvements made upon C to form C++, beginning programmers and veteran programmers alike may be led astray, thinking that modern C usage is a good idea. It is a mistake to believe the success of C++ justifies the continued use and popularity of C. Just because C++ is successful does not mean the language it has inherited from is of high quality.

Name: Anonymous 2012-06-19 17:03

This isn't C, also your code is unreasonably complicated.

Name: Anonymous 2012-06-19 17:05

>>19
Whether you like it or not (and I don't see how anyone can like it), the C++ standard library, libstdc++ or whatever your compiler uses, is required for pretty much all useful C++ language features

Name: Anonymous 2012-06-19 17:11

>>19
>Boost also increases executable bloat
No it does not, not any more than using other libraries or even your own handwritten stuff to accomplish the same thing.

Name: Anonymous 2012-06-19 18:36

>>23
Yes it does. When you're doing game development or artificial intelligence where every byte and cycle can count, you can't afford using inefficient language or library features. Even C++ exception handling causes problems because it increases the instruction count for stack unwinding, which can increase the number of cache misses.

Name: Anonymous 2012-06-19 18:37

>>22
Namespaces, lambdas, const, noexcept, constexpr, r-value references don't require any use of the Standard C++ Library.

Name: Anonymous 2012-06-19 19:25

my tests show using goto is significantly less optimizable by gcc than the more common constructs, take that as you will

Name: Anonymous 2012-06-19 19:50

>>24
Boost is not inefficient. You have no idea what you're talking about, do you?

Name: Anonymous 2012-06-19 20:05

>>27
Boost is written in C++, and therefore required to be inefficient. If it isn't inefficient, then it must violate the C++ standard in some way.

Name: Anonymous 2012-06-19 20:59

>>28
Boost is written in C++, and not required to be inefficient. If it isn't inefficient, then it must be decently implemented.

Name: Anonymous 2012-06-19 21:15

>>27
Yes it is. Prove that it isn't. Extraordinary claims require extraordinary evidence. Show me the benchmarks between Boost.Filesystem and regular C-string processing + POSIX syscalls.

Name: Anonymous 2012-06-20 1:30

>>16
Nope, smart C compiler can optimize it.

Name: Anonymous 2012-06-20 1:47

>>28
C is not required to be efficient either, ``faggot.''

Name: Anonymous 2012-06-20 3:19

Check out these doubles

Name: Anonymous 2012-06-20 3:22

>>31
Yeah, ignore him, he's probably using the Tiny C Compiler.

Name: Anonymous 2012-06-20 3:23

>>33
You're lucky, if >>34 was 4 seconds faster, you would have missed your dubs.

Name: Anonymous 2012-06-20 4:56

>>25
>Namespaces
Fairly useless
>lambdas
Gimmick
>const
Not a C++ feature
>noexcept
Is the point of this solely to force the compiler into stupid situations? throw() in C++ generally results in worse code
>constexpr
Pretty meh. for a feature
>r-value references
How is this any different from: bool operator=(SomeObject& rhs)

And it wouldn't surprise me if more than one of these features required the C++ standard library. Hell, even virtual functions requires it.

Name: Anonymous 2012-06-20 5:34

>>36

this post was oozing of jelly, my hands are sticky.

Name: Anonymous 2012-06-20 5:35

>>36
If you're not going to write C++, then stop whining about it and just write C. Nigger.

Name: Cudder !MhMRSATORI!FBeUS42x4uM+kgp 2012-06-20 6:24

>>14
You don't need dynamic allocation AT ALL. Your code is still much more complicated and inefficient than necessary. Here's a skeleton of what it should look like:

/*
 * Skeleton of a single-pass path normaliser: copies characters from `in`
 * (at most in_len of them) to `out` (at most out_len bytes) and leaves the
 * special cases as placeholders marked "for you". If nothing was written it
 * emits ".", and it terminates the output when space remains.
 *
 * NOTE(review): this is an outline, not a finished implementation; the
 * notes below describe what the code does as written.
 */
void normalise_path(char *out, size_t out_len, char *in, size_t in_len) {

 /* NOTE(review): in_orig is not used anywhere in the visible code */
 char *out_orig = out, *in_orig = in;

 char c;

 if (!in)

  goto done;

 /* NOTE(review): the condition reads and advances `in` before comparing, so
  * the first non-slash character has already been consumed when this loop
  * exits; in_len is not decremented inside this loop */
 while(out_len && in_len && (c=*in++) == '/') {

  /* do leading slash stuff here */

 }

 /* main copy loop: stops when either length runs out or a '\0' is read */
 while(out_len && in_len && (c=*in++)) {

  switch(c) {

  case '/':

   if(in_len > 1 && *in == '/') { /* "//" */

    /* for you */

   } else if(in_len > 2 && in[0] == '.' && in[1] == '/') { /* "/./" */

    /* for you */

   } else if(in_len > 3 && in[0] == '.' && in[1] == '.' && in[2] == '/') { /* "/../" */

    /* for you */

   }

  /* no break above: case '/' falls through and the slash is copied too */
  default:

   *out++ = c;

   out_len--; in_len--;

   break;

  }

 }

done:

 if(out == out_orig && out_len) { /* nothing? '.' */

  *out++ = '.';

  out_len--;

 }

 /* NOTE(review): when out_len has reached zero no terminator is written */
 if(out_len)

  *out = 0;

}

Name: Anonymous 2012-06-20 7:38

I didn't read it through, but I like your style. I think as long as it's just one function that you're not changing often, you're going to be fine.

Name: Anonymous 2012-06-21 6:29

>>39
Yes, I suppose you could just walk back to the previous '/' separator in the out buffer when ".." is encountered, and have some additional logic to handle when the end is reached.

The current code doesn't actually allocate from the heap any storage in most cases. Most paths aren't going to be more than 64 levels deep. It's just for rare edge cases.

That said, doing it your way is probably better, it doesn't need to touch characters in the input buffer more than once, and has a lesser instruction count.

Name: Anonymous 2012-06-21 20:19

>>41
Actually, I realized I do need the parts stack, because I can't rely on the destination buffer being there or being large enough, so once it's exhausted it's useless for back-tracking to previous parts. Users pass in a null/empty destination buffer when they want to compute the length of the normalized path in characters, so they can allocate storage and call the function again.

But then I realized that there's always an upper bound for the depth of the path as a function of its length. It'll never be more than half of the path's length in characters. So then I just malloca the parts stack once and cut out the huge block for resizing it, and it's a big win. malloca is simply a preprocessor macro which uses alloca for small allocations that fit on the guard page (typically 1024 or 4096 bytes depending on the platform) or malloc otherwise, so for paths of 256 characters or less, there's almost no memory allocation overhead. In the end, it shaved off a few KB of machine instructions.

Here's the final version:



LIBUPCOREAPI

ssize_t normalize_path(char* restrict dest, size_t dest_max, char const* restrict p, size_t n) noexcept {

    char const* const p_end = p + n;

    size_t parts_depth, parts_size, length, i;

    char const** parts;

        

    // validate arguments

    if ((dest_max > (SSIZE_MAX + 1)) || (!dest && dest_max) || (!p && n)) {

        errno = EINVAL;

        return -1;

    }



    // POSIX paths may begin with one or two slashes, but three or more

    // are treated as a single slash

    for (length = 0; (p < p_end) && (*p == '/'); ++p) {

        if (length < dest_max) {

            dest[length] = '/';

        }

        if (++length > 2) {

            length = 1;

            break;

        }

    }



    // allocate parts stack with upper-bound of n / 2

    parts_depth = 0;

    parts_size = n / 2;

    parts = static_cast<char const**>(malloca(parts_size * sizeof(char const*)));

        

    // tokenize the local path and normalize parts into stack

    while (p < p_end) {

        if (*p == '/') {

            ++p;

            continue;

        }

        else if ((p[0] == '.') && ((p[1] == '/') || (p[1] == '\0'))) {

            p += 2;

            continue;

        }

        else if ((p[0] == '.') && (p[1] == '.') && ((p[2] == '/') || (p[2] == '\0'))) {

            if (parts_depth) {

                --parts_depth;

                p += 3;

                continue;

            }

            else if (length) {

                p += 3;

                continue;

            }

        }



        assert(parts_depth < parts_size);

        parts[parts_depth] = p;

        ++parts_depth;



        for ( ; (p < p_end) && (*p != '/'); ++p) ;

    }



    // rejoin the path parts in normalized form

    for (i = 0; i < parts_depth; ++i) {

        for (p = parts[i]; (p < p_end) && (*p != '/'); ++length, ++p) {

            if (length < dest_max) {

                dest[length] = *p;

            }

        }



        if (i != (parts_depth - 1)) {

            if (length < dest_max) {

                dest[length] = '/';

            }

            ++length;

        }

    }



    // free temporary buffers

    freea(parts);



    // check for error conditions

    if (length > SSIZE_MAX) {

        errno = EOVERFLOW;

        return -1;

    }



    // check for empty path, normalize to current directory

    if (!length) {

        ++length;

        if (dest_max) {

            dest[0] = '.';

        }

    }



    // null terminate the destination buffer

    if (length < dest_max) {

        dest[length] = '\0';

    }

    else if (dest_max) {

        dest[dest_max - 1] = '\0';

    }



    return static_cast<ssize_t>(length);

}

Name: Anonymous 2012-06-21 20:46

Use Lisp. C is shit.

Name: Anonymous 2012-06-21 22:55

>>43
C is shit outside of kernel design. And probably compilers.

Name: Anonymous 2012-06-21 23:00

>>44
For compilers, you want something that can easily manipulate AST trees, i.e. not C.

Name: Anonymous 2012-06-21 23:09

>>44
C is shit outside of kernel design.
U MENA ASSEMBLY

Name: Anonymous 2012-06-21 23:27

>>44
C is excellent for operating systems. The base of such systems could even be extended with languages like Lisp or Python.

Name: Anonymous 2012-06-21 23:38

>>47
How so? If you mean by playing with non-standard extensions, even Haskell can do that: http://programatica.cs.pdx.edu/House/

Name: Anonymous 2012-06-22 3:04

>>48
Software from the kernel, the compiler and the core utilities are well served with C. Low level libraries like GTK and X11 are fine with C. Extending the system means having bindings to high level languages through solutions like Guile or Swig. The base of a complex software (such as Gimp) can be implemented using C and then extended using Scheme.

Name: Anonymous 2012-06-22 4:20

>>49
GTK
oh no you didn't

Name: Anonymous 2012-06-22 4:31

>>50
It's a low level library that provides a base for other work. It's also written in C.

Name: Anonymous 2012-06-22 4:34

>>51
GTK is not a low level library and it uses Glib, an abomination that completely disfigure C.

Name: Anonymous 2012-06-22 5:16

>>52
GTK+ is the base for the GUI analogous to the win32api. It is low level software by that measure

Name: Anonymous 2012-06-22 11:38

>>53
I thought the equivalent of the win32 API was X11, I haven't ever touched it though. Either way GTK+ is very high level considering it brings in object oriented and metaobject facilities.

Name: Anonymous 2012-06-22 11:40

>>42
Why don't you just use C++ features if you're going to use C++ instead of C?

C and goto

1 Name: Anonymous 2012-06-19 6:00

2 Name: Anonymous 2012-06-19 6:17

3 Name: Cudder !MhMRSATORI!FBeUS42x4uM+kgp 2012-06-19 6:31

4 Name: Anonymous 2012-06-19 7:01

5 Name: Anonymous 2012-06-19 7:29

6 Name: >>5 2012-06-19 7:33

7 Name: >>5 2012-06-19 7:43

8 Name: Anonymous 2012-06-19 8:12

9 Name: Anonymous 2012-06-19 8:32

10 Name: Anonymous 2012-06-19 9:55

11 Name: Anonymous 2012-06-19 9:58

12 Name: Anonymous 2012-06-19 11:11

13 Name: Anonymous 2012-06-19 11:58

14 Name: Anonymous 2012-06-19 13:08

15 Name: Anonymous 2012-06-19 13:38

16 Name: Anonymous 2012-06-19 13:39

17 Name: Anonymous 2012-06-19 13:48

18 Name: Anonymous 2012-06-19 13:58

19 Name: Anonymous 2012-06-19 14:04

20 Name: Anonymous 2012-06-19 14:48

21 Name: Anonymous 2012-06-19 17:03

22 Name: Anonymous 2012-06-19 17:05

23 Name: Anonymous 2012-06-19 17:11

24 Name: Anonymous 2012-06-19 18:36

25 Name: Anonymous 2012-06-19 18:37

26 Name: Anonymous 2012-06-19 19:25

27 Name: Anonymous 2012-06-19 19:50

28 Name: Anonymous 2012-06-19 20:05

29 Name: Anonymous 2012-06-19 20:59

30 Name: Anonymous 2012-06-19 21:15

31 Name: Anonymous 2012-06-20 1:30

32 Name: Anonymous 2012-06-20 1:47

33 Name: Anonymous 2012-06-20 3:19

34 Name: Anonymous 2012-06-20 3:22

35 Name: Anonymous 2012-06-20 3:23

36 Name: Anonymous 2012-06-20 4:56

37 Name: Anonymous 2012-06-20 5:34

38 Name: Anonymous 2012-06-20 5:35

39 Name: Cudder !MhMRSATORI!FBeUS42x4uM+kgp 2012-06-20 6:24

40 Name: Anonymous 2012-06-20 7:38

41 Name: Anonymous 2012-06-21 6:29

42 Name: Anonymous 2012-06-21 20:19

43 Name: Anonymous 2012-06-21 20:46

44 Name: Anonymous 2012-06-21 22:55

45 Name: Anonymous 2012-06-21 23:00

46 Name: Anonymous 2012-06-21 23:09

47 Name: Anonymous 2012-06-21 23:27

48 Name: Anonymous 2012-06-21 23:38

49 Name: Anonymous 2012-06-22 3:04

50 Name: Anonymous 2012-06-22 4:20

51 Name: Anonymous 2012-06-22 4:31

52 Name: Anonymous 2012-06-22 4:34

53 Name: Anonymous 2012-06-22 5:16

54 Name: Anonymous 2012-06-22 11:38

55 Name: Anonymous 2012-06-22 11:40