/prog/ - C - read a random line from a file.

Name: Anonymous 2006-03-06 10:31

dear world4chan, I wrote this to return a random line read from a file, but I hate it. what would you do?

#define MAX_LINE_SIZE 1024

/*
 * Print one uniformly chosen line of "quotes.txt" to stdout, prefixed with
 * "line N of M: ".
 *
 * sock_desc is accepted for interface compatibility; it is not used here.
 *
 * The file is scanned twice: once to count lines, once to reach the chosen
 * line. A final line without a trailing '\n' counts as a line. At most
 * MAX_LINE_SIZE-1 characters of the chosen line are printed. Because the
 * choice is rand() % count, lines beyond RAND_MAX+1 can never be selected.
 *
 * Diagnostics ("!! ...") are written to stdout; nothing is returned.
 */
void readQuote(int sock_desc)
{
      static int seeded = 0;   // seed rand() once, not on every call
      FILE *quoteSource;       // source file pointer
      long numLines = 0;       // number of lines in the file
      long rndLine;            // 1-based index of the chosen line
      long skipped = 0;        // complete lines skipped so far
      int ch;                  // int, so EOF is distinguishable from data
      int last = '\n';         // last character seen while counting
      char buff[MAX_LINE_SIZE] = {0};

      (void)sock_desc;

      // Reseeding with time() on every call would make all calls within
      // the same second pick the same line.
      if (!seeded)
      {
            srand((unsigned)time(NULL));
            seeded = 1;
      }

      //open file; check for errors
      quoteSource = fopen("quotes.txt", "r");
      if (quoteSource == NULL)
      {
            printf("!! Error accessing file.\n");
            return;
      }

      //count lines; test the read result itself rather than feof()
      while ((ch = fgetc(quoteSource)) != EOF)
      {
            if (ch == '\n')
                  numLines++;
            last = ch;
      }
      // an unterminated final line is still a line
      if (last != '\n')
            numLines++;
      rewind(quoteSource);

      if (numLines == 0)
      {
            printf("!! Quote file empty\n");
            fclose(quoteSource);
            return;
      }

      //select a line and read until that line is found
      rndLine = (rand() % numLines) + 1;
      printf("line %ld of %ld: ", rndLine, numLines);

      // Skip whole lines character by character so that lines longer than
      // the buffer cannot shift the selection.
      while (skipped < rndLine - 1 && (ch = fgetc(quoteSource)) != EOF)
      {
            if (ch == '\n')
                  skipped++;
      }

      if (fgets(buff, MAX_LINE_SIZE, quoteSource) == NULL)
            buff[0] = '\0';

      // buff keeps the line's own '\n' (if any); the extra '\n' here is
      // retained from the original output format.
      printf("%s\n", buff);

      fclose(quoteSource);
}

Name: Anonymous 2006-03-06 10:43

$ ruby -ne 'l = $_ if rand < 1/$..to_f; END { puts l }' < /usr/share/dict/words

Name: Anonymous 2006-03-06 10:47

let's stick to C, please.

Name: Anonymous 2006-03-06 11:18

>>1
Ouch, that algorithm hurts. You don't have to know the number of lines to pick a random line. Just do some magic with RAND_MAX and division and you'll manage it. Too bad I can't remember how :p

Name: Shirizaan 2006-03-06 11:42

Hrm. Yeah that's a touch messy but I can't think of a better way to do it...

Name: Anonymous 2006-03-06 16:38

I would pick a random character from the file and work backwards and forwards from it until I find a full line. This means that the probability of picking a particular line is weighted towards the length of the line, but it is a lot quicker than reading them all in.

#define MAX_LINE_SIZE 1024

/*
 * Print the line of "quotes.txt" that contains a randomly chosen byte.
 *
 * A random offset is picked, up to MAX_LINE_SIZE bytes are read forward from
 * it to find the end of the line, and the remaining buffer space is filled
 * with the bytes immediately preceding the offset to find the start of the
 * line. The chance of a line being chosen is proportional to its length.
 * Lines longer than the buffer are printed truncated.
 *
 * Offsets are computed with ftell/fseek on a text-mode stream, which is only
 * byte-exact on platforms where text and binary streams are identical.
 *
 * Diagnostics ("!! ...") are written to stdout; nothing is returned.
 */
void readQuote()
{
    static int seeded = 0; // seed rand() once, not on every call
    FILE* quoteSource; //source file pointer
    long fileSize = 0;
    long currentPos = 0;
    int readLength;
    int secondReadLength;
    // buffer layout: [tail of line]\0[head of line]\0
    // hence +2 for the two null terminators
    char buff[MAX_LINE_SIZE+2];
    char *endOfLinePtr;
    char *startOfLinePtr;
    int i;

    // Reseeding with time() on every call would make all calls within
    // the same second pick the same offset.
    if (!seeded)
    {
      srand((unsigned)time(NULL));
      seeded = 1;
    }

    //open file; check for errors
    quoteSource = fopen("quotes.txt", "r");
    if (quoteSource == NULL)
    {
      printf("!! Error accessing file.\n");
      return;
    }

    //get the file size, if zero (or ftell failed), exit
    fseek(quoteSource, 0, SEEK_END);
    fileSize = ftell(quoteSource);

    if (fileSize <= 0)
    {
      printf("!! Quote file empty\n");
      fclose(quoteSource);
      return;
    }

    // select random position in the file
    // if RAND_MAX < filesize then make sure enough rand()s are called to compensate
    for (i=0;i<=fileSize/RAND_MAX;i++)
      currentPos += rand();
    currentPos %= fileSize;
    fseek(quoteSource, currentPos, SEEK_SET);

    // read forward from the chosen character; fread does not terminate the
    // buffer, so do it here before using any string function on it
    readLength = (int)fread(buff, 1, MAX_LINE_SIZE, quoteSource);
    buff[readLength] = '\0';

    endOfLinePtr = strchr(buff, '\n');
    if (endOfLinePtr == NULL && readLength < MAX_LINE_SIZE)
    {
      // no '\n' and a short read: the line ends at end of file
      endOfLinePtr = buff + readLength;
    }
    // (if still NULL, the buffer holds MAX_LINE_SIZE bytes with no newline
    // and only that forward part is printed)

    if (endOfLinePtr != NULL)
    {
      // fill remainder of buffer up with previous part of line
      endOfLinePtr[0] = '\0';
      endOfLinePtr++;
      secondReadLength = MAX_LINE_SIZE-(int)(endOfLinePtr-buff);
      // never seek before the start of the file: a negative offset makes
      // fseek fail and the read would then return unrelated data
      if (secondReadLength > currentPos)
      {
        secondReadLength = (int)currentPos;
      }
      fseek(quoteSource, currentPos-secondReadLength, SEEK_SET);
      secondReadLength = (int)fread(endOfLinePtr, 1, secondReadLength, quoteSource);
      endOfLinePtr[secondReadLength] = '\0';

      // then search for last newline for start of line
      startOfLinePtr = strrchr(endOfLinePtr, '\n');
      if (startOfLinePtr == NULL)
      {
        // if not found, everything read back belongs to this line
        startOfLinePtr = endOfLinePtr;
      }
      else
      {
        startOfLinePtr++; // skip newline
      }
      // print latter part of buffer (the head of the line)
      printf("%s", startOfLinePtr);
    }
    // print from start of buffer (the tail of the line)
    printf("%s\n", buff);

    fclose(quoteSource);
}

Name: Anonymous 2006-03-06 18:17

// select random position in the file
// if RAND_MAX < filesize then make sure enough rand()s are called to compensate
for (i=0;i<=fileSize/RAND_MAX;i++)
currentPos += rand();
currentPos %= fileSize;
FAIL for not just scaling it up or down

Name: Anonymous 2006-03-07 12:50

>>7

Idiot, that would leave gaps

Name: Anonymous 2006-03-08 19:40

>>4

Can't remember how my ass. You clearly haven't got a clue.

Name: Anonymous 2006-03-12 19:59

>>1
First of all, capitalize it properly like ReadQuote for fuck's sake. Stupid Java boys.

Name: Anonymous 2006-03-12 20:03

>>10

Thats really gonna fix his problem, jackass.

Name: Anonymous 2006-03-12 20:58

>>10-11

Problem already fixed in >>6, please keep up.

Name: Anonymous 2006-03-13 0:12

#ifndef MAX_LINE_SIZE
#define MAX_LINE_SIZE 1024 // fallback so this snippet stands on its own
#endif

/*
 * Print one randomly chosen line of "bans.php" to stdout in a single pass.
 *
 * The k-th chunk returned by fgets replaces the current choice when
 * rand() % k == 0, so the line count never has to be known in advance.
 * A line longer than MAX_LINE_SIZE-1 characters is seen as several chunks.
 *
 * Prints "!! Error accessing file." if the file cannot be opened and
 * prints nothing for an empty file. Nothing is returned.
 */
void readQuote() {
    static int seeded = 0; // seed rand() once, not on every call
    FILE* quoteSource;
    char buf[MAX_LINE_SIZE];
    char quote[MAX_LINE_SIZE] = ""; // stays empty if no line is ever read
    int linenumber = 0;

    if (!seeded) {
      srand((unsigned)time(NULL));
      seeded = 1;
    }

    quoteSource = fopen("bans.php","r");
    if (quoteSource == NULL) {
      printf("!! Error accessing file.\n");
      return;
    }

    // loop on the read result itself; bound the read by the real buffer size
    while (fgets(buf, sizeof buf, quoteSource) != NULL) {
      if (rand() % ++linenumber == 0)
        strcpy(quote, buf); // same capacity as buf, so this cannot overflow
    }

    printf("%s", quote);
    fclose(quoteSource);
}
This is the algorithm used in >>4, which is also in perlfaq5, and The Art of Computer Programming by Knuth.

Name: Anonymous 2006-03-13 3:50

>>13
Actually, I used this:
rand() < (RAND_MAX / counter)

Name: Anonymous 2006-03-13 19:14

Unfortunately, >>13 fails it somewhat. If the line chosen is line N then (N-1) previous lines have to be read in from the file first - what a waste of time! Thus the complexity is O(N), as compared to >>6 which has complexity O(1) 'cause it just obtains file size and does up to two reads.

Name: Anonymous 2006-03-13 19:41

>>15
as other people have stated, >>6 does not give each line an equal probability of being shown unless all lines are exactly the same length (maybe you could pad with spaces and trim it afterwards -- probably worth the small increase in cpu time)

Name: Anonymous 2006-03-14 19:39

>>16

So what?

Name: Anonymous 2006-03-15 3:10 (sage)

>>17
So don't say "Solution A is better than B because it's faster even though A doesn't really work correctly." That's what.

Name: Anonymous 2006-03-15 15:45

>>18

No, I mean "so what" as in "so what does it matter that each line's probability is proportional to its length"? It is still a random choice.

Name: Anonymous 2006-03-15 22:06

>>19

Way to go for failing to know what random means and still posting in /prog/

Name: Anonymous 2006-03-17 10:29

>>20

The problem does not require exact randomness, so you fail for being overly pedantic.

Name: Anonymous 2006-03-17 10:33

>>21
Who said it doesn't?

Name: Anonymous 2006-03-17 12:22

>>22
Anonymous, who else?

Name: Anonymous 2006-03-19 9:30

>>22

Common sense says so.

Name: Anonymous 2006-03-19 10:51

>>$ ruby -ne 'l = $_ if rand < 1/$..to_f; END { puts l }' < /usr/share/dict/words

Ruby is such a piece of a shit. Look what that code does is for each line on line n chooses that line with a probability of 1/n. So for line 1 1/1, line 2: 1/2, line 3: 1/3 ...

Anyways when you sum up all the probabilities you find that it actually chooses a line uniformly randomly from a stream without the need to know the size before hand.

The reason why ruby is a shit eating fuck tard language is because that code doesn't explain any of this AT ALL.

here's some C pseudo code(make the routines readline and endofline).

// Pseudo-code: keep one candidate line in `saved` and replace it with the
// current line with probability 1/count, so the line count need not be known.
// readline() and endofline() are helpers the reader is expected to write;
// fd, buf, saved, prob and count are not declared in this fragment.
// NOTE(review): count is presumably 0 before the loop -- confirm.
readline(fd,buf);
strcpy(saved,buf);
while(!endofline(fd)) {
   // chance of replacing the saved line shrinks as more lines are seen
   prob = 1.0/(++count);
   // NOTE(review): MAX_RANDOM is not a standard C name; presumably RAND_MAX is meant.
   if (1.0*rand()/MAX_RANDOM <= prob) {
   strcpy(saved,buf);
   }
   // NOTE(review): whether the last line read here is ever considered depends
   // on when endofline() becomes true -- verify against its implementation.
   readline(fd,buf);
}
puts(saved);

You know C is a pretty shit eating language too.

#!/bin/sh
# Print one random line from the files given as arguments: each input line
# replaces the remembered line `c` when rand() < 1/FNR, and `c` is printed at END.
# The awk program is double-quoted, so $RANDOM is expanded by the shell (to seed
# srand) while \$0 is left for awk.
# NOTE(review): $RANDOM is not guaranteed by POSIX sh, and the unquoted $* splits
# file names containing whitespace -- confirm the intended shell and inputs.
# NOTE(review): FNR restarts for each input file; verify behaviour with several files.
awk "BEGIN {srand($RANDOM*$RANDOM);} {c = (rand() < (1.0 / FNR))?\$0:c} END { print c }" $*

Name: Anonymous 2006-03-19 10:55

>>25 beaten by 13 lol

Name: Anonymous 2006-03-19 12:27

The reason why ruby is a shit eating fuck tard language is because that code doesn't explain any of this AT ALL.
How do I commented code.

Name: Anonymous 2006-03-19 14:37

>>25
how do i picked random with 1/1 prob?!?!?!

Name: Anonymous 2006-03-20 7:14

>>25

Haha stupid

Name: Anonymous 2006-03-21 12:45

>>28

if you only have 1 element and you have to pick it randomly.. how do you pick it?

Name: Anonymous 2006-03-21 13:12

>>30
I do not know, I am an undergraduate :'(

Name: Anonymous 2006-03-22 21:47

>>25
We know you're a pretty shit eating programmer too

Name: Anonymous 2006-03-23 5:17

>>30
Implement Mersenne Twister one more time, yourself, using inline assembly, with AT&T's shitty fugly syntax, because you're hardcore. Then use it to choose between 1 or 1.

Name: Anonymous 2006-03-23 6:47

AT&T's shitty fugly syntax

SAY IT LIKE IT IS!

Why the fuck is it so common in UNIX land? Intel-style please!

Name: Anonymous 2006-03-23 7:49

It's popular among the GNU and Unix people because one or more of these reasons:
- It's easier to parse
- It's not Intel
- It's not what Microsoft uses; what Microsoft uses is evil so they must do things the other way
- It's fucking ugly, and these people sometimes have a taste for the fucking ugly

Name: Anonymous 2006-03-23 13:10

>>35
Fails. While it's true there's the "anything not M$ is teh win" mentality, it's mostly because when gas et al were being written, AT&T syntax was what was being used on the development machines (which I think were VAXen and Suns and other UNIX machines).

Also, GNU was written with UNIX compatibility _specifically in mind_; and guess what syntax is big on real UNIX boxes. AT&T.

Name: Anonymous 2006-03-23 18:00

>>32

I do enjoy scat, but that isn't what you meant.

When you sir go to linux user group meetings you buy beer and cigarettes. Before arriving home after another awesome LUG meeting you pour the beer all over yourself and surrounds yourself with burning cigarettes in order to convince your parents that you do have a social life. Sometimes you apply lipstick to your hand and gives yourself kisses.

Name: Anonymous 2006-03-23 18:24 (sage)

>>37 is very experienced, please read his words carefully.

Name: Anonymous 2010-09-21 13:23

I wouldn't want to read a random line from a file in C.

Name: Anonymous 2010-09-21 13:47

>>40
God damn it, you're such a fuckface. I hope you choke on a million dicks.

Name: VIPPER 2010-09-21 13:52

I must say, i enjoy this thread.

Name: Anonymous 2010-09-21 14:34

>>41
Why you so mad bro?

Name: sage 2010-09-21 15:26

>>43
Do we really have to point it out?
38 Name: Anonymous : 2006-03-23 18:24 (sage)
>>37 is very experienced, please read his words carefully.
40 Name: Anonymous : 2010-09-21 13:23
I wouldn't want to read a random line from a file in C.

Name: Anonymous 2010-09-21 16:29

>>1
Being from four years ago is not an excuse not to use the [code] tags.

Name: Anonymous 2011-09-23 11:53

[tt]nigger[/tt]

C - read a random line from a file.

1 Name: Anonymous 2006-03-06 10:31

2 Name: Anonymous 2006-03-06 10:43

3 Name: Anonymous 2006-03-06 10:47

4 Name: Anonymous 2006-03-06 11:18

5 Name: Shirizaan 2006-03-06 11:42

6 Name: Anonymous 2006-03-06 16:38

7 Name: Anonymous 2006-03-06 18:17

8 Name: Anonymous 2006-03-07 12:50

9 Name: Anonymous 2006-03-08 19:40

10 Name: Anonymous 2006-03-12 19:59

11 Name: Anonymous 2006-03-12 20:03

12 Name: Anonymous 2006-03-12 20:58

13 Name: Anonymous 2006-03-13 0:12

14 Name: Anonymous 2006-03-13 3:50

15 Name: Anonymous 2006-03-13 19:14

16 Name: Anonymous 2006-03-13 19:41

17 Name: Anonymous 2006-03-14 19:39

18 Name: Anonymous 2006-03-15 3:10 (sage)

19 Name: Anonymous 2006-03-15 15:45

20 Name: Anonymous 2006-03-15 22:06

21 Name: Anonymous 2006-03-17 10:29

22 Name: Anonymous 2006-03-17 10:33

23 Name: Anonymous 2006-03-17 12:22

24 Name: Anonymous 2006-03-19 9:30

25 Name: Anonymous 2006-03-19 10:51

26 Name: Anonymous 2006-03-19 10:55

27 Name: Anonymous 2006-03-19 12:27

28 Name: Anonymous 2006-03-19 14:37

29 Name: Anonymous 2006-03-20 7:14

30 Name: Anonymous 2006-03-21 12:45

31 Name: Anonymous 2006-03-21 13:12

32 Name: Anonymous 2006-03-22 21:47

33 Name: Anonymous 2006-03-23 5:17

34 Name: Anonymous 2006-03-23 6:47

35 Name: Anonymous 2006-03-23 7:49

36 Name: Anonymous 2006-03-23 13:10

37 Name: Anonymous 2006-03-23 18:00

38 Name: Anonymous 2006-03-23 18:24 (sage)

40 Name: Anonymous 2010-09-21 13:23

41 Name: Anonymous 2010-09-21 13:47

42 Name: VIPPER 2010-09-21 13:52

43 Name: Anonymous 2010-09-21 14:34

44 Name: sage 2010-09-21 15:26

45 Name: Anonymous 2010-09-21 16:29

48 Name: Anonymous 2011-09-23 11:53