/prog/ - PERL counting words that are missing a vowel

Name: Anonymous 2011-06-04 16:02

Write a program called no_vowels.pl that will count
how many words DO NOT have an 'a' or an 'A',
how many DO NOT have an 'e' or an 'E',
how many DO NOT have an 'i' or an 'I', and so on.

The output should look like the example below.
Note that the letters must be listed in decreasing order of frequency.

14000 words do not have an 'u'
11258 words do not have an 'o'
10123 words do not have an 'i'
11523 words do not have an 'a'
10111 words do not have an 'e'

Name: Anonymous 2011-06-10 20:36

Someone please optimize this, I suck at programming.



#include <cstddef>
#include <cstring>
#include <fstream>
#include <iostream>
#include <list>
#include <set>
#include <utility>
#include <vector>

#include <boost/bind.hpp>
#include <boost/smart_ptr.hpp>
#include <boost/thread.hpp>



#ifdef _WIN32

#include <Windows.h>



size_t CountCPUCores()

{

    SYSTEM_INFO sysinfo;

    GetSystemInfo(&sysinfo);

    return sysinfo.dwNumberOfProcessors;

}



#else



#include <unistd.h>

// Returns the number of processors currently online on non-Windows systems.
//
// Queries sysconf(_SC_NPROCESSORS_ONLN) where the platform provides it.
// Falls back to 1 when the query is unavailable or fails, so the result is
// always at least 1 and callers can safely multiply or divide by it.
size_t CountCPUCores()
{
#ifdef _SC_NPROCESSORS_ONLN
    const long online = sysconf(_SC_NPROCESSORS_ONLN);
    if(online > 0)
        return static_cast<size_t>(online);
#endif
    return 1;
}



#endif



// List of shared-ownership handles to worker threads; main() fills it while
// spawning the counting threads and then walks it to join each one.
typedef std::list< boost::shared_ptr<boost::thread> > threadlist;



// Non-owning view over a run of characters: a start pointer plus a length.
// The struct never allocates or frees; whoever supplied the pointer keeps
// ownership of the underlying buffer.
struct BasicString
{
    // Empty view: null pointer, zero length.
    BasicString() : p(0), len(0) {}

    // View over the len_ characters starting at p_.
    BasicString(char* p_, size_t len_) : p(p_), len(len_) {}

    // Drops the first character from the view.
    // Returns false (leaving the view untouched) when the view is already
    // empty, true after a successful advance.
    bool Step()
    {
        if(!len)
            return false;
        ++p;
        --len;
        // The original fell off the end here without returning a value,
        // which is undefined behaviour for a function returning bool.
        return true;
    }

    char* p;     // first character of the view
    size_t len;  // number of characters remaining in the view
};



class LineReader

{

public:

    LineReader() {}



    LineReader(char* p, size_t len)

        : s(p, len)

    {

    }



    void Set(char* p, size_t len)

    {

        s.p = p;

        s.len = len;

    }



    bool Read(BasicString& into)

    {

        if(!s.len)

            return false;



        into.p = s.p;

        into.len = 0;

        while(*s.p != '\n' && s.Step())

            ++into.len;

        

        if(s.len)

            s.Step();



        return true;

    }



    size_t Length() const

    {

        return s.len;

    }

private:

    BasicString s;

};



// Converts the ASCII letters 'A'..'Z' inside the view to lowercase, in place.
// All other characters are left as they are.
void lower_string(BasicString& s)
{
    char* const end = s.p + s.len;
    for(char* cur = s.p; cur != end; ++cur)
    {
        const char c = *cur;
        if(c < 'A' || c > 'Z')
            continue;
        *cur = 'a' + (c - 'A');
    }
}



void BeginCount(int* counter, size_t& total, LineReader& input)

{

    bool seen[256];

    while(true)

    {

        BasicString s;

        if(!input.Read(s))

            return;

        ++total;



        // ignore empty lines

        if(s.len == 0)

            continue;



        lower_string(s);

        memset(seen, 0, sizeof(seen));



        for(size_t i = 0; i < s.len; ++i, ++s.p)

        {

            if(!seen[*s.p])

            {

                ++counter[*s.p];

                seen[*s.p] = true;

            }

        }

    }

}



int main()

{

    // 8 threads per core seems to be the sweet spot for me

    const size_t ncores = CountCPUCores() * 8;

    const char* usechars = "aeiou";

    const size_t usecharslen = strlen(usechars);

    size_t total = 0;

    std::ifstream file("words.txt");

    if(!file.is_open())

    {

        std::cout << "Cannot open file\n";

        return 1;

    }

    file.seekg(0, std::ios::end);

    size_t filelen = (size_t)file.tellg();

    file.seekg(0, std::ios::beg);

    char* strp = new char[filelen];

    file.read(strp, filelen);

    file.close();

    

    std::vector<LineReader> readers(ncores);

    size_t part = filelen / ncores;

    size_t lastp = filelen;

    for(size_t i = 0; i < (ncores-1); ++i)

    {

        size_t cur = ncores - i - 1;

        size_t tpar = part * cur;

        readers[cur].Set(&strp[tpar], lastp - tpar);

        BasicString tmp;

        readers[cur].Read(tmp);

        lastp -= readers[cur].Length();

    }

    readers[0].Set(strp, lastp);



    std::vector< boost::shared_array<int> > counters(ncores);

    for(size_t i = 0; i < ncores; ++i)

    {

        counters[i] = boost::shared_array<int>(new int[256]);

        memset(counters[i].get(), 0, sizeof(int) * 256);

    }



    std::vector<size_t> totals(ncores, 0);



    {

        threadlist threads;

        for(size_t i = 0; i < ncores; ++i)

        {

            try {

                threads.push_back(boost::shared_ptr<boost::thread>(new boost::thread(

                    boost::bind(BeginCount, counters[i].get(), boost::ref(totals[i]),

                        boost::ref(readers[i])))));

            }

            catch(boost::thread_exception& te)

            {

                BeginCount(counters[i].get(), totals[i], readers[i]);

            }

        }



        for(threadlist::iterator it = threads.begin(); it != threads.end(); ++it)

            it->get()->join();

    }



    delete[] strp;



    std::set< std::pair<size_t, char> > ms;

    for(size_t i = 0; i < usecharslen; ++i)

    {

        size_t cnt = 0;

        for(size_t j = 0; j < ncores; ++j)

            cnt += counters[j][usechars[i]];

        ms.insert(std::pair<size_t, char>(cnt, usechars[i]));

    }



    for(size_t i = 0; i < ncores; ++i)

        total += totals[i];



    for(std::set< std::pair<size_t, char> >::iterator it = ms.begin(); it != ms.end(); ++it)

        std::cout << (total - it->first) << " words do not have '" << it->second << "'\n";

}

Name: Anonymous 2011-06-10 20:40

>>81
// Corrected Step(): advance by one character; false when nothing is left.
bool Step()
{
    if(len == 0)
        return false;

    ++p;
    --len;
    return true;
}

Name: Anonymous 2011-06-10 20:42

>Someone please optimize this, I suck at programming.

Yeah, don't use C++ io streams, they are slow as fuck!, there's like several virtual calls per fucking character read from a file plus 2 or 3 fucking buffers things get copied into. The solution is either to use C style fopen/fread/etc. which is better, but the optimal solution is to use your operating system's file IO facilities, like the POSIX open/read/write calls or Windows OpenFile, ReadFile, etc. and just wrap it in a thin C++ class with no virtual functions.

Also, boost's threads implementation is slow and unoptimized because boost programmers are lazy as fuck like you. Use a C++0x/C++11 threads implementation, which has the same interfaces, but is more optimized.

Name: Anonymous 2011-06-10 20:46

http://aeiou.org.au/

Name: Anonymous 2011-06-10 20:50

>>60
It's actually a fairly functional language. But yeah, it's probably going nowhere, like most of my petty projects ;_;
Well, today it's hard to advance a new language without backing from Microsoft (Haskell) or Google (Python).

Name: Anonymous 2011-06-10 20:55

>>84
I chuckled.

Name: Anonymous 2011-06-10 21:13

>>81
Hmm I just realized you can pass extra parameters directly to boost.thread without using boost.bind.

>>83
Yes I just discovered how slow they are. I wrote the LineReader class after trying to do the whole thing with streams and wondering why it was so butt-fucking-slow.

Name: Anonymous 2011-06-10 22:15

<code>
# Prints one report line per vowel in the format the task asks for:
#   <count> words do not have an '<vowel>'
# Takes a flat list of vowel => count pairs. Lines are emitted in decreasing
# order of count; equal counts are ordered alphabetically so the output is
# deterministic.
sub print_vowels {
    my %vowels = @_;

    foreach my $v (sort { $vowels{$b} <=> $vowels{$a} or $a cmp $b } keys %vowels) {
        print "$vowels{$v} words do not have an '$v'\n";
    }
}

# Count, for each vowel, the whitespace-separated words of the input (files
# named on the command line, or STDIN) that contain neither its lowercase nor
# its uppercase form. Every vowel starts at 0 so that a vowel present in all
# words is still reported.
my %vowels = map { $_ => 0 } qw(a e i o u);

while (my $line = <>) {
    # split ' ' also discards leading whitespace and the trailing newline
    foreach my $word (split ' ', $line) {
        my $lower = lc $word;
        foreach my $v (keys %vowels) {
            $vowels{$v}++ if index($lower, $v) < 0;
        }
    }
}

# Report once, after all input has been read (not once per input line).
print_vowels(%vowels);
</code>

Name: Anonymous 2011-06-11 4:35

　 ∧＿∧　　　∧＿∧　　　∧＿∧　　 ∧＿∧　　　　∧＿∧
　（　・∀・）　（　｀ー´）　　（　´∀｀）　（　ﾟ ∀ﾟ）　　　（　＾∀＾）
　（　　　　つ┳∪━━∪━∪━━∪━∪━∪━┳⊂　　　つ
　｜｜　|　 ┃This thread has peacefully ended.┃　｜｜｜
　（_＿）＿） ┻━━━━━━━━━━━━━━┻　（_＿）＿）　　　　　Thank you.

Name: Anonymous 2011-06-11 5:02

>>81

++total;



// ignore empty lines

if(s.len == 0)

     continue;

Whoops, that ++total is supposed to come after the empty string check.

Name: Anonymous 2011-06-11 8:09

>>90
Woohps, taht ++taotl is suoesppd to cmoe aeftr the epmty sirtng cehkc

Name: Anonymous 2011-06-11 8:45

CUNTS

Name: Anonymous 2011-06-11 17:44

>>91
Nigger, that nigger is nigger nigger nigger nigger.

PERL counting words that are missing a vowel

1 Name: Anonymous 2011-06-04 16:02

81 Name: Anonymous 2011-06-10 20:36

82 Name: Anonymous 2011-06-10 20:40

83 Name: Anonymous 2011-06-10 20:42

84 Name: Anonymous 2011-06-10 20:46

85 Name: Anonymous 2011-06-10 20:50

86 Name: Anonymous 2011-06-10 20:55

87 Name: Anonymous 2011-06-10 21:13

88 Name: Anonymous 2011-06-10 22:15

89 Name: Anonymous 2011-06-11 4:35

90 Name: Anonymous 2011-06-11 5:02

91 Name: Anonymous 2011-06-11 8:09

92 Name: Anonymous 2011-06-11 8:45

93 Name: Anonymous 2011-06-11 17:44