Return Styles: Pseud0ch, Terminal, Valhalla, NES, Geocities, Blue Moon. Entire thread

PERL counting words that are missing a vowel

Name: Anonymous 2011-06-04 16:02

Write a program called no_vowels.pl that will count
how many words DO NOT have an 'a' or an 'A',
how many DO NOT have an 'e' or an 'E',
how many DO NOT have an 'i' or an
'I', and so on.

The output should look like the below. 
Note that the letters must be listed in decreasing order of frequency

14000 words do not have an 'u'
11258 words do not have an 'o'
10123 words do not have an 'i'
11523 words do not have an 'a'
10111 words do not have an 'e'

Name: Anonymous 2011-06-10 20:36

Someone please optimize this, I suck at programming.


#include <boost/thread.hpp>
#include <boost/bind.hpp>
#include <boost/smart_ptr.hpp>
#include <list>
#include <vector>
#include <utility>
#include <set>
#include <fstream>
#include <iostream>

#ifdef _WIN32
#include <Windows.h>

size_t CountCPUCores()
{
    SYSTEM_INFO sysinfo;
    GetSystemInfo(&sysinfo);
    return sysinfo.dwNumberOfProcessors;
}

#else

// write this yourself linuxfag
size_t CountCPUCores()
{
    return 1;
}

#endif

typedef std::list< boost::shared_ptr<boost::thread> > threadlist;

struct BasicString
{
    BasicString() : p(0), len(0) {}
    BasicString(char* p_, size_t len_) : p(p_), len(len_) {}
    bool Step() { if(!len) return false; ++p; --len; }
    char* p;
    size_t len;
};

class LineReader
{
public:
    LineReader() {}

    LineReader(char* p, size_t len)
        : s(p, len)
    {
    }

    void Set(char* p, size_t len)
    {
        s.p = p;
        s.len = len;
    }

    bool Read(BasicString& into)
    {
        if(!s.len)
            return false;

        into.p = s.p;
        into.len = 0;
        while(*s.p != '\n' && s.Step())
            ++into.len;
       
        if(s.len)
            s.Step();

        return true;
    }

    size_t Length() const
    {
        return s.len;
    }
private:
    BasicString s;
};

void lower_string(BasicString& s)
{
    char* p = s.p;
    for(size_t i = 0; i < s.len; ++i, ++p)
    {
        if(*p >= 'A' && *p <= 'Z')
            *p = 'a' + (*p - 'A');
    }
}

void BeginCount(int* counter, size_t& total, LineReader& input)
{
    bool seen[256];
    while(true)
    {
        BasicString s;
        if(!input.Read(s))
            return;
        ++total;

        // ignore empty lines
        if(s.len == 0)
            continue;

        lower_string(s);
        memset(seen, 0, sizeof(seen));

        for(size_t i = 0; i < s.len; ++i, ++s.p)
        {
            if(!seen[*s.p])
            {
                ++counter[*s.p];
                seen[*s.p] = true;
            }
        }
    }
}

int main()
{
    // 8 threads per core seems to be the sweet spot for me
    const size_t ncores = CountCPUCores() * 8;
    const char* usechars = "aeiou";
    const size_t usecharslen = strlen(usechars);
    size_t total = 0;
    std::ifstream file("words.txt");
    if(!file.is_open())
    {
        std::cout << "Cannot open file\n";
        return 1;
    }
    file.seekg(0, std::ios::end);
    size_t filelen = (size_t)file.tellg();
    file.seekg(0, std::ios::beg);
    char* strp = new char[filelen];
    file.read(strp, filelen);
    file.close();
   
    std::vector<LineReader> readers(ncores);
    size_t part = filelen / ncores;
    size_t lastp = filelen;
    for(size_t i = 0; i < (ncores-1); ++i)
    {
        size_t cur = ncores - i - 1;
        size_t tpar = part * cur;
        readers[cur].Set(&strp[tpar], lastp - tpar);
        BasicString tmp;
        readers[cur].Read(tmp);
        lastp -= readers[cur].Length();
    }
    readers[0].Set(strp, lastp);

    std::vector< boost::shared_array<int> > counters(ncores);
    for(size_t i = 0; i < ncores; ++i)
    {
        counters[i] = boost::shared_array<int>(new int[256]);
        memset(counters[i].get(), 0, sizeof(int) * 256);
    }

    std::vector<size_t> totals(ncores, 0);

    {
        threadlist threads;
        for(size_t i = 0; i < ncores; ++i)
        {
            try {
                threads.push_back(boost::shared_ptr<boost::thread>(new boost::thread(
                    boost::bind(BeginCount, counters[i].get(), boost::ref(totals[i]),
                        boost::ref(readers[i])))));
            }
            catch(boost::thread_exception& te)
            {
                BeginCount(counters[i].get(), totals[i], readers[i]);
            }
        }

        for(threadlist::iterator it = threads.begin(); it != threads.end(); ++it)
            it->get()->join();
    }

    delete[] strp;

    std::set< std::pair<size_t, char> > ms;
    for(size_t i = 0; i < usecharslen; ++i)
    {
        size_t cnt = 0;
        for(size_t j = 0; j < ncores; ++j)
            cnt += counters[j][usechars[i]];
        ms.insert(std::pair<size_t, char>(cnt, usechars[i]));
    }

    for(size_t i = 0; i < ncores; ++i)
        total += totals[i];

    for(std::set< std::pair<size_t, char> >::iterator it = ms.begin(); it != ms.end(); ++it)
        std::cout << (total - it->first) << " words do not have '" << it->second << "'\n";
}

Newer Posts
Don't change these.
Name: Email:
Entire Thread Thread List