Return Styles: Pseud0ch, Terminal, Valhalla, NES, Geocities, Blue Moon.

Pages: 1-4041-

JS Optimization

Name: Anonymous 2011-09-12 5:05

After profiling, I see that roughly 25-30% of my program's time is spent inside this function.

Unfortunately, this isn't my algorithm, and I don't think I can improve it. The paper it came from is here: http://www.dgp.toronto.edu/people/stam/reality/Research/pdf/GDC03.pdf

My only option seems to be "performance tricks", but I don't see any openings. The only thing I can say for sure that I haven't tried is ``loop unrolling'', but I don't have much experience doing that myself, and I don't see how I could do it here while staying in bounds. If it helps, height, width, and iterations are all set by the user.

So: any ideas on how to improve the performance of this code?

function lin_solve(b, x, x0, a, c) {
        if (a === 0 && c === 1) {
            for (var j = 1; j <= height; j++) {
                var currentRow = j * rowSize;
                ++currentRow;
        var i = width - 1; do {
                    x[currentRow] = x0[currentRow];
                    ++currentRow;
                } while (i--);
            }
            set_bnd(b, x);
        } else {
            var invC = 1 / c;
            for (var k = 0; k < iterations; k++) {
                for (var j = 1; j <= height; j++) {
                    var lastRow = (j - 1) * rowSize;
                    var currentRow = j * rowSize;
                    var nextRow = (j + 1) * rowSize;
                    var lastX = x[currentRow];
                    ++currentRow;
                   
            var i = width; do {
                        lastX = x[currentRow] = (x0[currentRow] + a*(lastX+x[++currentRow]+x[++lastRow]+x[++nextRow])) * invC;
            } while (i-- > 1)
                }
                set_bnd(b, x);
            }
        }
    }

Name: Anonymous 2011-09-12 5:07

Looks like the indents got fucked up a bit. Let's try that again:

    function lin_solve(b, x, x0, a, c) {
        if (a === 0 && c === 1) {
            for (var j=1 ; j<=height; j++) {
                var currentRow = j * rowSize;
                ++currentRow;
                var i = width - 1; do {
                    x[currentRow] = x0[currentRow];
                    ++currentRow;
                } while (i--);
            }
            set_bnd(b, x);
        } else {
            var invC = 1 / c;
            for (var k=0 ; k<iterations; k++) {
                for (var j=1 ; j<=height; j++) {
                    var lastRow = (j - 1) * rowSize;
                    var currentRow = j * rowSize;
                    var nextRow = (j + 1) * rowSize;
                    var lastX = x[currentRow];
                    ++currentRow;
                   
                    var i = width; do {
                        lastX = x[currentRow] = (x0[currentRow] + a*(lastX+x[++currentRow]+x[++lastRow]+x[++nextRow])) * invC;
                    } while (i-- > 1)
                }
                set_bnd(b, x);
            }
        }
    }

Name: Anonymous 2011-09-12 5:11

I'd say to write it in C, but obviously that cannot be an option.

Name: Anonymous 2011-09-12 5:45

What is it, a Gauss Seidel iterative solver? I'm not reading that ugly as fuck paper.

Name: n3n7i 2011-09-12 5:47

At first glance, Could replace



var lastRow = (j - 1) * rowSize;
var currentRow = j * rowSize;
var nextRow = (j + 1) * rowSize;

with

var lastRow = (j - 1) * rowSize;
var currentRow = lastRow + j;
var nextRow = currentRow + j;


for a minor speed boost...
Also probably not declare var inside a loop?

Name: Anonymous 2011-09-12 6:20

replace for loop with do while loop for another minor speed boost.


for (var i = 0; i < bla; i++) {
    ...
}

| | |
V V V

var i = bla;
do {
    ...
} while (i--)

Name: Anonymous 2011-09-12 6:22

>>4
Yep, that's it.

>>5
There's no semantic difference in where the var keyword is placed in this case. Your other suggestion is clearer code if nothing else, so thank you.

Name: Anonymous 2011-09-12 6:28

Stick the if-else into one GIANT for loop, the bigger it is the faster it goes. VROOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOMMMMMMMMMMMMMMMMM

Name: Anonymous 2011-09-12 6:37

>>6
This actually will work great for the iterations loop, since this way we always do at least one iteration, even if the user wasn't thinking.

Does this matter if the conditional isn't an implicit > 0? The nested loop in the else block looks difficult to untangle to go in the negative direction; I should probably know my ROI before I try it.

Currently at:
    function lin_solve(b, x, x0, a, c) {
        if (a === 0 && c === 1) {
            var j = height; do {
                var currentRow = j * rowSize;
                ++currentRow;
                var i = width - 1; do {
                    x[currentRow] = x0[currentRow];
                    ++currentRow;
                } while (i--);
            } while (j-- > 1);
            set_bnd(b, x);
        }
        else {
            var invC = 1 / c;
            var k = iterations; do {
                for (var j=1 ; j<=height; j++) {
                    var lastRow = (j - 1) * rowSize;
                    var currentRow = lastRow + rowSize;
                    var nextRow = currentRow + rowSize;
                    var lastX = x[currentRow];
                    ++currentRow;
                   
                    var i = width; do {
                        lastX = x[currentRow] = (x0[currentRow] + a*(lastX+x[++currentRow]+x[++lastRow]+x[++nextRow])) * invC;
                    } while (i-- > 1)
                }
                set_bnd(b, x);
            } while (k--);
        }
    }

Name: Anonymous 2011-09-12 10:17

>So: any ideas on how to improve the performance of this code?
Rewrite in C, compile with max optimizations.
If you stay with JS, you need to improve your entire algorithm. Hunting for small performance tweaks after the design is complete is "post-mature optimization", its always harder, less rewarding and more limited than true algorithmic optimization which is done outside-the-box.

Name: Anonymous 2011-09-12 10:37

Your first loop can be "flattened" like this (I don't do JS so there might be syntax errors, and you might need to adjust the indices a little, but you get the point.) It's essentially a memcpy().

var i = 1, j = rowSize*height;
while(i<l)
 x[i] = x0[i++];


A good JIT JS compiler should turn that into a rep movsd which is about as fast as you can get (since it's an intrinsic loop in hardware.)

Apply the same principles to your second loop. Try to move the multiplies out of the loop. If you profile line-by-line you will find they are taking the bulk of the execution time.

I also found this idiot who contradicts his own benchmarks:
http://lemire.me/blog/archives/2010/07/19/is-multiplication-slower-than-addition/
(Scroll down to the table of results. Hardly surprising.)

Name: Anonymous 2011-09-12 10:42

Whats the rest of program that takes 75%? Its that bad?

Name: Anonymous 2011-09-12 11:07

Omg hai ^___^ Im Kirby-chan and I absolutely luuuv @_____@ worldfourch <3 and my fav is the /prog/ and lounge boards!!!!! Okies so anyways, im going to tell you about the BEST day of my life when I met my hot husband sussman!! <333333333 OMFGZ HE WAS SOOOOO FREAKIN KAWAII IN PERSON!!! Supa kawaii desu!!!!!!!! ^______________________________________^
When I walked onto 6.001 =^____^=I looked up and saw&SUSSMAN!!!!!!!!!!!!!!! <33333333333333333333333333333333333333333333333333333333333333!!!!
KONNICHIWA OMGZZZZZZZZZZZZZZZZ SUPA SUPA SUPA KAWAII SUSSMAN-SAMA!!!!! I yelled n____n then he turned chibi then un-chibi!!
he looked at me [O.O;;;;;;;;;;;] and then he saw how hot I am *___* he grabbed my dick and winked ~_^ then pulled me behind a Lisp machine o_o and started to conjure the spirits of the computer with his spells!!!!!! [OMG!!! HIS PARENS LOOKED LIKE A /prog/SNAKE!!! RLY!! >.> <.< >.< *(^O^)* *(^O^)* *(^O^)*] then I saw some baka stupid pythonista watching us and I could tell he was forcing his indentation with his eyes!!!!!!! [ -_____________-;;;;; OMG I COULDNT BELIEVE IT EITHER!!! (ò_ó) (ò_ó) (ò_ó)] so I yelled UH UH BAKA NEKO THATS MY MAN WHY DONT YOU GO HOOK UP WITH ABELSON CAUSE SUSSMAN-SAMA LOVES ME!!! (ò_ó) then sussman held me close =^____^= and said he would only ever love me!!!!!!!! an guess wat!!!!!! he conjured the spirits again!!!!!!! ** (*O*)/ then we went to his apartment and programmed all night long and made 42 metacircular lisp interpreters and they all became highly successful!!!!!!!!!!!!!!! Nyaaaaa!!! (^________<) ^_________________^;;;;;;;;;;;;;;;;

Name: Anonymous 2011-09-12 13:25

Name: Anonymous 2011-09-12 15:04

GC is shit.

Name: Anonymous 2011-09-12 16:27

>>11
This is what I would do, but frankly, I'm not that talented. This is "Babby's first fluid dynamics" for me.

>>12
As of right now, most of it isn't actually the Javascript, kind of.

~60% of it is spent by the browser, natively, in its DOM rendering procedure.

This is a bottleneck correlates with my program. I've written an ASCII renderer for the fluid solver, and the display function is essentially an Array.join and a call to innerHTML. I imagine browsers don't optimize for redrawing a big-ass text block once every 10 ms.

I might be able to have more faster speeds with a canvas made to look like any old element, scaled to the page dimensions, or maybe some SVG trickery.

(This feedback from the profiler is a bit confusing, because it says that the call to innerHTML accounts for less than 1%. However, I'm pretty sure that's what's going on.)

~7% of it is spent in the garbage collector.

My guess is that the advantages from temporary cache values outweigh the extra time spent collecting. I would try optimizing for this as a very last resort.

~30% is spent on the physics solver's update function.

I've managed to take down the lin_solve time to ~12-15% by lowering the iterations and doing half of what >>11 suggested (I'm currently working on how to unroll the second, more complicated set of loops -- we'll see if it helps at all).

~7% is a different solver function, advect, and another ~6% is in project. Each have nested width/height loops with multiplications, so I might be able to do some unrolling as per >>11's advice.

Ha. Aren't you glad you asked?

Name: Anonymous 2011-09-12 18:10

make rowsize a power of two and use bitshifts, except modern computers do MUL about as fast as SHIFT.  I'll bet your compiler takes-care-of unrolling if you set some option.

Name: Anonymous 2011-09-12 18:19

wuts in the set_bnd() function?

Name: Anonymous 2011-09-12 18:51

>>16
>>11's optimization is not loop unrolling, but flattening.

Unrolling = multiple iterations in one iteration of the loop
Flattening = replace nested loops with ones having fewer degrees of nesting

>>17
MUL is not as fast as SHIFT except in trivial cases. Shifts are essentially free with a barrel shifter.

Name: Anonymous 2011-09-12 18:55

>>17

That sounds clever, but I don't think I can guarantee rowSize as a power of 2, since it's based on width which is set by the user. Although it could be my naivete about the implementation that doesn't allow me to see that as a possibility.

Also, this code will be run in the browser, so no CFLAGS YO.

>>18

Accounts for ~0.3%.

    function set_bnd(b, x) {
   
        // small caching optimizations
        var lastRow = height * rowSize;
        var maxEdge = (height + 1) * rowSize;
        var maxWidth = (width + 1);
       
        if (b===1) {
            for (var i = 1; i <= width; i++) {
                x[i] =  x[i + rowSize];
                x[i + maxEdge] = x[i + lastRow];
            }

            for (var j = 1; i <= height; i++) {
                var jRow = j * rowSize;
                x[jRow] = -x[1 + jRow];
                x[maxWidth + jRow] = -x[width + jRow];
            }
        } else if (b === 2) {
            for (var i = 1; i <= width; i++) {
                x[i] = -x[i + rowSize];
                x[i + maxEdge] = -x[i + lastRow];
            }

            for (var j = 1; j <= height; j++) {
                var jRow = j * rowSize;
                x[jRow] =  x[1 + jRow];
                x[maxWidth + jRow] =  x[width + jRow];
            }
        } else {
            for (var i = 1; i <= width; i++) {
                x[i] =  x[i + rowSize];
                x[i + maxEdge] = x[i + lastRow];
            }

            for (var j = 1; j <= height; j++) {
                var jRow = j * rowSize;
                x[jRow] =  x[1 + jRow];
                x[maxWidth + jRow] =  x[width + jRow];
            }
        }
        x[0]                = 0.5 * (x[1] + x[rowSize]);
        x[maxEdge]          = 0.5 * (x[1 + maxEdge] + x[lastRow]);
        x[maxWidth]         = 0.5 * (x[width] + x[maxWidth + rowSize]);
        x[maxWidth+maxEdge]    = 0.5 * (x[width + maxEdge] + x[maxWidth + lastRow]);
    }

Name: Anonymous 2011-09-12 18:57

When you assign to innerHTML, the browser probably reflows the page, which is a comparatively slow operation.
Canvas, or WebGL if you want to get fancy, could be a lot faster.
WebGL in particular would let you upload a raw array and let the shader do the work.

Is this for some HTML5 demonstration, or something that should run on smartphones?
If none of the above and you just need something that runs in a browser, as much as it pains me to say it, you'll get better performance by making it a Java applet.
That's not to say I recommend it if you can get at all acceptable performance by doing it `browser native'.

Name: Anonymous 2011-09-12 19:04

your daily reminder to check those dubs

Name: Anonymous 2011-09-12 19:07

>>16
This is what I would do, but frankly, I'm not that talented. This is "Babby's first fluid dynamics" for me.
You don't need to know what the algorithm is for or how it does what it does if you're making these sorts of "mechanical" optimizations; I know absolutely nothing about fluid dynamics nor the workings of this algorithm, but I can tell you to get rid of multiplies, flatten loops, and use fixed-point if you don't need the range of floats. I'm just analysing it from the perspective of "what operations are being performed and how can I make them go faster with equivalent results".

>>20
Get rid of those j * rowSize multiplies and replace them with addition.

0.5 *
Are x/x0 floating-point? Use integer/fixed-point if you don't need the extra range.

Name: Anonymous 2011-09-12 19:15

>>19
MUL SHIFT

You might want to do instruction timing tests on a modern CPU.  Old school is completely out-of-date.  You will be shocked.  I wrote a compiler and discovered my optimizations didn't matter because the CPU converts everything to a RISC pipeline and schedules really cleverly.  Old school compiler optmizations don't matter.

Name: Anonymous 2011-09-12 19:33

>>21
Trying to stay html/js only on this one.

Right now, the performance on my machine at 1680 × 930 is pretty good in Chrome,  okay in Firefox, bad in Safari, and in IE8 is roughly 1 frame per second.

I think you're right about the reflowing, even though this div is essentially the only element on the page.

I'm going to try a canvas implementation. I think it will help a lot.

Name: Anonymous 2011-09-12 19:35

>>25
DIV is more expensive than MUL, so shifts make a bigger difference.  Use powers of two and AND instead of Modulo.  Don't trust your instincts -- test it.  It will keep you from making worthloess garbage changes to your code when optimizing.

Name: Anonymous 2011-09-12 19:44

Can you run it through Google Closure for extra juice?

Name: Anonymous 2011-09-12 19:44

>>23
Get rid of those j * rowSize multiplies and replace them with addition.
I probably should have called it "Babby's first algorithm optimization". I'm just feeling a bit dense in this thread? For example, I must admit, I'm not sure how to do what you suggest here.

Are x/x0 floating-point?
Yeah, although all numbers in js are IEEE 754. There's typed Uint arrays in Chrome, but they're slower.

Name: Anonymous 2011-09-12 19:47

>>27
I was not even aware that this existed. Shit!

Name: Anonymous 2011-09-13 0:03

this should be posted on jsperf so it can be empirically tested

Name: Anonymous 2011-09-13 0:13

>>26
He's talking about divs as in the HTML element, not divide instructions (which he doesn't have in his code).

>>28
You're setting a variable to the loop induction variable multiplied by a loop invariant. It should be obvious from inspection that this variable increases by the amount of the loop invariant upon every iteration. Thus, initialize it to its initial value and add the loop invariant upon every iteration.

Name: Anonymous 2011-09-13 1:29

chain your "var" declarations.. "var a, b, c".

also check out this:
http://code.google.com/closure/compiler/

Name: Anonymous 2011-09-13 13:32

also check out this
<-- or this.

Name: Anonymous 2011-09-13 15:20

Canvas was a disaster. Not only does the typography render poorly, but the framerate was cut in half. Admittedly I used a pretty naive implementation, but after seeing what it looked like, it's not worth it to me to try and optimize for it -- not that I can think of many reasonable options for doing so, considering how simple my draw instruction is, and how much more I would need to, saw, implement redraw regions.

I tried using a <textarea> instead of a <div>, thinking that value, being of text only, might improve the performance. Alas, no improvement. Same amount of time in the renderer.

It's back to my only option of cleaning up the solver, then. I still have a lot of small places I can make optimizations. I'm also wondering if there's a place I could use Web Workers; I can't think of a place to fork, though.

Name: Anonymous 2011-09-13 15:21

to, saw,

Er, that is completely wrong. I meant to write "to, say,"

Name: VIPPER 2011-09-13 17:48

>>34
Canvas was a disaster.
Where did your tripcode go moot?

Name: n3n7i 2011-09-13 23:19


            for (var j=1 ; j<=height; j++) {
                var currentRow = j * rowSize;
                ++currentRow;
                var i = width - 1; do {
                    x[currentRow] = x0[currentRow];
                    ++currentRow;
                } while (i--);
            }


This bit still looks a bit overcomplicated....
Any chance rowSize = width?


            var maxLoop = (height * rowSize) + width;
            for (var j=rowSize+1; j<=maxLoop+1; j++) {
                    x[j] = x0[j];
                    }

Name: Anonymous 2011-09-14 1:32

>>36

LOL

Name: Sussex Dev Team 2011-09-14 2:05

>>37
Please stop decreasing the average quality of posts where the name field is equal to the string "n3n7i".

Thank you.

Name: Anonymous 2011-09-14 23:49

Maybe this is why Crockford says not to use ++ incrementors? Am I being a faggot right now?

What was given:

prevX = x[current - 1];
prevX = x[current] = (x0[current] + a * (prevX + x[++current] + x[++previous] + x[++next])) * invC;

What I expected:

previous = 0;
current = 11;
next = 20;

prevX = x[10];
prevX = x[12] = (x0[12] + a * (x[10] + x[12] + x[1] + x[21]) * invC;

What I got:

previous = 0;
current = 11;
next = 20;

prevX = x[10];
prevX = x[11] = x0[11] + a * (x[10] + x[12] + x[1] + x[21]) * invC;


Just straight up left-to-right replacement. No preference given to instructions in the parens, unlike every other use of the plus sign. I don't know if that's the same in other C-like languages, but I do know that this problem took me hours to figure out.

Guess I shoulda read ECMA-262...

Name: Anonymous 2011-09-14 23:56

(Actually it's (x0[11] + a * (x[10] + x[12] + x[1] + x[21])) * invC; I forgot the parentheses accidentally.)

Name: Anonymous 2011-09-15 0:33

>>40
I don't know if that's the same in other C-like languages
who gives a fuck, this is javascript!

enjoy your broken ==

Name: Anonymous 2011-09-15 2:38

>>42

I use ===

Name: Anonymous 2011-09-15 10:44

>>43
The forced usage of the identity operator

Name: Anonymous 2011-09-17 3:21

10 things I've learned about writing efficient JS

1. On my setup, for is faster than while and do...while, even when you do it in reverse and take out the comparison operators (http://jsperf.com/loop-comparison). Maybe that's not true for other people, but that it's possible at all makes me think that the rationale for using do while in the first place is bullshit. Add to this dubious performance gain the extra effort incurred by ensuring that your loop works backwards as well as forwards, and finally I reach the conclusion that this advice needs to stop. for with cached length is as good as you can get, even when you're trying to wring every last drop out of your loops.

2. If your loop optimization idea requires that you add a conditional inside the loop, then it's probably not an optimization. That shit is really slow.

3. Use bitwise math for floor/ceil/round on positive Numbers. You can throw in a | 0 or a + 0.5 | 0 for free, basically. Meanwhile, Math.round() is way more expensive than it should be.

4. Google tools: Google Closure makes code smaller, not faster. And ADVANCED_MODE requires that you write Javascript like a Java programmer. However, Google also has the fastest Javascript engine and the clearest profiler in Chrome. They also have ``Speed Tracer'', which profiles the entire page from within the browser, showing you how much is spent on paint, layout, etc. (http://code.google.com/webtoolkit/speedtracer/)

5. Incrementing operators (++, --) are evaluated from left to right in a chain of assignments -- not right to left, and not by nested parentheses. Since this is obtuse, it would be better if you didn't use these inside complex operations.

6. Heavy closure nesting doesn't affect the performance of the innermost closure by default, as outside variables are only looked up if they're used inside the closure. Same goes for "special" variables like arguments -- performance takes a hit when you use these, as you should expect with anything that resembles reflection.

A little trick: if your code uses the DOM, add window, documentas arguments to your instantly-executed (function(){...})() wrapper, with window and window.document as the passed arguments.

7. Object chains do matter, the interpreter has to climb the chain every time you call a method. It's better to cache the function first in performance-critical areas.

(I'm not sure if the interpreter has to climb up the closure nodes every time when using a closed over variable in the same way that it has to climb down the object nodes, but it stands to reason.)

8. Array() is never faster, not even if you're using it to allocate the memory ahead of time with a known size.

9. innerHTML works faster if you have no elements to destroy when you make your change. Therefore, if you don't care about losing bound events, you should clone the element first:
var clone = DOMElement.cloneNode(false);
clone.innerHTML = /* stuff */;
DOMElement.parentNode.replaceChild(clone, DOMElement);
DOMElement = clone;


10. apply allows you to call a function and pass an array as its arguments. Therefore, any native function that takes variable arguments can be called with an array you're using. Ex: Math.max.apply( Math, array );

Welp. My "optimized" version with ``flattened loops'' eventually earned me 20 less frames a second. My blind fervor in trying to get rid of mults caused me to bloat out my loops, with the added benefit of making everything harder to read. I had to throw it out and start over. This time I'm going to avoid some of the obviously bad decisions.

P.S. Stupid shit I learned that I should have known already:
1. Modulo and division are slower than multiplication
2. Loop invariants multiplied by loop incrementors can be replaced with addition
3. Loop incrementor comparison:
var len = 10, i;
console.log('for'); for (i = 0; i < 10; i++) console.log(i); // 0-9
console.log('while i++'); i = 0; while(i++ < len) console.log(i); // 1-10
console.log('while ++i'); i = 0; while(++i < len) console.log(i); // 1-9
console.log('while i--'); i = len; while(i--) console.log(i); // 9-0
console.log('while --i'); i = len; while(--i) console.log(i); // 9-1
console.log('do while i--'); i = len; do { console.log(i) } while(i--); // 10-0
console.log('do while --i');i = len; do { console.log(i) } while(--i); // 10-1
console.log('do while len-1 i--');i = len - 1; do { console.log(i) } while(i--); // 9-0
console.log('do while len-1 i--');i = len - 1; do { console.log(i) } while(--i); // 9-1

Name: Anonymous 2011-09-17 4:20

>>44

PHP is the same :<

Don't change these.
Name: Email:
Entire Thread Thread List