Open Bug 671324 Opened 13 years ago Updated 2 years ago

Processing.js Noise3D slower in Firefox than WebKit

Categories

(Core :: JavaScript Engine, defect)

x86
macOS
defect

Tracking

()

People

(Reporter: humph, Unassigned)

References

()

Details

This small test is a lot slower in Firefox (4 through 8) than Chrome or other browsers.  On Firefox I get ~5-10 fps, and on Chrome it's more like ~40.  I don't think the canvas pixel flipping is affecting this that much, and it looks like it's just the cost of doing noise(), however I can't seem to get a shark build working here to know for sure.

I think that the relevant bits of code are:

Processing Code:
----------------

// Step added to xoff and yoff for every pixel column/row visited in draw().
float increment = 0.04;
// Third noise coordinate passed to noise(); advanced once per draw() call.
float zoff = 0.0;
// Amount added to zoff at the end of each draw() call.
float zincrement = 0.08;

// Sketch initialisation: request a 100x100 drawing surface.
void setup()
{
  size(100, 100);
}

// Renders one frame: fills every pixel with a grayscale value taken from
// 3D noise sampled at (xoff, yoff, zoff), then advances zoff for the next frame.
void draw() {
  background(0);

  // Optional: adjust noise detail here
  // noiseDetail(8,0.65f);

  loadPixels();

  // The noise x-coordinate restarts at 0 every frame and grows by
  // `increment` per pixel column.
  float xoff = 0.0;

  for (int col = 0; col < width; col++) {
    xoff += increment;

    // The noise y-coordinate restarts at 0 for every column and grows by
    // `increment` per pixel row.
    float yoff = 0.0f;
    for (int row = 0; row < height; row++) {
      yoff += increment;

      // Sample the noise field and scale the sample by 255.
      float gray = noise(xoff, yoff, zoff) * 255;

      // Write the same value to all three colour channels of this pixel.
      pixels[col + row*width] = color(gray, gray, gray);
    }
  }
  updatePixels();

  // Advance the third noise coordinate for the next frame.
  zoff += zincrement;
}


Relevant JavaScript Code:
-------------------------

// Noise functions and helpers
/**
 * Perlin noise generator exposing 1D, 2D and 3D sampling methods.
 *
 * Builds a 256-entry permutation table, shuffles it with a Marsaglia random
 * number generator, and installs `noise1d`, `noise2d` and `noise3d` on the
 * constructed instance.
 *
 * @param {number} [seed] Passed to `new Marsaglia(seed)`. When it equals
 *   `undef`, `Marsaglia.createRandomized()` supplies the generator instead.
 */
function PerlinNoise(seed) {
  var rnd = seed !== undef ? new Marsaglia(seed) : Marsaglia.createRandomized();
  var i, j, t;
  // http://www.noisemachine.com/talk1/17b.html
  // http://mrl.nyu.edu/~perlin/noise/
  // generate permutation
  var perm = new Uint8Array(512);
  for (i = 0; i < 256; ++i) { perm[i] = i; }
  // Shuffle: swap every slot with a slot picked by the low 8 bits of nextInt().
  for (i = 0; i < 256; ++i) {
    j = rnd.nextInt() & 0xFF;
    t = perm[j];
    perm[j] = perm[i];
    perm[i] = t;
  }
  // Duplicate the table into the upper half so chained lookups such as
  // perm[perm[X] + Y + 1] (index up to 511) never need a modulo.
  for (i = 0; i < 256; ++i) { perm[i + 256] = perm[i]; }

  // Dot product of offset (x, y, z) with one of the gradient directions
  // selected by the low 4 bits of the hash i.
  function grad3d(i, x, y, z) {
    var h = i & 15; // convert into 12 gradient directions
    var u = h < 8 ? x : y,
        v = h < 4 ? y : h === 12 || h === 14 ? x : z;
    return ((h & 1) === 0 ? u : -u) + ((h & 2) === 0 ? v : -v);
  }

  // 2D gradient: bit 0 of the hash picks the axis, bit 1 picks the sign.
  function grad2d(i, x, y) {
    var v = (i & 1) === 0 ? x : y;
    return (i & 2) === 0 ? -v : v;
  }

  // 1D gradient: bit 0 of the hash picks the sign.
  function grad1d(i, x) {
    return (i & 1) === 0 ? -x : x;
  }

  // Linear interpolation from a (t = 0) to b (t = 1).
  function lerp(t, a, b) { return a + t * (b - a); }

  /**
   * Samples 3D noise at (x, y, z).
   * @param {number} x
   * @param {number} y
   * @param {number} z
   * @returns {number} Interpolated gradient noise value.
   */
  this.noise3d = function(x, y, z) {
    // Floor each coordinate once: it yields both the lattice cell (masked to
    // 0..255) and the fractional offset inside that cell.
    var xi = Math.floor(x), yi = Math.floor(y), zi = Math.floor(z);
    var X = xi & 255, Y = yi & 255, Z = zi & 255;
    var xf = x - xi, yf = y - yi, zf = z - zi;
    // Smoothstep weights 3t^2 - 2t^3 for each axis.
    var fx = (3 - 2 * xf) * xf * xf,
        fy = (3 - 2 * yf) * yf * yf,
        fz = (3 - 2 * zf) * zf * zf;
    // Hash the cell corners through the permutation table.
    var p0 = perm[X] + Y, p00 = perm[p0] + Z, p01 = perm[p0 + 1] + Z,
        p1 = perm[X + 1] + Y, p10 = perm[p1] + Z, p11 = perm[p1 + 1] + Z;
    return lerp(fz,
      lerp(fy, lerp(fx, grad3d(perm[p00], xf, yf, zf), grad3d(perm[p10], xf - 1, yf, zf)),
               lerp(fx, grad3d(perm[p01], xf, yf - 1, zf), grad3d(perm[p11], xf - 1, yf - 1, zf))),
      lerp(fy, lerp(fx, grad3d(perm[p00 + 1], xf, yf, zf - 1), grad3d(perm[p10 + 1], xf - 1, yf, zf - 1)),
               lerp(fx, grad3d(perm[p01 + 1], xf, yf - 1, zf - 1), grad3d(perm[p11 + 1], xf - 1, yf - 1, zf - 1))));
  };

  /**
   * Samples 2D noise at (x, y).
   * @param {number} x
   * @param {number} y
   * @returns {number} Interpolated gradient noise value.
   */
  this.noise2d = function(x, y) {
    var xi = Math.floor(x), yi = Math.floor(y);
    var X = xi & 255, Y = yi & 255;
    var xf = x - xi, yf = y - yi;
    var fx = (3 - 2 * xf) * xf * xf, fy = (3 - 2 * yf) * yf * yf;
    var p0 = perm[X] + Y, p1 = perm[X + 1] + Y;
    return lerp(fy,
      lerp(fx, grad2d(perm[p0], xf, yf), grad2d(perm[p1], xf - 1, yf)),
      lerp(fx, grad2d(perm[p0 + 1], xf, yf - 1), grad2d(perm[p1 + 1], xf - 1, yf - 1)));
  };

  /**
   * Samples 1D noise at x.
   * @param {number} x
   * @returns {number} Interpolated gradient noise value.
   */
  this.noise1d = function(x) {
    var xi = Math.floor(x);
    var X = xi & 255;
    var xf = x - xi;
    var fx = (3 - 2 * xf) * xf * xf;
    return lerp(fx, grad1d(perm[X], xf), grad1d(perm[X + 1], xf - 1));
  };
}

/**
 * Multi-octave Perlin noise in one, two or three dimensions.
 *
 * The number of arguments actually passed selects the dimensionality. Each
 * of `noiseProfile.octaves` octaves doubles the sampling frequency and
 * multiplies the amplitude by `noiseProfile.fallout`; the raw sample n of an
 * octave contributes amplitude * (1 + n) / 2 to the sum.
 *
 * The octave sample is computed inline instead of through a closure created
 * on every call, so the per-pixel hot path allocates no function objects.
 *
 * @param {number} x First noise coordinate.
 * @param {number} [y] Second coordinate; passing it selects 2D noise.
 * @param {number} [z] Third coordinate; passing it selects 3D noise.
 * @returns {number} Sum of the octave contributions; 0 when called with no
 *   arguments or with more than three (previously that case threw
 *   "f is not a function" whenever octaves > 0).
 */
p.noise = function(x, y, z) {
  if (noiseProfile.generator === undef) {
    // Build the generator lazily on first use and cache it on the profile.
    noiseProfile.generator = new PerlinNoise(noiseProfile.seed);
  }
  var generator = noiseProfile.generator;
  // Read the argument count once rather than inside the octave loop.
  var dims = arguments.length;
  var effect = 1, k = 1, sum = 0, sample;

  if (dims < 1 || dims > 3) {
    // No matching noise function for this argument count.
    return sum;
  }

  for (var i = 0, o = noiseProfile.octaves; i < o; ++i) {
    effect *= noiseProfile.fallout;
    if (dims === 1) {
      sample = generator.noise1d(k*x);
    } else if (dims === 2) {
      sample = generator.noise2d(k*x, k*y);
    } else {
      sample = generator.noise3d(k*x, k*y, k*z);
    }
    sum += effect * (1 + sample)/2;
    k *= 2; // next octave samples at twice the frequency
  }

  return sum;
};
Shark says 85% of the time is in mjit-generated code.  If I turn off mjit, we're 65% interp, which is never a good sign.  ;)

Other than the mjit-generated code, there's 4% of math_floor, and then stub calls: Lambda, CreateFuncCallObject, StrictNe, ArgCnt, BitAnd, StrictEq.

It's interesting that JM stubs StrictNe and StrictEq; I'd think those would be easy to inline (except for NaN and int-vs-double representations of the same number, just a jsval equality compare would do the trick).  The Lambda and CreateFuncCallObject stub calls are presumably due to the fact that every call to PerlinNoise creates several new function objects.  The ArgCnt is the arguments.length in p.noise, I bet.

JM+TI spends a bit less time in mjit-generated code, but 5% under the Arguments stubcall (which JM without TI seems to avoid?).  Other than that, looks about the same.  The visual look is a little better in JM+TI, though it starts off pretty quick and then slows down as time goes on...  Objectively, the profiler shows 2% of time spent outside JS under JM, and 5.5% under JM+TI, so JM+TI is about 2-3x faster, presumably.
(In reply to comment #1)
> It's interesting that JM stubs StrictNe and StrictEq; I'd think those would
> be easy to inline (except for NaN and int-vs-double representations of the
> same number, just a jsval equality compare would do the trick).

stricteq could be (much) more efficient in JM (the main benchmarks don't use it much...). Currently we inline int32 === int32, and we also inline when the lhs or rhs has undefined/null type or is a true/false constant. For non-strict equality we fuse with branching ops and have an equality IC (with TI we can probably do without the IC). Will fix bug 590161 when I return. There's no reason for stricteq to be slower than non-strict eq; we should at least also handle doubles, objects and booleans inline. We could even compare strings inline. Or at least their lengths...
Depends on: 590161
Any thoughts on who might be able to work on this?
For a start, we should remeasure now that TI is on.

Also, sfink is working on a profiler which might be of use here...
Assignee: general → nobody
Severity: normal → S3
You need to log in before you can comment on or make changes to this bug.