How to scale Node.JS servers with clustering?

How to scale Node.JS servers with clustering?

Node.JS by default doesn’t utilize all CPU cores, only one. You can optimize your Node.JS server for CPU-intensive operations using the cluster module.

In this post, we will compare building a web application without clustering, using cluster module, using PM2 process manager, and with SVR.JS web server (which has clustering; web applications only).

Building an application without clustering

Let’s imagine we build our Fibonacci number computing API (a CPU-intensive operation).

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
var http = require("http");
var port = 3000;

/**
 * Converts a value to a BigInt when the runtime supports it, and to a plain
 * Number otherwise.
 *
 * @param {number|string} n - Value to convert.
 * @returns {bigint|number} BigInt(n) when BigInt exists, otherwise Number(n).
 */
function cBigInt(n) {
  // `typeof` is required for the feature test: evaluating an undeclared
  // `BigInt` identifier directly throws a ReferenceError on runtimes without
  // BigInt, so a bare `if(BigInt)` can never reach the Number fallback.
  if (typeof BigInt === "function") return BigInt(n);
  return Number(n);
}

/**
 * Computes the n-th Fibonacci number iteratively (F(0) = 0, F(1) = 1).
 *
 * @param {number} n - Index in the sequence. It is rounded to the nearest
 *   integer; any index <= 0 yields 0.
 * @returns {bigint|number} The Fibonacci number, in whatever numeric type
 *   cBigInt() produces.
 * @throws {RangeError} If n is NaN or +Infinity.
 */
function fibonacci(n) {
  var index = Math.round(n);
  if (index <= 0) return cBigInt(0);
  if (index === 1) return cBigInt(1);
  // Reject NaN / +Infinity explicitly instead of relying on BigInt() to throw
  // while evaluating the loop condition.
  if (!isFinite(index)) {
    throw new RangeError("fibonacci index must be a finite number");
  }
  var previous = cBigInt(0);
  var current = cBigInt(1);
  // The counter only has to reach `index`, which is an ordinary number, so it
  // does not need to be a BigInt and the bound needs no per-iteration
  // conversion. Only the sequence values themselves can outgrow a Number.
  for (var step = 1; step < index; step++) {
    var next = current + previous;
    previous = current;
    current = next;
  }
  return current;
}

// HTTP entry point: answers /fibonacci?n=<index> with the n-th Fibonacci
// number as plain text, and every other path with 404.
var server = http.createServer(function (req, res) {
  var uobj;
  try {
    // The base URL is only a placeholder that lets the relative request
    // target in req.url be parsed.
    uobj = new URL(req.url, "https://example.com");
  } catch (err) {
    // new URL() throws on request targets it cannot parse (for example "//").
    // Left uncaught, that exception would terminate the whole process.
    res.writeHead(400, "Bad Request", {
      "Content-Type": "text/plain"
    });
    res.end("400 Bad Request");
    return;
  }
  if (uobj.pathname === "/fibonacci") {
    // A missing or non-numeric "n" parses to NaN and is rejected below.
    var n = parseInt(uobj.searchParams.get("n"), 10);
    if (Number.isNaN(n)) {
      res.writeHead(400, "Bad Request", {
        "Content-Type": "text/plain"
      });
      res.end("400 Bad Request");
    } else {
      if (n > 100000) n = 100000; // Prevent denial of service
      res.writeHead(200, "OK", {
        "Content-Type": "text/plain"
      });
      res.end(fibonacci(n).toString());
    }
  } else {
    res.writeHead(404, "Not Found", {
      "Content-Type": "text/plain"
    });
    res.end("404 Not Found");
  }
}).listen(port, function () {
  console.log("Server is listening at port " + server.address().port);
});

This server will calculate numbers in the Fibonacci sequence, when the http://localhost:3000/fibonacci?n=50 is accessed (replace “50” with the index of the Fibonacci sequence; n-th Fibonacci number will be computed).

You can save this server as app.js, and then run it using:

1
node app.js

Let’s benchmark this server with n = 20000 and 5 concurrent connections for 10 seconds using the ab command (it can be installed using sudo apt install apache2-utils on Debian-based GNU/Linux distributions, sudo dnf install httpd-tools on Red Hat-based systems, sudo zypper install apache2-utils on SUSE-based systems, or sudo pacman -S apache on Arch-based systems; running the command for Arch-based systems also installs the Apache web server).

1
ab -c 5 -t 10 "http://localhost:3000/fibonacci?n=20000" # URL is quoted so the shell cannot treat "?" as a glob character
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
This is ApacheBench, Version 2.3 <$Revision: 1843412 $>
Copyright 1996 Adam Twiss, Zeus Technology Ltd, http://www.zeustech.net/
Licensed to The Apache Software Foundation, http://www.apache.org/

Benchmarking localhost (be patient)
Finished 948 requests


Server Software:
Server Hostname: localhost
Server Port: 3000

Document Path: /fibonacci?n=20000
Document Length: 4180 bytes

Concurrency Level: 5
Time taken for tests: 10.006 seconds
Complete requests: 948
Failed requests: 0
Total transferred: 4075512 bytes
HTML transferred: 3979360 bytes
Requests per second: 94.75 [#/sec] (mean)
Time per request: 52.773 [ms] (mean)
Time per request: 10.555 [ms] (mean, across all concurrent requests)
Transfer rate: 397.77 [Kbytes/sec] received

Connection Times (ms)
min mean[+/-sd] median max
Connect: 0 0 0.0 0 0
Processing: 10 52 15.5 52 95
Waiting: 10 36 14.1 41 68
Total: 10 52 15.5 52 95

Percentage of the requests served within a certain time (ms)
50% 52
66% 53
75% 54
80% 56
90% 73
95% 83
98% 86
99% 92

Without clustering, we have got around 95 requests per second.

We have a problem though, because the Node.JS server only runs on one CPU core, but servers often have multiple cores! Let’s use the cluster module to scale the server!

Clustering the application using the cluster module.

You can use this code to add clustering to the server:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
// Global variables and requires go here.
var cluster = require("cluster");
var os = require("os");

if (cluster.isWorker) {
  console.log("Started worker with PID " + process.pid);
  // Code for worker process goes here.
} else {
  console.log("Primary with PID " + process.pid + " is running");
  // Prefer os.availableParallelism() when the runtime provides it; otherwise
  // fall back to the number of CPU cores.
  var processesToStart = os.availableParallelism ? os.availableParallelism() : os.cpus().length;
  for (var i = 0; i < processesToStart; i++) {
    cluster.fork();
  }
  cluster.on("exit", function (worker, code, signal) {
    // This handler runs in the primary, where process.pid is the primary's
    // own PID; the PID of the worker that exited is on worker.process.
    var workerPid = worker.process.pid;
    if (code === 0 && !signal) {
      // Clean exit: the worker finished on its own, so it is not replaced.
      console.log("Worker with PID " + workerPid + " exited");
    } else {
      // Non-zero exit code (crash) or termination by a signal: replace it.
      // NOTE(review): a worker that crashes immediately on startup will be
      // re-forked in a loop; add a restart limit if that can happen.
      console.log("Worker with PID " + workerPid + " died, restarting the worker...");
      cluster.fork();
    }
  });
  // Code for primary process goes here.
}

First off, there is a check whether the process is the primary or a worker. If it is a worker, then the code for the worker process is executed. If it is the primary process, the number of processes to fork is first determined by the os.availableParallelism() function. If that function is not present, then it is determined by the number of CPU cores. Then the requested number of processes is forked. When a worker process crashes or is killed by a signal, the primary automatically starts a new worker.

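Worker processes can now be run by the OS scheduler on multiple CPU cores, and incoming connections are distributed among them. If not using Windows, then the default approach is round-robin: the primary process listens on the port, accepts new connections, and hands them to the workers in circular order, without any priority. If using Windows, then the default approach is sharing the listening socket, so the workers accept connections directly. This should theoretically give the best performance, but in practice the load tends to be distributed very unevenly due to OS scheduler quirks.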

Here is the Fibonacci-computing server code with clustering:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
var http = require("http");
var port = 3000;
var cluster = require("cluster");
var os = require("os");

/**
 * Converts a value to a BigInt when the runtime supports it, and to a plain
 * Number otherwise.
 *
 * @param {number|string} n - Value to convert.
 * @returns {bigint|number} BigInt(n) when BigInt exists, otherwise Number(n).
 */
function cBigInt(n) {
  // `typeof` is required for the feature test: evaluating an undeclared
  // `BigInt` identifier directly throws a ReferenceError on runtimes without
  // BigInt, so a bare `if(BigInt)` can never reach the Number fallback.
  if (typeof BigInt === "function") return BigInt(n);
  return Number(n);
}

/**
 * Computes the n-th Fibonacci number iteratively (F(0) = 0, F(1) = 1).
 *
 * @param {number} n - Index in the sequence. It is rounded to the nearest
 *   integer; any index <= 0 yields 0.
 * @returns {bigint|number} The Fibonacci number, in whatever numeric type
 *   cBigInt() produces.
 * @throws {RangeError} If n is NaN or +Infinity.
 */
function fibonacci(n) {
  var index = Math.round(n);
  if (index <= 0) return cBigInt(0);
  if (index === 1) return cBigInt(1);
  // Reject NaN / +Infinity explicitly instead of relying on BigInt() to throw
  // while evaluating the loop condition.
  if (!isFinite(index)) {
    throw new RangeError("fibonacci index must be a finite number");
  }
  var previous = cBigInt(0);
  var current = cBigInt(1);
  // The counter only has to reach `index`, which is an ordinary number, so it
  // does not need to be a BigInt and the bound needs no per-iteration
  // conversion. Only the sequence values themselves can outgrow a Number.
  for (var step = 1; step < index; step++) {
    var next = current + previous;
    previous = current;
    current = next;
  }
  return current;
}

if (cluster.isWorker) {
  // Worker: runs the HTTP server. Every worker calls listen() on the same
  // port.
  console.log("Started worker with PID " + process.pid);
  var server = http.createServer(function (req, res) {
    var uobj;
    try {
      // The base URL is only a placeholder that lets the relative request
      // target in req.url be parsed.
      uobj = new URL(req.url, "https://example.com");
    } catch (err) {
      // new URL() throws on request targets it cannot parse (for example
      // "//"). Left uncaught, that exception would crash this worker.
      res.writeHead(400, "Bad Request", {
        "Content-Type": "text/plain"
      });
      res.end("400 Bad Request");
      return;
    }
    if (uobj.pathname === "/fibonacci") {
      // A missing or non-numeric "n" parses to NaN and is rejected below.
      var n = parseInt(uobj.searchParams.get("n"), 10);
      if (Number.isNaN(n)) {
        res.writeHead(400, "Bad Request", {
          "Content-Type": "text/plain"
        });
        res.end("400 Bad Request");
      } else {
        if (n > 100000) n = 100000; // Prevent denial of service
        res.writeHead(200, "OK", {
          "Content-Type": "text/plain"
        });
        res.end(fibonacci(n).toString());
      }
    } else {
      res.writeHead(404, "Not Found", {
        "Content-Type": "text/plain"
      });
      res.end("404 Not Found");
    }
  }).listen(port);
} else {
  // Primary: only forks and supervises the workers.
  console.log("Primary with PID " + process.pid + " is running");
  // Prefer os.availableParallelism() when the runtime provides it; otherwise
  // fall back to the number of CPU cores.
  var processesToStart = os.availableParallelism ? os.availableParallelism() : os.cpus().length;
  for (var i = 0; i < processesToStart; i++) {
    cluster.fork();
  }
  cluster.on("exit", function (worker, code, signal) {
    // This handler runs in the primary, where process.pid is the primary's
    // own PID; the PID of the worker that exited is on worker.process.
    var workerPid = worker.process.pid;
    if (code === 0 && !signal) {
      // Clean exit: the worker finished on its own, so it is not replaced.
      console.log("Worker with PID " + workerPid + " exited");
    } else {
      // Non-zero exit code (crash) or termination by a signal: replace it.
      // NOTE(review): a worker that crashes immediately on startup will be
      // re-forked in a loop; add a restart limit if that can happen.
      console.log("Worker with PID " + workerPid + " died, restarting the worker...");
      cluster.fork();
    }
  });
  console.log("The server will listen on port " + port + "...");
}

You can save this server as app.js again, and then run it using:

1
node app.js

Let’s benchmark the server again with n = 20000 and 5 concurrent connections for 10 seconds using the ab command.

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
This is ApacheBench, Version 2.3 <$Revision: 1843412 $>
Copyright 1996 Adam Twiss, Zeus Technology Ltd, http://www.zeustech.net/
Licensed to The Apache Software Foundation, http://www.apache.org/

Benchmarking localhost (be patient)
Finished 2983 requests


Server Software:
Server Hostname: localhost
Server Port: 3000

Document Path: /fibonacci?n=20000
Document Length: 4180 bytes

Concurrency Level: 5
Time taken for tests: 10.004 seconds
Complete requests: 2983
Failed requests: 0
Total transferred: 12770223 bytes
HTML transferred: 12468940 bytes
Requests per second: 298.17 [#/sec] (mean)
Time per request: 16.769 [ms] (mean)
Time per request: 3.354 [ms] (mean, across all concurrent requests)
Transfer rate: 1246.54 [Kbytes/sec] received

Connection Times (ms)
min mean[+/-sd] median max
Connect: 0 0 0.0 0 0
Processing: 12 17 3.5 16 47
Waiting: 12 16 3.5 16 45
Total: 12 17 3.5 16 47

Percentage of the requests served within a certain time (ms)
50% 16
66% 18
75% 20
80% 20
90% 21
95% 22
98% 22
99% 23
100% 47 (longest request)

With clustering, we got around 298 requests per second, which is 3.1 times faster than what we got without clustering. That means we have successfully optimized our Fibonacci-computing server!

If you want to communicate between the primary and the workers, you can use IPC. Workers can use the process.send() function to send a message to the primary, while the primary process can iterate over cluster.workers and call send() on each worker to broadcast an IPC message to all workers. Messages are received by listening to the “message” event: on process in a worker, or on the worker objects (the elements of cluster.workers) in the primary process.

Example code with workers sending messages to the primary process:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
// Global variables and requires go here.
var cluster = require("cluster");
var os = require("os");

if (cluster.isWorker) {
  console.log("Started worker with PID " + process.pid);
  // Sends one IPC message to the primary as soon as the worker starts.
  process.send("Hello from worker!");
} else {
  console.log("Primary with PID " + process.pid + " is running");
  // Forks one worker and logs every IPC message it sends. Used both for the
  // initial workers and for replacements, so every worker gets the listener.
  var forkWorker = function () {
    var worker = cluster.fork();
    worker.on("message", function (msg) {
      console.log("Received a message: " + msg);
    });
    return worker;
  };
  // Prefer os.availableParallelism() when the runtime provides it; otherwise
  // fall back to the number of CPU cores.
  var processesToStart = os.availableParallelism ? os.availableParallelism() : os.cpus().length;
  for (var i = 0; i < processesToStart; i++) {
    forkWorker();
  }
  cluster.on("exit", function (worker, code, signal) {
    // This handler runs in the primary, where process.pid is the primary's
    // own PID; the PID of the worker that exited is on worker.process.
    var workerPid = worker.process.pid;
    if (code === 0 && !signal) {
      // Clean exit: the worker finished on its own, so it is not replaced.
      console.log("Worker with PID " + workerPid + " exited");
    } else {
      // Non-zero exit code (crash) or termination by a signal: replace it.
      // NOTE(review): a worker that crashes immediately on startup will be
      // re-forked in a loop; add a restart limit if that can happen.
      console.log("Worker with PID " + workerPid + " died, restarting the worker...");
      forkWorker();
    }
  });
  // Code for primary process goes here.
}

Using PM2 - an advanced, production process manager for Node.JS

PM2 is an advanced, production process manager for Node.JS. It runs as a daemon and it’s built around the cluster module. It also automatically restarts applications when they crash, making it a suitable choice for production Node.JS servers.

To install PM2, run npm install -g pm2. Then you can use the non-clustered application code from the “Building an application without clustering” section, save it as app.js and run:

1
pm2 start app.js -i 0

The -i option specifies the number of processes to start. If it is set to 0, then PM2 starts as many processes as there are CPU cores.

The table from this command may look like this:

1
2
3
4
5
6
7
8
9
10
11
12
┌────┬────────┬─────────────┬─────────┬─────────┬──────────┬────────┬──────┬───────────┬──────────┬──────────┬──────────┬──────────┐
│ id │ name │ namespace │ version │ mode │ pid │ uptime │ ↺ │ status │ cpu │ mem │ user │ watching │
├────┼────────┼─────────────┼─────────┼─────────┼──────────┼────────┼──────┼───────────┼──────────┼──────────┼──────────┼──────────┤
│ 0 │ app │ default │ N/A │ cluster │ 9886 │ 2s │ 0 │ online │ 0% │ 43.2mb │ dorians │ disabled │
│ 1 │ app │ default │ N/A │ cluster │ 9893 │ 2s │ 0 │ online │ 0% │ 43.3mb │ dorians │ disabled │
│ 2 │ app │ default │ N/A │ cluster │ 9904 │ 2s │ 0 │ online │ 0% │ 42.5mb │ dorians │ disabled │
│ 3 │ app │ default │ N/A │ cluster │ 9915 │ 1s │ 0 │ online │ 0% │ 43.0mb │ dorians │ disabled │
│ 4 │ app │ default │ N/A │ cluster │ 9926 │ 1s │ 0 │ online │ 0% │ 42.9mb │ dorians │ disabled │
│ 5 │ app │ default │ N/A │ cluster │ 9937 │ 1s │ 0 │ online │ 0% │ 42.8mb │ dorians │ disabled │
│ 6 │ app │ default │ N/A │ cluster │ 9948 │ 0s │ 0 │ online │ 0% │ 42.8mb │ dorians │ disabled │
│ 7 │ app │ default │ N/A │ cluster │ 9959 │ 0s │ 0 │ online │ 0% │ 33.9mb │ dorians │ disabled │
└────┴────────┴─────────────┴─────────┴─────────┴──────────┴────────┴──────┴───────────┴──────────┴──────────┴──────────┴──────────┘

To make PM2 automatically start itself and the application when the OS starts up, first run:

1
2
pm2 save
pm2 startup

After running pm2 startup command, you will see the command for setting up the startup script. Copy it to the terminal.

You have now a Node.JS server, that automatically starts up while OS is started and survives application crashes!

You can also rate limit the server using iptables:

1
2
3
# Rules are appended (-A) to the INPUT chain in this order.
# 1. Accept packets that belong to (or relate to) an already established connection.
sudo iptables -A INPUT -m conntrack --ctstate ESTABLISHED,RELATED -j ACCEPT
# 2. Accept NEW TCP connections to port 3000 while within the limit of 20 per minute (burst of 30).
# NOTE(review): the "limit" match is presumably one budget shared by all clients, not per client; confirm that is intended.
sudo iptables -A INPUT -p tcp --dport 3000 -m conntrack --ctstate NEW -m limit --limit 20/min --limit-burst 30 -j ACCEPT
# 3. Drop every NEW TCP connection to port 3000 that rule 2 did not accept.
sudo iptables -A INPUT -p tcp --dport 3000 -m conntrack --ctstate NEW -j DROP

And saving the iptables rules:

1
sudo bash -c 'iptables-save > /etc/iptables/rules.v4'

To display the log of the server, you can run this command:

1
pm2 log

To list the processes, you can run this command:

1
pm2 list

To restart the server you can run this command:

1
pm2 restart app.js

To stop the server you can run this command:

1
pm2 stop app.js

You can read more about PM2 commands in the PM2 documentation.

Now we can check the performance using the ab command (same parameters as before).

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
This is ApacheBench, Version 2.3 <$Revision: 1843412 $>
Copyright 1996 Adam Twiss, Zeus Technology Ltd, http://www.zeustech.net/
Licensed to The Apache Software Foundation, http://www.apache.org/

Benchmarking localhost (be patient)
Finished 2931 requests


Server Software:
Server Hostname: localhost
Server Port: 3000

Document Path: /fibonacci?n=20000
Document Length: 4180 bytes

Concurrency Level: 5
Time taken for tests: 10.001 seconds
Complete requests: 2931
Failed requests: 0
Total transferred: 12547611 bytes
HTML transferred: 12251580 bytes
Requests per second: 293.08 [#/sec] (mean)
Time per request: 17.060 [ms] (mean)
Time per request: 3.412 [ms] (mean, across all concurrent requests)
Transfer rate: 1225.27 [Kbytes/sec] received

Connection Times (ms)
min mean[+/-sd] median max
Connect: 0 0 0.0 0 0
Processing: 12 17 3.4 17 32
Waiting: 12 17 3.4 16 32
Total: 12 17 3.4 17 32

Percentage of the requests served within a certain time (ms)
50% 17
66% 19
75% 20
80% 21
90% 22
95% 22
98% 23
99% 23
100% 32 (longest request)

We got around 293 requests per second. The request per second count may fluctuate a little bit from different performance measurements. We have still 3.1x more requests per second, than without clustering.

Using SVR.JS - a web server running on Node.JS

NOTE: this section is for web applications only! This is a web server that uses clustering, not a standalone process manager.

SVR.JS is a web server running on Node.JS. SVR.JS uses clustering to scale up web applications, and automatically restarts workers when they crash, making it another suitable choice for production web applications.

To install SVR.JS (on GNU/Linux), go to the SVR.JS website, switch to the “GNU/Linux” tab, and then click the “Copy” button to copy the installation command.

Then paste the command into terminal. You may be prompted to choose either stable version, LTS version or from zip archive. When in doubt, choose LTS version, and when it is not working, choose stable version.

After installing SVR.JS, change the useWebRootServerSideScript property to false, disableTrailingSlashRedirects property to true, and empty out the rewriteMap array in the /etc/svrjs-config.json like this:

1
2
3
4
5
6
7
{
...,
"rewriteMap": [],
"disableTrailingSlashRedirects": true,
"useWebRootServerSideScript": false,
...
}

Setting the useWebRootServerSideScript property to false allows you to use server-side JavaScript in the SVR.JS installation directory (usually /usr/lib/svrjs). Setting the disableTrailingSlashRedirects property to true will improve performance, as the trailing slash redirection functionality requires accessing the file system, and we will not use static file serving functionality anyway. Emptying out the rewriteMap array will remove all the URL rewriting rules.

After modifying the configuration file, you can create SVR.JS server-side JavaScript file in /usr/lib/svrjs/serverSideScript.js. You can read more about it in the SVR.JS documentation

The code for Fibonacci-computing server will look like this (serverSideScript.js file):

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
disableEndElseCallbackExecute = true; //Without "var", else it will not work!!!

/**
 * Converts a value to a BigInt when the runtime supports it, and to a plain
 * Number otherwise.
 *
 * @param {number|string} n - Value to convert.
 * @returns {bigint|number} BigInt(n) when BigInt exists, otherwise Number(n).
 */
function cBigInt(n) {
  // `typeof` is required for the feature test: evaluating an undeclared
  // `BigInt` identifier directly throws a ReferenceError on runtimes without
  // BigInt, so a bare `if(BigInt)` can never reach the Number fallback.
  if (typeof BigInt === "function") return BigInt(n);
  return Number(n);
}

/**
 * Computes the n-th Fibonacci number iteratively (F(0) = 0, F(1) = 1).
 *
 * @param {number} n - Index in the sequence. It is rounded to the nearest
 *   integer; any index <= 0 yields 0.
 * @returns {bigint|number} The Fibonacci number, in whatever numeric type
 *   cBigInt() produces.
 * @throws {RangeError} If n is NaN or +Infinity.
 */
function fibonacci(n) {
  var index = Math.round(n);
  if (index <= 0) return cBigInt(0);
  if (index === 1) return cBigInt(1);
  // Reject NaN / +Infinity explicitly instead of relying on BigInt() to throw
  // while evaluating the loop condition.
  if (!isFinite(index)) {
    throw new RangeError("fibonacci index must be a finite number");
  }
  var previous = cBigInt(0);
  var current = cBigInt(1);
  // The counter only has to reach `index`, which is an ordinary number, so it
  // does not need to be a BigInt and the bound needs no per-iteration
  // conversion. Only the sequence values themselves can outgrow a Number.
  for (var step = 1; step < index; step++) {
    var next = current + previous;
    previous = current;
    current = next;
  }
  return current;
}

// Using the same code as it would be without SVR.JS
// `req` and `res` are not declared in this script; it relies on them being
// in scope when the script runs.
var uobj = null;
try {
  // The base URL is only a placeholder that lets the relative request target
  // in req.url be parsed.
  uobj = new URL(req.url, "https://example.com");
} catch (err) {
  // new URL() throws on request targets it cannot parse; uobj stays null and
  // the request is answered with 400 below.
  // NOTE(review): the server may already reject such URLs before this script
  // runs; the guard is kept so the script never throws on its own.
}
if (uobj === null) {
  res.writeHead(400, "Bad Request", {
    "Content-Type": "text/plain"
  });
  res.end("400 Bad Request");
} else if (uobj.pathname === "/fibonacci") {
  // A missing or non-numeric "n" parses to NaN and is rejected below.
  var n = parseInt(uobj.searchParams.get("n"), 10);
  if (Number.isNaN(n)) {
    res.writeHead(400, "Bad Request", {
      "Content-Type": "text/plain"
    });
    res.end("400 Bad Request");
  } else {
    if (n > 100000) n = 100000; // Prevent denial of service
    res.writeHead(200, "OK", {
      "Content-Type": "text/plain"
    });
    res.end(fibonacci(n).toString());
  }
} else {
  res.writeHead(404, "Not Found", {
    "Content-Type": "text/plain"
  });
  res.end("404 Not Found");
}

After adding the code, restart the server using sudo systemctl restart svrjs or sudo /etc/init.d/svrjs restart.

Now the URL is different, because the server listens on port 80, and not on port 3000. This server will calculate numbers in the Fibonacci sequence, when the http://localhost/fibonacci?n=50 is accessed (replace “50” with the index of the Fibonacci sequence; n-th Fibonacci number will be computed).

Now we can check the performance using the ab command:

1
ab -c 5 -t 10 "http://localhost/fibonacci?n=20000" # port 80 (the default), not 3000; URL is quoted so the shell cannot treat "?" as a glob character
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
This is ApacheBench, Version 2.3 <$Revision: 1843412 $>
Copyright 1996 Adam Twiss, Zeus Technology Ltd, http://www.zeustech.net/
Licensed to The Apache Software Foundation, http://www.apache.org/

Benchmarking localhost (be patient)
Finished 2846 requests


Server Software: SVR.JS/3.14.14
Server Hostname: localhost
Server Port: 80

Document Path: /fibonacci?n=20000
Document Length: 4180 bytes

Concurrency Level: 5
Time taken for tests: 10.006 seconds
Complete requests: 2846
Failed requests: 0
Total transferred: 12326026 bytes
HTML transferred: 11896280 bytes
Requests per second: 284.42 [#/sec] (mean)
Time per request: 17.579 [ms] (mean)
Time per request: 3.516 [ms] (mean, across all concurrent requests)
Transfer rate: 1202.97 [Kbytes/sec] received

Connection Times (ms)
min mean[+/-sd] median max
Connect: 0 0 0.0 0 0
Processing: 12 17 3.8 17 48
Waiting: 12 17 3.8 17 47
Total: 12 17 3.8 17 48

Percentage of the requests served within a certain time (ms)
50% 17
66% 19
75% 21
80% 21
90% 22
95% 23
98% 24
99% 26
100% 48 (longest request)

We got around 284 requests per second, which is a little bit slower than the servers built with the bare cluster module and with PM2, due to overhead from other operations performed by SVR.JS, such as path sanitation or server logging. You may also expect a slight slowdown when building a web application with a web application framework, like Express or Koa. The requests per second count may fluctuate a little bit between performance measurements. It is still 3 times faster than what we got without clustering.

There is still a problem with our SVR.JS setup: it exposes the server version, and it still has directory listings enabled. To disable both, modify the /etc/svrjs-config.json file like this:

1
2
3
4
5
6
7
{
...,
"exposeServerVersion": false,
"exposeModsInErrorPages": false,
"enableDirectoryListing": false,
...
}

We have a nearly production-ready web application running on SVR.JS! We can also rate limit it using iptables:

1
2
3
# Rules are appended (-A) to the INPUT chain in this order.
# 1. Accept packets that belong to (or relate to) an already established connection.
sudo iptables -A INPUT -m conntrack --ctstate ESTABLISHED,RELATED -j ACCEPT
# 2. Accept NEW TCP connections to port 80 while within the limit of 20 per minute (burst of 30).
# NOTE(review): the "limit" match is presumably one budget shared by all clients, not per client; confirm that is intended.
sudo iptables -A INPUT -p tcp --dport 80 -m conntrack --ctstate NEW -m limit --limit 20/min --limit-burst 30 -j ACCEPT
# 3. Drop every NEW TCP connection to port 80 that rule 2 did not accept.
sudo iptables -A INPUT -p tcp --dport 80 -m conntrack --ctstate NEW -j DROP

And saving the iptables rules:

1
sudo bash -c 'iptables-save > /etc/iptables/rules.v4'

Conclusion

In this post, you scaled your server using the cluster module. You first built a web application without clustering, then used the bare cluster module, and finally used the PM2 process manager. You might even have created a web application with the SVR.JS web server, which already has clustering. You have compared the performance of each of these approaches to building a web application.

For more information about the cluster module, you can check out the Node.JS documentation.