Building My Own VPN Protocol (Part 1)
Network professional with 16+ years of experience in networking and network security, passionate about exploring technologies under the hood and applying them in practice.
One day, after experimenting extensively with various VPN technologies, I decided to build my own simple VPN protocol—a practical example that I could ideally test on my home router running OpenWRT.
At first, my goal was minimal: just packet encapsulation without encryption or authentication. This would allow me to establish a tunnel between two hosts, even if one of them was behind NAT—a scenario that simple GRE or IPIP tunneling can’t handle. I thought, why not just encapsulate IP packets inside UDP datagrams? The idea was simple, I liked it, and I wanted to implement it in practice.

From the operating system’s perspective, the key points are:
Both sides create a TUN device (dev tun0, in Linux terms)
IP packets read from TUN are sent via UDP
UDP payloads received are written back to TUN
Server learns the client’s public IP/port from the first UDP packet (NAT traversal)
I’m a network engineer by trade, and programming is more of a hobby, so I don’t have much experience with system-level development. That’s when I decided to seek help from ChatGPT. I clearly explained the technical requirements and asked it to write a program for Linux, with the possibility of later compiling it for OpenWRT to make it compatible with my home router’s architecture.
To simplify the task, I decided to avoid any control messages or connection initiation processes, focusing purely on UDP encapsulation—even using the same source and destination ports. ChatGPT generated a C program, which surprisingly compiled successfully into a Linux executable.
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <fcntl.h>
#include <sys/ioctl.h>
#include <sys/socket.h>
#include <netinet/in.h>
#include <linux/if.h>
#include <linux/if_tun.h>
#include <arpa/inet.h>
#include <signal.h>
#include <errno.h>
/* Size in bytes of the buffer used for one packet/datagram. */
#define BUFSIZE 2000

/* Main-loop flag: stays 1 while the tunnel should run, set to 0 by a signal. */
static volatile sig_atomic_t running = 1;

/*
 * Signal handler: request shutdown by clearing the `running` flag.
 * The signal number itself is not used.
 */
void handle_signal(int sig)
{
    running = 0;
    (void)sig; /* silence the unused-parameter warning */
}
/*
 * Create (or attach to) a TUN interface and return its file descriptor.
 *
 * dev: in/out interface name. On entry it holds the requested name, of
 *      which at most IFNAMSIZ - 1 characters are used. On return it holds
 *      the name reported back through the TUNSETIFF ioctl, so the buffer
 *      must be able to hold up to IFNAMSIZ bytes.
 *
 * The device is requested with IFF_TUN | IFF_NO_PI.
 *
 * Returns the open descriptor. On any failure a message is printed with
 * perror() and the process terminates with exit(1).
 */
int tun_alloc(char *dev)
{
    struct ifreq ifr;
    int fd;

    fd = open("/dev/net/tun", O_RDWR);
    if (fd < 0) {
        perror("open tun");
        exit(1);
    }

    memset(&ifr, 0, sizeof(ifr));
    ifr.ifr_flags = IFF_TUN | IFF_NO_PI;
    /*
     * strncpy() does not NUL-terminate when the source is too long, so copy
     * at most IFNAMSIZ - 1 bytes; the memset() above keeps the last byte 0.
     */
    strncpy(ifr.ifr_name, dev, IFNAMSIZ - 1);

    if (ioctl(fd, TUNSETIFF, &ifr) < 0) {
        perror("TUNSETIFF");
        close(fd);
        exit(1);
    }

    /* Guarantee termination before the unbounded copy back to the caller. */
    ifr.ifr_name[IFNAMSIZ - 1] = '\0';
    strcpy(dev, ifr.ifr_name);
    return fd;
}
/*
 * Parse a decimal UDP port number.
 * Returns the port (0..65535) on success, or -1 when the text is empty,
 * has trailing characters, or is out of range.
 */
static int parse_port(const char *text)
{
    char *end = NULL;
    long value;

    errno = 0;
    value = strtol(text, &end, 10);
    if (errno != 0 || end == text || *end != '\0')
        return -1;
    if (value < 0 || value > 65535)
        return -1;
    return (int)value;
}

/*
 * Symmetric tunnel endpoint: <tun_name> <peer_ip> <port>.
 *
 * Packets read from the TUN device are sent as UDP datagrams to
 * peer_ip:port; UDP datagrams received on the same local port are written
 * to the TUN device. Datagrams are accepted from any sender: there is no
 * authentication or encryption.
 *
 * Returns 0 after a signal-requested shutdown, 1 on a fatal error.
 */
int main(int argc, char *argv[])
{
    if (argc != 4) {
        fprintf(stderr, "Usage: %s <tun_name> <peer_ip> <port>\n", argv[0]);
        exit(1);
    }

    /* Validate the arguments before creating any interface or socket. */
    int port = parse_port(argv[3]);
    if (port <= 0) {
        fprintf(stderr, "Invalid port: %s\n", argv[3]);
        exit(1);
    }

    struct sockaddr_in peer = {0};
    peer.sin_family = AF_INET;
    peer.sin_port = htons((unsigned short)port);
    if (inet_aton(argv[2], &peer.sin_addr) == 0) {
        fprintf(stderr, "Invalid peer IP address: %s\n", argv[2]);
        exit(1);
    }

    /* install signal handlers */
    signal(SIGINT, handle_signal);
    signal(SIGTERM, handle_signal);

    /* strncpy() alone may leave the name unterminated, so terminate it. */
    char tun_name[IFNAMSIZ];
    strncpy(tun_name, argv[1], IFNAMSIZ - 1);
    tun_name[IFNAMSIZ - 1] = '\0';
    int tun_fd = tun_alloc(tun_name); /* exits the process on failure */

    int sock = socket(AF_INET, SOCK_DGRAM, 0);
    if (sock < 0) {
        perror("socket");
        close(tun_fd);
        exit(1);
    }

    /* The same port number is used locally and on the peer. */
    struct sockaddr_in local = {0};
    local.sin_family = AF_INET;
    local.sin_port = htons((unsigned short)port);
    local.sin_addr.s_addr = INADDR_ANY;
    if (bind(sock, (struct sockaddr *)&local, sizeof(local)) < 0) {
        perror("bind");
        close(sock);
        close(tun_fd);
        exit(1);
    }

    int rc = 0;
    unsigned char buf[BUFSIZE];

    /*
     * NOTE: a signal delivered between the `running` test and select() is
     * only noticed after the next descriptor event; pselect() with a blocked
     * signal mask would close that window.
     */
    while (running) {
        fd_set fds;
        FD_ZERO(&fds);
        FD_SET(tun_fd, &fds);
        FD_SET(sock, &fds);
        int maxfd = (tun_fd > sock ? tun_fd : sock) + 1;

        if (select(maxfd, &fds, NULL, NULL, NULL) < 0) {
            if (errno == EINTR)
                continue; /* interrupted by signal; re-check `running` */
            perror("select");
            rc = 1;
            break;
        }

        /* TUN -> UDP */
        if (FD_ISSET(tun_fd, &fds)) {
            ssize_t n = read(tun_fd, buf, sizeof(buf));
            if (n < 0) {
                if (errno != EINTR && errno != EAGAIN) {
                    /* A failing TUN would otherwise make the loop spin. */
                    perror("read tun");
                    rc = 1;
                    break;
                }
            } else if (n > 0) {
                if (sendto(sock, buf, (size_t)n, 0,
                           (struct sockaddr *)&peer, sizeof(peer)) < 0)
                    perror("sendto"); /* drop this packet, keep running */
            }
        }

        /* UDP -> TUN */
        if (FD_ISSET(sock, &fds)) {
            ssize_t n = recv(sock, buf, sizeof(buf), 0);
            if (n < 0) {
                if (errno != EINTR && errno != EAGAIN)
                    perror("recv");
            } else if (n > 0) {
                if (write(tun_fd, buf, (size_t)n) < 0)
                    perror("write tun"); /* drop this packet, keep running */
            }
        }
    }

    if (rc == 0)
        printf("Exiting cleanly\n");
    close(sock);
    close(tun_fd);
    return rc;
}
I then asked it to prepare a deployment setup using two Docker containers. The outcome worked well, though it took a couple of iterations with the AI before I could build and run it smoothly.
Next, I modified the setup to separate client and server roles: the server listens on UDP port 5555, while the client initiates connections from a random source port. This allowed the tunnel to work even if the client was behind NAT/PAT.

I deployed the server on an AWS VM and tested connectivity from a client running in a Docker container on my home machine. Packet captures confirmed that encapsulation was working correctly.

Finally, I decided to compile the program for OpenWRT. My home router didn’t have enough memory to install GCC and all dependencies, so I had to use cross-compiling. The process was tricky, but I managed to complete it. I was impressed by how well ChatGPT understands OpenWRT architecture and the cross-compiling process.

In the end, I successfully compiled the program for the correct CPU architecture and OpenWRT version, ran the VPN server on my home router, and sent data through the UDP tunnel.


You can watch the entire process in this video on my YouTube channel Dark Wire Labs.
If you like this article, I will write Part 2 about adding payload encryption, peer authentication, and support for multiple simultaneous clients and tunnels on the server.
P.S. The full code is here:
Client (tun_udp_client.c):
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <fcntl.h>
#include <sys/ioctl.h>
#include <sys/socket.h>
#include <netinet/in.h>
#include <linux/if.h>
#include <linux/if_tun.h>
#include <arpa/inet.h>
#include <signal.h>
#include <errno.h>
/* Capacity in bytes of the per-packet buffer. */
#define BUFSIZE 2000

/* Cleared by the signal handler to make the main loop stop. */
static volatile sig_atomic_t running = 1;

/* Signal handler: ask the main loop to stop; the signal number is unused. */
void handle_signal(int sig)
{
    running = 0;
    (void)sig;
}
/*
 * Create (or attach to) a TUN interface and return its file descriptor.
 *
 * dev: in/out interface name. At most IFNAMSIZ - 1 characters of the
 *      requested name are used; on return the buffer receives the name
 *      reported back through the TUNSETIFF ioctl, so it must be able to
 *      hold up to IFNAMSIZ bytes.
 *
 * The device is requested with IFF_TUN | IFF_NO_PI.
 *
 * Returns the open descriptor; on failure prints a perror() message and
 * terminates the process with exit(1).
 */
int tun_alloc(char *dev)
{
    struct ifreq ifr;
    int fd = open("/dev/net/tun", O_RDWR);

    if (fd < 0) {
        perror("open tun");
        exit(1);
    }

    memset(&ifr, 0, sizeof(ifr));
    ifr.ifr_flags = IFF_TUN | IFF_NO_PI;
    /*
     * Copy one byte less than the field size: strncpy() leaves the
     * destination unterminated on truncation, and the zeroed struct
     * supplies the final NUL.
     */
    strncpy(ifr.ifr_name, dev, IFNAMSIZ - 1);

    if (ioctl(fd, TUNSETIFF, &ifr) < 0) {
        perror("TUNSETIFF");
        close(fd);
        exit(1);
    }

    /* Force termination so the strcpy() below cannot run past the field. */
    ifr.ifr_name[IFNAMSIZ - 1] = '\0';
    strcpy(dev, ifr.ifr_name);
    return fd;
}
/*
 * Parse a decimal UDP port number.
 * Returns the port (0..65535) on success, or -1 when the text is empty,
 * has trailing characters, or is out of range.
 */
static int parse_port(const char *text)
{
    char *end = NULL;
    long value;

    errno = 0;
    value = strtol(text, &end, 10);
    if (errno != 0 || end == text || *end != '\0')
        return -1;
    if (value < 0 || value > 65535)
        return -1;
    return (int)value;
}

/*
 * Tunnel client: <tun_name> <server_ip> <server_port> <local_port>.
 *
 * Packets read from the TUN device are sent as UDP datagrams to the server;
 * UDP datagrams received on the local port are written to the TUN device.
 * Datagrams are accepted from any sender: there is no authentication or
 * encryption.
 *
 * Returns 0 after a signal-requested shutdown, 1 on a fatal error.
 */
int main(int argc, char *argv[])
{
    if (argc != 5) {
        fprintf(stderr, "Usage: %s <tun_name> <server_ip> <server_port> <local_port>\n", argv[0]);
        exit(1);
    }

    /* Validate the arguments before creating any interface or socket. */
    int server_port = parse_port(argv[3]);
    if (server_port <= 0) {
        fprintf(stderr, "Invalid server port: %s\n", argv[3]);
        exit(1);
    }
    /* A local port of 0 stays valid: bind() then picks an ephemeral port. */
    int local_port = parse_port(argv[4]);
    if (local_port < 0) {
        fprintf(stderr, "Invalid local port: %s\n", argv[4]);
        exit(1);
    }

    struct sockaddr_in server = {0};
    server.sin_family = AF_INET;
    server.sin_port = htons((unsigned short)server_port);
    if (inet_aton(argv[2], &server.sin_addr) == 0) {
        fprintf(stderr, "Invalid server IP address: %s\n", argv[2]);
        exit(1);
    }

    signal(SIGINT, handle_signal);
    signal(SIGTERM, handle_signal);

    /* strncpy() alone may leave the name unterminated, so terminate it. */
    char tun_name[IFNAMSIZ];
    strncpy(tun_name, argv[1], IFNAMSIZ - 1);
    tun_name[IFNAMSIZ - 1] = '\0';
    int tun_fd = tun_alloc(tun_name); /* exits the process on failure */

    int sock = socket(AF_INET, SOCK_DGRAM, 0);
    if (sock < 0) {
        perror("socket");
        close(tun_fd);
        exit(1);
    }

    /* Bind client to specific local port */
    struct sockaddr_in local = {0};
    local.sin_family = AF_INET;
    local.sin_port = htons((unsigned short)local_port);
    local.sin_addr.s_addr = INADDR_ANY;
    if (bind(sock, (struct sockaddr *)&local, sizeof(local)) < 0) {
        perror("bind");
        close(sock);
        close(tun_fd);
        exit(1);
    }

    int rc = 0;
    unsigned char buf[BUFSIZE];

    /*
     * NOTE: a signal delivered between the `running` test and select() is
     * only noticed after the next descriptor event; pselect() with a blocked
     * signal mask would close that window.
     */
    while (running) {
        fd_set fds;
        FD_ZERO(&fds);
        FD_SET(tun_fd, &fds);
        FD_SET(sock, &fds);
        int maxfd = (tun_fd > sock ? tun_fd : sock) + 1;

        if (select(maxfd, &fds, NULL, NULL, NULL) < 0) {
            if (errno == EINTR)
                continue; /* interrupted by signal; re-check `running` */
            perror("select");
            rc = 1;
            break;
        }

        /* TUN -> send to server */
        if (FD_ISSET(tun_fd, &fds)) {
            ssize_t n = read(tun_fd, buf, sizeof(buf));
            if (n < 0) {
                if (errno != EINTR && errno != EAGAIN) {
                    /* A failing TUN would otherwise make the loop spin. */
                    perror("read tun");
                    rc = 1;
                    break;
                }
            } else if (n > 0) {
                if (sendto(sock, buf, (size_t)n, 0,
                           (struct sockaddr *)&server, sizeof(server)) < 0)
                    perror("sendto"); /* drop this packet, keep running */
            }
        }

        /* UDP -> TUN */
        if (FD_ISSET(sock, &fds)) {
            ssize_t n = recv(sock, buf, sizeof(buf), 0);
            if (n < 0) {
                if (errno != EINTR && errno != EAGAIN)
                    perror("recv");
            } else if (n > 0) {
                if (write(tun_fd, buf, (size_t)n) < 0)
                    perror("write tun"); /* drop this packet, keep running */
            }
        }
    }

    if (rc == 0)
        printf("Client exiting cleanly\n");
    close(sock);
    close(tun_fd);
    return rc;
}
Server (tun_udp_server.c):
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <fcntl.h>
#include <sys/ioctl.h>
#include <sys/socket.h>
#include <netinet/in.h>
#include <linux/if.h>
#include <linux/if_tun.h>
#include <arpa/inet.h>
#include <signal.h>
#include <errno.h>
/* Number of bytes reserved for a single packet/datagram. */
#define BUFSIZE 2000

/* Non-zero while the server loop should keep going; zeroed on a signal. */
static volatile sig_atomic_t running = 1;

/*
 * Signal handler: clear `running` so the main loop terminates.
 * The delivered signal number is ignored.
 */
void handle_signal(int sig)
{
    running = 0;
    (void)sig;
}
/*
 * Create (or attach to) a TUN interface and return its file descriptor.
 *
 * dev: in/out interface name. At most IFNAMSIZ - 1 characters of the
 *      requested name are used; on return the buffer receives the name
 *      reported back through the TUNSETIFF ioctl, so it must be able to
 *      hold up to IFNAMSIZ bytes.
 *
 * The device is requested with IFF_TUN | IFF_NO_PI.
 *
 * Returns the open descriptor; on failure prints a perror() message and
 * terminates the process with exit(1).
 */
int tun_alloc(char *dev)
{
    struct ifreq ifr;
    int fd = open("/dev/net/tun", O_RDWR);

    if (fd < 0) {
        perror("open tun");
        exit(1);
    }

    memset(&ifr, 0, sizeof(ifr));
    ifr.ifr_flags = IFF_TUN | IFF_NO_PI;
    /*
     * Bounded to IFNAMSIZ - 1 because strncpy() does not terminate a
     * truncated copy; the zeroed struct provides the trailing NUL.
     */
    strncpy(ifr.ifr_name, dev, IFNAMSIZ - 1);

    if (ioctl(fd, TUNSETIFF, &ifr) < 0) {
        perror("TUNSETIFF");
        close(fd);
        exit(1);
    }

    /* Force termination so the strcpy() below cannot run past the field. */
    ifr.ifr_name[IFNAMSIZ - 1] = '\0';
    strcpy(dev, ifr.ifr_name);
    return fd;
}
/*
 * Parse a decimal UDP port number.
 * Returns the port (0..65535) on success, or -1 when the text is empty,
 * has trailing characters, or is out of range.
 */
static int parse_port(const char *text)
{
    char *end = NULL;
    long value;

    errno = 0;
    value = strtol(text, &end, 10);
    if (errno != 0 || end == text || *end != '\0')
        return -1;
    if (value < 0 || value > 65535)
        return -1;
    return (int)value;
}

/*
 * Tunnel server: <tun_name> <listen_port>.
 *
 * UDP datagrams received on listen_port are written to the TUN device, and
 * the sender of the most recent non-empty datagram is remembered as the
 * client. Packets read from the TUN device are sent to that client.
 *
 * There is no authentication: any host that sends a datagram becomes the
 * current client.
 *
 * Returns 0 after a signal-requested shutdown, 1 on a fatal error.
 */
int main(int argc, char *argv[])
{
    if (argc != 3) {
        fprintf(stderr, "Usage: %s <tun_name> <listen_port>\n", argv[0]);
        exit(1);
    }

    /* Validate the port before creating any interface or socket. */
    int listen_port = parse_port(argv[2]);
    if (listen_port <= 0) {
        fprintf(stderr, "Invalid listen port: %s\n", argv[2]);
        exit(1);
    }

    signal(SIGINT, handle_signal);
    signal(SIGTERM, handle_signal);

    /* strncpy() alone may leave the name unterminated, so terminate it. */
    char tun_name[IFNAMSIZ];
    strncpy(tun_name, argv[1], IFNAMSIZ - 1);
    tun_name[IFNAMSIZ - 1] = '\0';
    int tun_fd = tun_alloc(tun_name); /* exits the process on failure */

    int sock = socket(AF_INET, SOCK_DGRAM, 0);
    if (sock < 0) {
        perror("socket");
        close(tun_fd);
        exit(1);
    }

    struct sockaddr_in local = {0};
    local.sin_family = AF_INET;
    local.sin_port = htons((unsigned short)listen_port);
    local.sin_addr.s_addr = INADDR_ANY;
    if (bind(sock, (struct sockaddr *)&local, sizeof(local)) < 0) {
        perror("bind");
        close(sock);
        close(tun_fd);
        exit(1);
    }

    int rc = 0;
    unsigned char buf[BUFSIZE];
    struct sockaddr_in client_addr = {0};
    socklen_t client_len = sizeof(client_addr);
    int client_known = 0;

    /*
     * NOTE: a signal delivered between the `running` test and select() is
     * only noticed after the next descriptor event; pselect() with a blocked
     * signal mask would close that window.
     */
    while (running) {
        fd_set fds;
        FD_ZERO(&fds);
        FD_SET(tun_fd, &fds);
        FD_SET(sock, &fds);
        int maxfd = (tun_fd > sock ? tun_fd : sock) + 1;

        if (select(maxfd, &fds, NULL, NULL, NULL) < 0) {
            if (errno == EINTR)
                continue; /* interrupted by signal; re-check `running` */
            perror("select");
            rc = 1;
            break;
        }

        /*
         * Read from TUN -> send to client.
         * The packet is always consumed, even before a client is known:
         * leaving it queued would keep the descriptor readable and make
         * select() return immediately in a busy loop.
         */
        if (FD_ISSET(tun_fd, &fds)) {
            ssize_t n = read(tun_fd, buf, sizeof(buf));
            if (n < 0) {
                if (errno != EINTR && errno != EAGAIN) {
                    /* A failing TUN would otherwise make the loop spin. */
                    perror("read tun");
                    rc = 1;
                    break;
                }
            } else if (n > 0 && client_known) {
                if (sendto(sock, buf, (size_t)n, 0,
                           (struct sockaddr *)&client_addr, client_len) < 0)
                    perror("sendto"); /* drop this packet, keep running */
            }
            /* n > 0 with no client yet: the packet is dropped. */
        }

        /* Read from UDP -> write to TUN */
        if (FD_ISSET(sock, &fds)) {
            struct sockaddr_in src;
            socklen_t len = sizeof(src);
            ssize_t n = recvfrom(sock, buf, sizeof(buf), 0,
                                 (struct sockaddr *)&src, &len);
            if (n < 0) {
                if (errno != EINTR && errno != EAGAIN)
                    perror("recvfrom");
            } else if (n > 0) {
                if (write(tun_fd, buf, (size_t)n) < 0)
                    perror("write tun"); /* drop this packet, keep running */
                /* remember client for response */
                client_addr = src;
                client_len = len;
                client_known = 1;
            }
        }
    }

    if (rc == 0)
        printf("Server exiting cleanly\n");
    close(sock);
    close(tun_fd);
    return rc;
}
See you!
Mikhail