diff --git a/README.md b/README.md index 0853a1c..6f2b08e 100644 --- a/README.md +++ b/README.md @@ -1,3 +1,196 @@ -# dotp +# DNS-TProxy -Domain Transport Proxy with DNS and Netfilter \ No newline at end of file +DNS-based transparent proxy using fake IPs and nftables to redirect and NAT selected domain traffic. +`dotp` could stand for Domain Over Transparent Proxy. + +## Overview + +`dotp` is a small DNS proxy that selectively rewrites A and AAAA records for +specified domains to "fake" IP addresses and maintains a temporary +one-to-one mapping between fake and real destination addresses using nftables. This +allows traffic to be transparently redirected and NAT-ed while keeping +per-mapping state in userspace and in kernel nftables sets. + +The program: + +- Listens for DNS queries on a UDP socket. +- Forwards queries to an upstream DNS server. +- Inspects the DNS responses and, for configured domains, replaces the + returned IPv4/IPv6 addresses with fake addresses from configured prefixes. +- Programs nftables sets to map fake addresses back to the corresponding + real addresses for a short TTL. + +## How it works + +### DNS path + +1. A client sends a DNS query (UDP/53) to `dotp`. +2. `dotp` receives the packet, allocates a per-request `client_ctx`, and opens + a temporary UDP socket to the upstream DNS server. +3. The upstream response is read and parsed: + - DNS header and question section are copied over; the authority and + additional record counts are reset to zero and those sections are not forwarded. + - Answer section is scanned record by record. + - For each name that matches a configured domain: + - CNAME records are copied over, preserving compression and structure. + - A/AAAA records are intercepted: their RDATA (IP address) is replaced + with a fake IP allocated from the configured IPv4/IPv6 prefix pools. +4. For every substituted address: + - A NAT entry is created or looked up in an in-memory hash table. + - A libev timer is armed for 120 seconds (`NAT_TTL`) for the mapping. 
+ - nftables commands are issued (via libnftables) to: + - Add the real address to `inet nat_addr` / `nat_addr6` sets. + - Add a mapping from fake to real in `inet
nat_map` / + `nat_map6` sets. +5. The modified DNS response is sent back to the client. +6. When the NAT timer expires, `dotp` removes the corresponding entries from + the nftables sets and frees the mapping. + +If the upstream DNS response is larger than the internal buffer +(`MAX_MESSAGE_SIZE`), the response is truncated and the TC (truncation) flag is +set in the DNS header. + +### Domain matching + +Domains are supplied via the `-d` option and stored in a tree of labels. During +DNS parsing, domain names in questions and answers are decoded, including +compression pointers, and matched against this tree: + +- Only names under one of the configured domains are subject to rewriting. +- CNAME chains are followed so that subsequent A/AAAA answers for the aliased + name are also rewritten. + +### NAT pool + +`dotp` manages two address pools (IPv4 and IPv6) defined by prefixes passed on +the command line. It uses a simple hash (`city_hash_mix`) to allocate unique +fake addresses within the prefix ranges: + +- For each real address, a `struct ip_nat` is created containing: + - real address and fake address + - family (AF_INET / AF_INET6) + - links for two separate hash chains (by real and by fake address) + - a libev timer (`expire`) that removes the mapping after `NAT_TTL` seconds. +- Lookups by real address refresh the timer. + +## nftables integration + +The code assumes an existing nftables table named `inet dotp` with appropriate +sets/rules. It manipulates the following sets: + +- `nat_addr` / `nat_addr6`: sets of real destination addresses that currently have a mapping. +- `nat_map` / `nat_map6`: maps from fake to real addresses. + +It uses commands like: + +- `add element inet dotp nat_addr { REAL }` +- `add element inet dotp nat_map { FAKE:REAL }` +- `add element inet dotp nat_addr6 { [REAL6] }` +- `add element inet dotp nat_map6 { [FAKE6]:[REAL6] }` + +and their corresponding `delete element` variants on expiry. + +The nftables rules themselves (e.g. 
for DNAT/SNAT using these sets) are not +set up by the program; an example configuration is provided in `src/rules.nft`. + +## Command-line usage + +From the `main` function, the expected usage is: + +```text +Usage: dotp -H LISTEN_HOST -p LISTEN_PORT + -4 FAKE_IP_PREFIX -6 FAKE_IP6_PREFIX + --upstream-host UPSTREAM_HOST + --upstream-port UPSTREAM_PORT + [ -d DOMAIN ] + [ --daemonize ] +``` + +### Options + +- `-H, --host LISTEN_HOST` + IP address to bind the local UDP DNS listener to (IPv4 only in current + implementation). Required. + +- `-p, --port LISTEN_PORT` + Local UDP port to listen on. Defaults to 53. + +- `-d, --domain DOMAIN` + Domain to subject to address rewriting (may be specified multiple times). + Domain syntax is validated (alphanumeric plus `-`, no leading/trailing `-`, + each label up to 63 chars). + +- `-4, --ipv4-prefix FAKE_IP_PREFIX` + IPv4 prefix (e.g. `100.64.0.0/24`) from which fake IPv4 addresses will be + allocated. Prefix length must be ≤ 30. Required. + +- `-6, --ipv6-prefix FAKE_IP6_PREFIX` + IPv6 prefix (e.g. `fd00::/64`) from which fake IPv6 addresses will be + allocated. Prefix length must be ≤ 64. Required. + +- `--upstream-host UPSTREAM_HOST` + IPv4 address of the upstream DNS server to which queries are forwarded. + Required. + +- `--upstream-port UPSTREAM_PORT` + UDP port of the upstream DNS server. Defaults to 53. + +- `--daemonize` + Run the process in the background using `daemon(3)`. + +`dotp` exits with error status if required options are missing or invalid. + +## Runtime behavior + +- Uses `libev` for event-driven I/O: + - One main `server_ctx` for the listening socket. + - A short-lived `client_ctx` for each in-flight upstream query with a + 5-second timeout. +- On `SIGINT` or `SIGTERM`, the event loop is broken and the server exits + cleanly, freeing domain/NAT structures and nftables context. 
+ +## Dependencies + +Build-time and runtime dependencies inferred from `src/main.c`: + +- POSIX sockets (`AF_INET`, `SOCK_DGRAM`, `recvfrom`, `sendto`, `bind`) +- `libev` +- `libnftables` + +On Debian/Ubuntu-like systems, packages may be named: + +- `libev-dev` +- `libnftables-dev` + +## Limitations + +- Only UDP DNS is supported. +- Listener and upstream are currently IPv4-only. +- DNS payload size is limited to `MAX_MESSAGE_SIZE` (0x200 bytes). +- nftables table/sets must be created externally (see `src/rules.nft`). + +## Example + +Assuming: + +- You have an nftables table `inet dotp` set up according to `src/rules.nft`. +- You want to redirect traffic for `example.com` and `foo.example.com`. +- You have fake address ranges: + - IPv4: `100.64.0.0/24` + - IPv6: `fd00::/64` +- Your upstream resolver is `1.1.1.1`. + +You might run: + +```sh +./dotp \ + -H 0.0.0.0 -p 53 \ + -4 100.64.0.0/24 \ + -6 fd00::/64 \ + --upstream-host 1.1.1.1 \ + --upstream-port 53 \ + -d example.com +``` + +Point your clients' DNS to the host running `dotp`. For matching domains, +clients will see fake IPs, while nftables rules will map those fake addresses +back to the real ones for the lifetime of the mapping. 
diff --git a/src/main.c b/src/main.c index 9e71338..b83d57d 100644 --- a/src/main.c +++ b/src/main.c @@ -17,12 +17,17 @@ #include #define MAX_MESSAGE_SIZE 0x200 +#define MAX_FAKE_ADDRESS_SEARCH_ATTEMPS 20 #define DNS_FLAG_TC 0x200 #define DNS_TYPE_A 1 +#define DNS_TYPE_NS 2 +#define DNS_TYPE_CNAME 5 +#define DNS_TYPE_SOA 6 +#define DNS_TYPE_AAAA 28 #define DNS_CLASS_IN 0x0001 -#define NAT_TTL 60 +#define NAT_TTL 120 typedef struct __attribute__((packed)) dns_header { uint16_t id; @@ -40,6 +45,11 @@ typedef struct __attribute__((packed)) dns_answer_header { uint16_t rd_len; } dns_answer_header_t; +typedef union ip_address { + struct in_addr in; + struct in6_addr in6; +} ip_address_t; + typedef struct domain_set { struct domain_set *next; struct domain_set *head; @@ -50,15 +60,17 @@ typedef struct domain_set { typedef struct domain_name { struct domain_name *next; - ssize_t ptr; - domain_set_t *match; + ssize_t ptr, new_ptr; + size_t label_len; + struct domain_name *alias; + domain_set_t *exact_match, *match; } domain_name_t; typedef struct domain_msg { domain_set_t *match_root; ssize_t len; - uint8_t *raw; + uint8_t *raw, *new_raw; domain_name_t *name_head; } domain_msg_t; @@ -69,15 +81,15 @@ typedef struct ip_nat { ev_timer expire; ip_pool_t *pool; struct ip_nat *fake_next, *real_next; - uint32_t fake, real; - int dst_handle, src_handle; + sa_family_t family; + ip_address_t fake_addr, real_addr; } ip_nat_t; typedef struct ip_pool { - uint32_t pf; - uint32_t pf_mask; + struct in_addr net, net_mask; + struct in6_addr net6, net6_mask; - // Indexed by fake + // must be an exponent of 2 size_t size; ip_nat_t **fake; ip_nat_t **real; @@ -107,7 +119,52 @@ typedef struct client_ctx { static struct nft_ctx *nft_ctx; static const char *nft_nat_table = "dotp"; -static char *nft_fake_set; + +static uint64_t city_hash_mix(uint64_t a, uint64_t b) { + a ^= b; + a *= 0x9ddfea08eb382d69ULL; + a ^= (a >> 47); + b ^= a; + b *= 0x9ddfea08eb382d69ULL; + b ^= (b >> 47); + return b * 
0x9ddfea08eb382d69ULL; +} + +static uint64_t city_hash_4(const uint32_t *s) { + return city_hash_mix(((uint64_t)s[0] << 3), s[0]); +} + +static uint64_t city_hash_16(const uint64_t *s) { + return city_hash_mix(s[0], s[1]); +} + +static size_t address_hash(sa_family_t family, ip_address_t const *addr) { + if (family == AF_INET) { + return city_hash_4((uint32_t const *)&addr->in.s_addr); + } else if (family == AF_INET6) { + return city_hash_16((uint64_t const *)&addr->in6.s6_addr); + } + return 0; +} + +static int address_cmp(sa_family_t family, ip_address_t const *addr_a, + ip_address_t const *addr_b) { + if (family == AF_INET) { + return memcmp(&addr_a->in.s_addr, &addr_b->in.s_addr, 4); + } else if (family == AF_INET6) { + return memcmp(&addr_a->in6.s6_addr, &addr_b->in6.s6_addr, 16); + } + return 0; +} + +static void address_copy(sa_family_t family, ip_address_t *addr_dst, + ip_address_t const *addr_src) { + if (family == AF_INET) { + memcpy(&addr_dst->in.s_addr, &addr_src->in.s_addr, 4); + } else if (family == AF_INET6) { + memcpy(&addr_dst->in6.s6_addr, &addr_src->in6.s6_addr, 16); + } +} char *malloc_sprintf(const char *fmt, ...) 
{ char *buffer = NULL; @@ -134,22 +191,14 @@ finish: return buffer; } -static uint32_t ip_hash(uint32_t addr) { - return addr ^ ((addr << 3) | (addr >> 29)) ^ ((addr << 7) | (addr >> 25)) ^ - ((addr << 13) | (addr >> 19)); -} - -static void ip_pool_init(ip_pool_t *pool, uint32_t pf, uint32_t pf_mask) { - pool->pf = pf; - pool->pf_mask = pf_mask; - +static void ip_pool_init(ip_pool_t *pool) { pool->size = 0x1000; pool->fake = (ip_nat_t **)malloc(pool->size * sizeof(ip_nat_t *)); pool->real = (ip_nat_t **)malloc(pool->size * sizeof(ip_nat_t *)); - bzero(pool->fake, pool->size * sizeof(ip_nat_t *)); - bzero(pool->real, pool->size * sizeof(ip_nat_t *)); + memset(pool->fake, 0, pool->size * sizeof(ip_nat_t *)); + memset(pool->real, 0, pool->size * sizeof(ip_nat_t *)); } static void nat_free_chain(ip_nat_t *nat) { @@ -167,41 +216,19 @@ static void ip_pool_fini(ip_pool_t *pool) { free(pool->real); } -static void nat_expire(EV_P_ ev_timer *w, int revents) { - ip_nat_t *nat = (ip_nat_t *)w; +static void nat_remove(ip_nat_t *nat) { ip_pool_t *pool = nat->pool; - ev_timer_stop(EV_A, w); + size_t real_bucket = + address_hash(nat->family, &nat->real_addr) & (pool->size - 1); + size_t fake_bucket = + address_hash(nat->family, &nat->fake_addr) & (pool->size - 1); - char *cmd = malloc_sprintf("delete rule ip %s postrouting handle %d", - nft_nat_table, nat->src_handle); - nft_run_cmd_from_buffer(nft_ctx, cmd); - free(cmd); - - cmd = malloc_sprintf("delete rule ip %s prerouting handle %d", nft_nat_table, - nat->dst_handle); - nft_run_cmd_from_buffer(nft_ctx, cmd); - free(cmd); - - if (nft_fake_set) { - char real_ip[16]; - - uint32_t af_real = htonl(nat->real); - inet_ntop(AF_INET, &af_real, real_ip, 16); - - char *cmd = - malloc_sprintf("delete element %s { %s }", nft_fake_set, real_ip); - nft_run_cmd_from_buffer(nft_ctx, cmd); - free(cmd); - } - - uint32_t mask = pool->size - 1; - - ip_nat_t **p_other = &pool->real[nat->real & mask]; + ip_nat_t **p_other = &pool->real[real_bucket]; 
ip_nat_t *other = *p_other; - for (; other; other = other->real_next) { - if (nat == other) { + for (; other;) { + if (other == nat) { *p_other = nat->real_next; break; } @@ -209,11 +236,11 @@ static void nat_expire(EV_P_ ev_timer *w, int revents) { other = *p_other; } - p_other = &pool->fake[nat->fake & mask]; + p_other = &pool->fake[fake_bucket]; other = *p_other; - for (; other; other = other->fake_next) { - if (nat == other) { + for (; other;) { + if (other == nat) { *p_other = nat->fake_next; break; } @@ -224,26 +251,89 @@ static void nat_expire(EV_P_ ev_timer *w, int revents) { free(nat); } -static ip_nat_t *find_nat(EV_P_ ip_pool_t *pool, uint32_t real_addr) { - size_t mask = pool->size - 1; +static void nat_expire(EV_P_ ev_timer *w, int revents) { + ip_nat_t *nat = (ip_nat_t *)w; - uint32_t h = ip_hash(real_addr); - uint32_t fake_addr; - for (ip_nat_t *nat = pool->real[real_addr & mask]; nat; - nat = nat->real_next) { - if (nat->real == real_addr) { + ev_timer_stop(EV_A, w); + if (nat->family == AF_INET) { + char fake_p[16], real_p[16]; + inet_ntop(AF_INET, &nat->fake_addr.in, fake_p, 16); + inet_ntop(AF_INET, &nat->real_addr.in, real_p, 16); + { + char *cmd = malloc_sprintf("delete element inet %s nat_map { %s }", + nft_nat_table, fake_p); + nft_run_cmd_from_buffer(nft_ctx, cmd); + free(cmd); + } + { + char *cmd = malloc_sprintf("delete element inet %s nat_addr { %s }", + nft_nat_table, real_p); + nft_run_cmd_from_buffer(nft_ctx, cmd); + free(cmd); + } + } else if (nat->family == AF_INET6) { + char fake_p[40], real_p[40]; + inet_ntop(AF_INET6, &nat->fake_addr.in6, fake_p, 40); + inet_ntop(AF_INET6, &nat->real_addr.in6, real_p, 40); + { + char *cmd = malloc_sprintf("delete element inet %s nat_map6 { [%s] }", + nft_nat_table, fake_p); + nft_run_cmd_from_buffer(nft_ctx, cmd); + free(cmd); + } + { + char *cmd = malloc_sprintf("delete element inet %s nat_addr6 { [%s] }", + nft_nat_table, real_p); + nft_run_cmd_from_buffer(nft_ctx, cmd); + free(cmd); + } + } + + 
nat_remove(nat); +} + +static ip_nat_t *nat_find(EV_P_ ip_pool_t *pool, sa_family_t family, + ip_address_t const *addr) { + size_t real_bucket = address_hash(family, addr) & (pool->size - 1); + + for (ip_nat_t *nat = pool->real[real_bucket]; nat; nat = nat->real_next) { + if (nat->family == family && !address_cmp(family, &nat->real_addr, addr)) { ev_timer_again(EV_A, &nat->expire); return nat; } } int ok = 0; - for (; h != 0;) { - fake_addr = pool->pf | (h & ~pool->pf_mask); + ip_address_t fake_addr; + size_t fake_bucket; + + uint64_t h = address_hash(family, addr); + + for (int k = 0; k < MAX_FAKE_ADDRESS_SEARCH_ATTEMPS; k++) { ok = 1; - for (ip_nat_t *nat = pool->fake[fake_addr & mask]; nat; - nat = nat->fake_next) { - if (nat->fake == fake_addr) { + if (family == AF_INET) { + fake_addr.in.s_addr = + pool->net.s_addr | ((uint32_t)h & ~pool->net_mask.s_addr); + } else { + fake_addr.in6.s6_addr32[0] = + pool->net6.s6_addr32[0] | + (addr->in6.s6_addr32[0] & ~pool->net6_mask.s6_addr32[0]); + fake_addr.in6.s6_addr32[1] = + pool->net6.s6_addr32[1] | + (addr->in6.s6_addr32[1] & ~pool->net6_mask.s6_addr32[1]); + fake_addr.in6.s6_addr32[2] = + pool->net6.s6_addr32[2] | + ((uint32_t)(h >> 32) & ~pool->net6_mask.s6_addr32[2]); + fake_addr.in6.s6_addr32[3] = + pool->net6.s6_addr32[3] | + ((uint32_t)h & ~pool->net6_mask.s6_addr32[3]); + } + + fake_bucket = address_hash(family, &fake_addr) & (pool->size - 1); + + for (ip_nat_t *nat = pool->fake[fake_bucket]; nat; nat = nat->fake_next) { + if (nat->family == family && + !address_cmp(family, &nat->fake_addr, &fake_addr)) { ok = 0; break; } @@ -253,63 +343,63 @@ static ip_nat_t *find_nat(EV_P_ ip_pool_t *pool, uint32_t real_addr) { } h++; } - if (h == 0) { + if (!ok) { return NULL; - } else { - ip_nat_t *nat = (ip_nat_t *)malloc(sizeof(ip_nat_t)); + } + ip_nat_t *nat = (ip_nat_t *)malloc(sizeof(ip_nat_t)); - nat->pool = pool; + nat->pool = pool; - ev_init(&nat->expire, nat_expire); - nat->expire.repeat = NAT_TTL; - 
ev_timer_again(EV_A, &nat->expire); + ev_init(&nat->expire, nat_expire); + nat->expire.repeat = NAT_TTL; + ev_timer_again(EV_A, &nat->expire); - nat->fake = fake_addr; - nat->real = real_addr; + nat->family = family; + address_copy(family, &nat->fake_addr, &fake_addr); + address_copy(family, &nat->real_addr, addr); - nat->fake_next = pool->fake[fake_addr & mask]; - pool->fake[fake_addr & mask] = nat; + nat->fake_next = pool->fake[fake_bucket]; + pool->fake[fake_bucket] = nat; - nat->real_next = pool->real[real_addr & mask]; - pool->real[real_addr & mask] = nat; + nat->real_next = pool->real[real_bucket]; + pool->real[real_bucket] = nat; - char real_ip[16], fake_ip[16]; - uint32_t af_fake = htonl(fake_addr); - uint32_t af_real = htonl(real_addr); + if (family == AF_INET) { + char real_p[16], fake_p[16]; + inet_ntop(AF_INET, &nat->fake_addr.in, fake_p, 16); + inet_ntop(AF_INET, &nat->real_addr.in, real_p, 16); - inet_ntop(AF_INET, &af_fake, fake_ip, 16); - inet_ntop(AF_INET, &af_real, real_ip, 16); - - if (nft_fake_set) { - char *cmd = - malloc_sprintf("add element %s { %s }", nft_fake_set, real_ip); + { + char *cmd = malloc_sprintf("add element inet %s nat_addr { %s }", + nft_nat_table, real_p); nft_run_cmd_from_buffer(nft_ctx, cmd); free(cmd); } + { + char *cmd = malloc_sprintf("add element inet %s nat_map { %s:%s }", + nft_nat_table, fake_p, real_p); + nft_run_cmd_from_buffer(nft_ctx, cmd); + free(cmd); + } + } else if (family == AF_INET6) { + char real_p[40], fake_p[40]; + inet_ntop(AF_INET6, &nat->fake_addr.in6, fake_p, 40); + inet_ntop(AF_INET6, &nat->real_addr.in6, real_p, 40); - nft_ctx_buffer_output(nft_ctx); - char *cmd = - malloc_sprintf("add rule ip %s prerouting ip daddr %s dnat to %s", - nft_nat_table, fake_ip, real_ip); - nft_run_cmd_from_buffer(nft_ctx, cmd); - char *echo_fmt = malloc_sprintf("%s # handle %%d", cmd); - sscanf(nft_ctx_get_output_buffer(nft_ctx), echo_fmt, &nat->dst_handle); - free(echo_fmt); - free(cmd); - 
nft_ctx_unbuffer_output(nft_ctx); - - nft_ctx_buffer_output(nft_ctx); - cmd = malloc_sprintf("add rule ip %s postrouting ip saddr %s snat to %s", - nft_nat_table, real_ip, fake_ip); - nft_run_cmd_from_buffer(nft_ctx, cmd); - echo_fmt = malloc_sprintf("%s # handle %%d", cmd); - sscanf(nft_ctx_get_output_buffer(nft_ctx), echo_fmt, &nat->src_handle); - free(echo_fmt); - free(cmd); - nft_ctx_unbuffer_output(nft_ctx); - - return nat; + { + char *cmd = malloc_sprintf("add element inet %s nat_addr6 { [%s] }", + nft_nat_table, real_p); + nft_run_cmd_from_buffer(nft_ctx, cmd); + free(cmd); + } + { + char *cmd = malloc_sprintf("add element inet %s nat_map6 { [%s]:[%s] }", + nft_nat_table, fake_p, real_p); + nft_run_cmd_from_buffer(nft_ctx, cmd); + free(cmd); + } } + return nat; } static void free_domain_names(domain_name_t *head) { @@ -323,12 +413,6 @@ static ssize_t read_domain_name(domain_msg_t *msg, ssize_t ptr, domain_name_t **p_name) { domain_name_t *name = NULL; - for (name = msg->name_head; name; name = name->next) { - if (name->ptr == ptr) { - *p_name = name; - return 0; - } - } if (ptr >= msg->len) { *p_name = NULL; return -1; @@ -337,32 +421,42 @@ static ssize_t read_domain_name(domain_msg_t *msg, ssize_t ptr, if ((label_len & 0xc0) == 0) { name = (domain_name_t *)malloc(sizeof(domain_name_t)); name->ptr = ptr; + name->new_ptr = 0; + name->label_len = label_len; ptr++; + name->alias = name; + if (label_len == 0) { - name->match = msg->match_root; + name->match = name->exact_match = msg->match_root; } else { if (ptr + label_len > msg->len) { goto label_fail; } domain_name_t *parent; - ssize_t new_ptr = read_domain_name(msg, ptr + label_len, &parent); + ssize_t next_ptr = read_domain_name(msg, ptr + label_len, &parent); if (!parent) { goto label_fail; } - if (parent->match && parent->match->head) { + if (parent->exact_match && parent->exact_match->head) { domain_set_t *child; - for (child = parent->match->head; child; child = child->next) { + for (child = 
parent->exact_match->head; child; child = child->next) { if (child->label_len == label_len && !memcmp(child->label, msg->raw + ptr, label_len)) { break; } } - name->match = child; + if (child) { + name->match = name->exact_match = child; + } else { + name->exact_match = NULL; + name->match = parent->match; + } } else { + name->exact_match = NULL; name->match = parent->match; } - ptr = new_ptr; + ptr = next_ptr; } name->next = msg->name_head; @@ -379,28 +473,69 @@ static ssize_t read_domain_name(domain_msg_t *msg, ssize_t ptr, ptr++; if (ptr >= msg->len) { *p_name = NULL; - goto ptr_fail; + return -1; } - ssize_t new_ptr = ((label_len & 0x3f) << 8) | *(msg->raw + ptr); - if (new_ptr >= msg->len) { + size_t ref_ptr = ((label_len & 0x3f) << 8) | *(msg->raw + ptr); + if (ref_ptr >= msg->len) { *p_name = NULL; - goto ptr_fail; + return -1; } ptr++; - if (read_domain_name(msg, new_ptr, p_name) < 0) { - goto ptr_fail; - } else { - return ptr; + + while (ref_ptr < ptr - 2 && (*(msg->raw + ref_ptr) & 0xc0)) { + ref_ptr = + ((*(msg->raw + ref_ptr) & 0x3f) << 8) | *(msg->raw + ref_ptr + 1); } - ptr_fail: - return -1; + for (name = msg->name_head; name; name = name->next) { + if (name->ptr == ref_ptr) { + *p_name = name; + break; + } + } + + if (!name) { + *p_name = NULL; + return -1; + } + return ptr; } else { *p_name = NULL; return -1; } } +static ssize_t copy_domain_name(domain_msg_t *msg, domain_name_t *name, + ssize_t new_ptr) { + if (name->new_ptr) { + if (new_ptr + 2 > msg->len) { + return -1; + } + *(msg->new_raw + new_ptr) = 0xc0 | ((name->new_ptr >> 8) & 0x3f); + *(msg->new_raw + new_ptr + 1) = name->new_ptr & 0xff; + return new_ptr + 2; + } else if (name->label_len == 0) { + if (new_ptr + 1 > msg->len) { + return -1; + } + name->new_ptr = new_ptr; + *(msg->new_raw + new_ptr) = 0; + new_ptr++; + return new_ptr; + } else { + if (new_ptr + 1 + name->label_len > msg->len) { + return -1; + } + name->new_ptr = new_ptr; + memcpy(msg->new_raw + new_ptr, msg->raw + 
name->ptr, name->label_len + 1); + new_ptr += name->label_len + 1; + if (!name->next) { + return -1; + } + return copy_domain_name(msg, name->next, new_ptr); + } +} + static void client_read(EV_P_ ev_io *w, int revents) { client_ctx_t *client_ctx = (client_ctx_t *)w; @@ -412,6 +547,7 @@ static void client_read(EV_P_ ev_io *w, int revents) { msg->name_head = NULL; msg->raw = malloc(MAX_MESSAGE_SIZE); + msg->new_raw = malloc(MAX_MESSAGE_SIZE); msg->len = recvfrom(client_ctx->fd, msg->raw, MAX_MESSAGE_SIZE, MSG_TRUNC, NULL, NULL); @@ -419,63 +555,151 @@ static void client_read(EV_P_ ev_io *w, int revents) { goto fail; } - dns_header_t *header = (dns_header_t *)msg->raw; + memcpy(msg->new_raw, msg->raw, sizeof(dns_header_t)); + dns_header_t *new_header = (dns_header_t *)msg->new_raw; + + ssize_t new_ptr = MAX_MESSAGE_SIZE; if (msg->len > MAX_MESSAGE_SIZE) { - header->flags = htons(ntohs(header->flags) | DNS_FLAG_TC); + memcpy(msg->new_raw, msg->raw, MAX_MESSAGE_SIZE); + new_header->flags = htons(ntohs(new_header->flags) | DNS_FLAG_TC); goto send; } - ssize_t ptr = sizeof(dns_header_t); + new_header->ns_count = 0; + new_header->ar_count = 0; - for (uint16_t i = 0; i < ntohs(header->qd_count); i++) { + ssize_t ptr = sizeof(dns_header_t); + new_ptr = sizeof(dns_header_t); + uint16_t new_an_count = 0; + + for (uint16_t i = 0; i < ntohs(new_header->qd_count); i++) { domain_name_t *name; - ssize_t new_ptr = read_domain_name(msg, ptr, &name); + ptr = read_domain_name(msg, ptr, &name); + if (ptr < 0) { + goto fail; + } + // Copy the query name to new response + new_ptr = copy_domain_name(msg, name, new_ptr); if (new_ptr < 0) { goto fail; } - ptr = new_ptr; - // Skip type and class + + // Copy type and class + if (ptr + 4 > msg->len) { + goto fail; + } + memcpy(msg->new_raw + new_ptr, msg->raw + ptr, 4); + new_ptr += 4; ptr += 4; } - for (uint16_t i = 0; i < ntohs(header->an_count); i++) { + for (uint16_t i = 0; i < ntohs(new_header->an_count); i++) { domain_name_t *name; - 
ssize_t new_ptr = read_domain_name(msg, ptr, &name); - if (new_ptr < 0) { + ptr = read_domain_name(msg, ptr, &name); + if (ptr < 0) { goto fail; } - ptr = new_ptr; - + if (ptr + sizeof(dns_answer_header_t) > msg->len) { + goto fail; + } dns_answer_header_t *an_header = (dns_answer_header_t *)(msg->raw + ptr); ptr += sizeof(dns_answer_header_t); - - if (name->match && ntohs(an_header->an_type) == DNS_TYPE_A && - ntohs(an_header->an_class) == DNS_CLASS_IN && - ntohs(an_header->rd_len) == 4) { - // Replace answer with fake ip - - uint32_t *p_addr = (uint32_t *)(msg->raw + ptr); - - uint32_t real_addr = ntohl(*p_addr); - ip_nat_t *nat = find_nat(EV_A, client_ctx->ip_pool, real_addr); - - if (nat) { - *p_addr = htonl(nat->fake); - an_header->an_ttl = htonl(NAT_TTL); - } + size_t record_len = ntohs(an_header->rd_len); + if (ptr + record_len > msg->len) { + goto fail; } - ptr += ntohs(an_header->rd_len); + if (name->alias->match && ntohs(an_header->an_type) == DNS_TYPE_CNAME) { + // Copy CNAME records + domain_name_t *cname; + ptr = read_domain_name(msg, ptr, &cname); + if (ptr < 0) { + goto fail; + } + cname->alias = name->alias; + + new_ptr = copy_domain_name(msg, name, new_ptr); + if (new_ptr < 0) { + goto fail; + } + + dns_answer_header_t *new_an_header = + (dns_answer_header_t *)(msg->new_raw + new_ptr); + + new_an_header->an_type = an_header->an_type; + new_an_header->an_class = an_header->an_class; + new_an_header->an_ttl = an_header->an_ttl; + + new_ptr += sizeof(dns_answer_header_t); + + ssize_t next_new_ptr = copy_domain_name(msg, cname, new_ptr); + if (next_new_ptr < 0) { + goto fail; + } + new_an_header->rd_len = htons(next_new_ptr - new_ptr); + new_ptr = next_new_ptr; + + new_an_count++; + } else if (name->alias->match && + ntohs(an_header->an_class) == DNS_CLASS_IN && + (ntohs(an_header->an_type) == DNS_TYPE_A || + ntohs(an_header->an_type) == DNS_TYPE_AAAA)) { + // Replace address with fake ip, and set a short TTL + new_ptr = copy_domain_name(msg, name, 
new_ptr); + if (new_ptr < 0) { + goto fail; + } + + if (new_ptr + sizeof(dns_answer_header_t) + record_len > msg->len) { + goto fail; + } + dns_answer_header_t *new_an_header = + (dns_answer_header_t *)(msg->new_raw + new_ptr); + new_ptr += sizeof(dns_answer_header_t); + + new_an_header->an_type = an_header->an_type; + new_an_header->an_class = an_header->an_class; + + sa_family_t family; + if (ntohs(an_header->an_type) == DNS_TYPE_A) { + family = AF_INET; + } else if (ntohs(an_header->an_type) == DNS_TYPE_AAAA) { + family = AF_INET6; + } + + ip_nat_t *nat = nat_find(EV_A, client_ctx->ip_pool, family, + (ip_address_t *)(msg->raw + ptr)); + + new_an_header->rd_len = an_header->rd_len; + if (nat) { + new_an_header->an_ttl = htonl(NAT_TTL); + address_copy(family, (ip_address_t *)(msg->new_raw + new_ptr), + &nat->fake_addr); + } else { + new_an_header->an_ttl = an_header->an_ttl; + address_copy(family, (ip_address_t *)(msg->new_raw + new_ptr), + (ip_address_t *)(msg->raw + ptr)); + } + ptr += record_len; + new_ptr += record_len; + + new_an_count++; + } else { + ptr += record_len; + } } + new_header->an_count = htons(new_an_count); + send: - sendto(client_ctx->server_fd, msg->raw, msg->len, 0, + sendto(client_ctx->server_fd, msg->new_raw, new_ptr, 0, (struct sockaddr *)&client_ctx->client_addr, client_ctx->client_addr_len); fail: free_domain_names(msg->name_head); + free(msg->new_raw); free(msg->raw); close(client_ctx->fd); @@ -621,8 +845,7 @@ static const char *parse_nft_string(const char *s) { #define OPT_UPSTREAM_HOST 0x101 #define OPT_UPSTREAM_PORT 0x102 -#define OPT_FAKE_SET 0x103 -#define OPT_DAEMONIZE 0x104 +#define OPT_DAEMONIZE 0x103 int main(int argc, char *const *argv) { domain_set_t *domain_set = domain_set_new("", 0); @@ -637,15 +860,15 @@ int main(int argc, char *const *argv) { listen_addr.sin_family = AF_INET; upstream_addr.sin_family = AF_INET; - const char *options = "h:p:d:x:"; + const char *options = "H:p:d:4:6:"; struct option long_options[] = { - 
{"host", required_argument, NULL, 'h'}, + {"host", required_argument, NULL, 'H'}, {"port", required_argument, NULL, 'p'}, {"domain", required_argument, NULL, 'd'}, - {"prefix", required_argument, NULL, 'x'}, + {"ipv4-prefix", required_argument, NULL, '4'}, + {"ipv6-prefix", required_argument, NULL, '6'}, {"upstream-host", required_argument, NULL, OPT_UPSTREAM_HOST}, {"upstream-port", required_argument, NULL, OPT_UPSTREAM_PORT}, - {"fake-set", required_argument, NULL, OPT_FAKE_SET}, {"daemonize", no_argument, NULL, OPT_DAEMONIZE}, {NULL, 0, NULL, 0}}; int option_index; @@ -660,7 +883,7 @@ int main(int argc, char *const *argv) { while ((o = getopt_long(argc, argv, options, long_options, &option_index)) != -1) { switch (o) { - case 'h': + case 'H': if (inet_pton(AF_INET, optarg, &listen_addr.sin_addr) != 1) { goto fail; } else { @@ -677,26 +900,49 @@ int main(int argc, char *const *argv) { goto fail; } break; - case 'x': { + case '4': { char *sep = strchr(optarg, '/'); if (!sep) { goto fail; } *sep = 0; - uint32_t af_addr; - unsigned pf_len; - if (inet_pton(AF_INET, optarg, &af_addr) != 1 || - sscanf(sep + 1, "%u", &pf_len) == -1 || pf_len > 30) { + unsigned prefix_len; + if (inet_pton(AF_INET, optarg, &ip_pool.net) != 1 || + sscanf(sep + 1, "%u", &prefix_len) == -1 || prefix_len > 30) { goto fail; } - uint32_t pf = ntohl(af_addr); - uint32_t pf_mask = ~((1 << (32 - pf_len)) - 1); - if (pf & ~pf_mask) { + ip_pool.net_mask.s_addr = htonl(~((1UL << (32 - prefix_len)) - 1)); + ip_pool.net.s_addr &= ip_pool.net_mask.s_addr; + + prefix_set++; + break; + } + case '6': { + char *sep = strchr(optarg, '/'); + if (!sep) { goto fail; } - ip_pool_init(&ip_pool, pf, pf_mask); - prefix_set = 1; + *sep = 0; + + unsigned prefix_len; + if (inet_pton(AF_INET6, optarg, &ip_pool.net6) != 1 || + sscanf(sep + 1, "%u", &prefix_len) == -1 || prefix_len > 64) { + goto fail; + } + for (int i = 0; i < 4; i++) { + if (prefix_len <= i << 5) { + ip_pool.net6_mask.s6_addr32[i] = 0UL; + } else if 
(prefix_len >= (i + 1) << 5) { + ip_pool.net6_mask.s6_addr32[i] = 0xffffffffUL; + } else { + ip_pool.net6_mask.s6_addr32[i] = + htonl(~((1UL << (32 - (prefix_len - (i << 5)))) - 1)); + } + ip_pool.net6.s6_addr32[i] &= ip_pool.net6_mask.s6_addr32[i]; + } + + prefix_set++; break; } case OPT_UPSTREAM_HOST: @@ -710,33 +956,6 @@ int main(int argc, char *const *argv) { goto fail; } break; - case OPT_FAKE_SET: { - const char *s = optarg; - size_t t[3]; - t[0] = parse_nft_string(s) - s; - if (s[t[0]] != '#' || t[0] == 0) { - goto fail; - } - - t[1] = parse_nft_string(s + t[0] + 1) - s; - if (s[t[1]] != '#' || t[1] == t[0] + 1) { - goto fail; - } - - t[2] = parse_nft_string(s + t[1] + 1) - s; - if (s[t[2]] != '\0' || t[2] == t[1] + 1) { - goto fail; - } - - char *fake_set = malloc(t[2] + 1); - - memcpy(fake_set, s, t[2] + 1); - fake_set[t[0]] = ' '; - fake_set[t[1]] = ' '; - nft_fake_set = fake_set; - - break; - } case OPT_DAEMONIZE: if (daemon(0, 0) == -1) { domain_set_fini(domain_set); @@ -750,21 +969,34 @@ int main(int argc, char *const *argv) { case '?': fprintf(stderr, "Unrecognized option: %s\n", optarg); opterr = 1; + // clang-format off fprintf(stderr, - "Usage: %s -h LISTEN_HOST -p LISTEN_PORT\n" - " -x FAKE_IP_PREFIX [-d DOMAIN]\n" - " --upstream-host UPSTREAM_HOST\n" - " --upstream-port UPSTREAM_PORT\n" - " [ --fake-set FAKE_SET ]\n" - " [ --daemonize ]\n", + "Usage: %s [OPTIONS]\n\n" + "Required options:\n" + " -H, --host ADDR IPv4 address to listen on for DNS queries.\n" + " -4, --ipv4-prefix PREFIX IPv4 fake address prefix (e.g. 100.64.0.0/24).\n" + " -6, --ipv6-prefix PREFIX IPv6 fake address prefix (e.g. 
fd00::/64).\n" + " --upstream-host ADDR IPv4 address of upstream DNS server.\n\n" + "Optional options:\n" + " -p, --port PORT UDP port to listen on (default: 53).\n" + " --upstream-port PORT UDP port of upstream DNS server (default: 53).\n" + " -d, --domain DOMAIN Domain to rewrite (may be given multiple times).\n" + " --daemonize Run in background (daemonize).\n", argv[0]); + // clang-format on break; } } - if (!opterr && !(listen_host_set && prefix_set && upstream_host_set)) { + if (!opterr && !(listen_host_set && prefix_set == 2 && upstream_host_set)) { + // clang-format off fprintf(stderr, - "LISTEN_ADDR, FAKE_IP_PREFIX, UPSTREAM_HOST must be set.\n"); + "Missing required options. You must provide:\n" + " -H / --host (listen address),\n" + " -4 / --ipv4-prefix (fake IPv4 prefix),\n" + " -6 / --ipv6-prefix (fake IPv6 prefix),\n" + " --upstream-host (upstream DNS IPv4 address).\n"); + // clang-format on opterr = 1; } @@ -778,26 +1010,11 @@ int main(int argc, char *const *argv) { nft_ctx = nft_ctx_new(NFT_CTX_DEFAULT); if (!nft_ctx) { - perror("Failed to create nftables context."); + perror("Failed to create nftables context"); } - nft_ctx_output_set_flags(nft_ctx, - NFT_CTX_OUTPUT_HANDLE | NFT_CTX_OUTPUT_ECHO); + nft_ctx_output_set_flags(nft_ctx, NFT_CTX_OUTPUT_ECHO); - char *cmd = malloc_sprintf("add table ip %s", nft_nat_table); - nft_run_cmd_from_buffer(nft_ctx, cmd); - free(cmd); - - cmd = malloc_sprintf("add chain ip %s prerouting" - "{ type nat hook prerouting priority dstnat; }", - nft_nat_table); - nft_run_cmd_from_buffer(nft_ctx, cmd); - free(cmd); - - cmd = malloc_sprintf("add chain ip %s postrouting" - "{ type nat hook postrouting priority srcnat; }", - nft_nat_table); - nft_run_cmd_from_buffer(nft_ctx, cmd); - free(cmd); + ip_pool_init(&ip_pool); struct ev_loop *loop = ev_default_loop(0); @@ -833,12 +1050,6 @@ int main(int argc, char *const *argv) { ip_pool_fini(&ip_pool); - cmd = malloc_sprintf("delete table ip %s", nft_nat_table); - 
nft_run_cmd_from_buffer(nft_ctx, cmd); - free(cmd); - - free(nft_fake_set); - nft_ctx_free(nft_ctx); domain_set_fini(domain_set); diff --git a/src/rules.nft b/src/rules.nft new file mode 100644 index 0000000..b779219 --- /dev/null +++ b/src/rules.nft @@ -0,0 +1,37 @@ +table inet dotp { + map nat_map { + type ipv4_addr : ipv4_addr + } + + map nat_map6 { + type ipv6_addr : ipv6_addr + } + + chain prerouting { + type nat hook prerouting priority dstnat; policy accept; + dnat ip to ip daddr map @nat_map + dnat ip6 to ip6 daddr map @nat_map6 + } + + set nat_addr { + type ipv4_addr + } + + set nat_addr6 { + type ipv6_addr + } + + chain proxy { + meta mark set 1 + meta l4proto tcp counter tproxy ip to 127.0.0.1:2040 accept + meta l4proto udp counter tproxy ip to 127.0.0.1:2040 accept + meta l4proto tcp counter tproxy ip6 to [::1]:2040 accept + meta l4proto udp counter tproxy ip6 to [::1]:2040 accept + } + + chain proxy_dst_check { + type filter hook prerouting priority dstnat + 1; policy accept; + ip daddr @nat_addr goto proxy + ip6 daddr @nat_addr6 goto proxy + } +}