From e3f64ceae592ed8cb9dc98d1e49e4577cd0838a5 Mon Sep 17 00:00:00 2001 From: RGBCube Date: Mon, 9 Jun 2025 14:55:06 +0300 Subject: [PATCH] blog: add ngiNIX post --- site.ts | 38 +++-- site/blog/2025-06-09-nginix.md | 263 +++++++++++++++++++++++++++++++++ 2 files changed, 291 insertions(+), 10 deletions(-) create mode 100644 site/blog/2025-06-09-nginix.md diff --git a/site.ts b/site.ts index cbd568a..38ab77b 100644 --- a/site.ts +++ b/site.ts @@ -66,17 +66,36 @@ site.process([".html"], (pages) => }); document.querySelectorAll("pre code").forEach((code) => { - const element = code.parentElement!; - const wrapper = document.createElement("div"); + const matches = code.innerHTML.match(/\(\(\(\d+\)\)\)/g); + if (matches) { // CALLOUTS + let newHTML = code.innerHTML; - element.classList.add("transform-[rotateX(180deg)]"); - wrapper.classList.add( - "transform-[rotateX(180deg)]", - "overflow-x-auto", - ); + matches.forEach((match) => { + console.log( + `${match.replaceAll(/\(|\)/g, "")}`, + ); + newHTML = newHTML.replace( + match, + `${match.replaceAll(/\(|\)/g, "")}`, + ); + }); - element.parentNode!.insertBefore(wrapper, element); - wrapper.appendChild(element); + code.innerHTML = newHTML; + } + + { // ROTATION + const element = code.parentElement!; + const wrapper = document.createElement("div"); + + element.classList.add("transform-[rotateX(180deg)]"); + wrapper.classList.add( + "transform-[rotateX(180deg)]", + "overflow-x-auto", + ); + + element.parentNode!.insertBefore(wrapper, element); + wrapper.appendChild(element); + } }); document @@ -145,7 +164,6 @@ site.process([".html"], (pages) => if (addFooter) { const hr = document.createElement("hr"); - hr.classList.add("my-1.5"); const li = document.createElement("li"); li.id = footnoteId; diff --git a/site/blog/2025-06-09-nginix.md b/site/blog/2025-06-09-nginix.md new file mode 100644 index 0000000..8edcdc8 --- /dev/null +++ b/site/blog/2025-06-09-nginix.md @@ -0,0 +1,263 @@ +--- +title: NgiNix +description: How the Nixpkgs Nginx differs and why it's better. + +unlisted: true + +tags: +- nginx +- nix/os +--- + +[Nginx](https://nginx.org) automatically creates +[ETag](https://developer.mozilla.org/en-US/docs/Web/HTTP/Reference/Headers/ETag) +headers for static files. But it does it in such a way that is suboptimal. + +I'll explain why that is, and how the +[Nixpkgs distribution of Nginx](https://github.com/NixOS/nixpkgs/blob/8c742e834fd39a9b912a9237b5a6453cb4ec222b/pkgs/servers/http/nginx/generic.nix) +does it better. + +First, let's take a look at how +[Nginx sets Etags](https://github.com/nginx/nginx/blob/5b8a5c08ce28639e788734b2528faad70baa113c/src/http/ngx_http_core_module.c#L1681-L1716): + +```c +// in ngx_http_set_etag @ src/http/ngx_http_core_module.c:1681 + +(((1))) +etag = ngx_list_push(&r->headers_out.headers); (((1))) +if (etag == NULL) { + return NGX_ERROR; +} + +(((2))) +etag->hash = 1; +etag->next = NULL; +ngx_str_set(&etag->key, "ETag"); + +(((3))) +etag->value.data = ngx_pnalloc(r->pool, NGX_OFF_T_LEN + NGX_TIME_T_LEN + 3); +if (etag->value.data == NULL) { + etag->hash = 0; + return NGX_ERROR; +} + +(((4))) +etag->value.len = ngx_sprintf(etag->value.data, "\"%xT-%xO\"", + r->headers_out.last_modified_time, + r->headers_out.content_length_n) + - etag->value.data; +``` + +1. Here we create an empty header entry in the requests's outgoing headers. + +2. We initialize it, and set its key to "ETag". + +3. We then allocate memory for the ETag value from the current request's arena. + The size is calculated to be large enough to hold the timestamp + (`NGX_TIME_T_LEN`), the content length (`NGX_OFF_T_LEN`), and extra + characters (two quotes " and a hyphen -). + +4. And then we fill the header's value to a string like `"5f7e1b2a-1c8f"` using + the last modified time of the file being served and its content length. + Something weird is `- etag->value.data`, which is required because + `ngx_sprintf` returns a pointer to the end of the string. We do it to get the + start. + +Result: The outgoing request now contains an `Etag` header, which is based off +of the file's length and last modified date. + +## But wait... + +What if we use a tool that aims to produce +[reproducible build artifacts](https://reproducible-builds.org/) to build our +website, and we just so happen to modify a file in such a way that doesn't +change the size of its contents? (more common than you think) + +> For example: +> Nix[^[A hermetic, declarative, reproducible and input(and soon content)-addressed build system.](https://nix.dev/)], +> which _always_ sets the last-modified timestamps for files under the Nix +> store[^The place Nix stores all build results, _usually_ `/nix/store`. Every +> "Nix store path" is represented like so: +> `/nix/store/6bxcxc6xvg5xv70z55adcwhgik5m41a0-package-1.0.0`, where the hash is +> derived from its inputs] to `1`, which corresponds to January 1st, 1970, +> 00:00:01 UTC. + +In that case, neither the last modified date or the content length would same. +So the `ETag` header would stay **constant, which would make the client assume +the asset has not changed**. + +## Silently Broken + +The caching on your website being silently broken is a monumental issue, and has +to be fixed. The way Nixpkgs does it is through +[this patch:](https://github.com/NixOS/nixpkgs/blob/8c742e834fd39a9b912a9237b5a6453cb4ec222b/pkgs/servers/http/nginx/nix-etag-1.15.4.patch) + +```patch +This patch makes it possible to serve static content from Nix store paths, by +using the hash of the store path for the ETag header. + +diff --git a/src/http/ngx_http_core_module.c b/src/http/ngx_http_core_module.c +index 97a91aee2..2d07d71e6 100644 +--- a/src/http/ngx_http_core_module.c ++++ b/src/http/ngx_http_core_module.c +@@ -1676,6 +1676,8 @@ ngx_http_set_etag(ngx_http_request_t *r) + { + ngx_table_elt_t *etag; + ngx_http_core_loc_conf_t *clcf; + (((2))) ++ u_char *real, *ptr1, *ptr2; ++ ngx_err_t err; + + clcf = ngx_http_get_module_loc_conf(r, ngx_http_core_module); + +@@ -1692,16 +1694,82 @@ ngx_http_set_etag(ngx_http_request_t *r) + etag->next = NULL; + ngx_str_set(&etag->key, "ETag"); + + (((1))) +- etag->value.data = ngx_pnalloc(r->pool, NGX_OFF_T_LEN + NGX_TIME_T_LEN + 3); +- if (etag->value.data == NULL) { +- etag->hash = 0; +- return NGX_ERROR; ++ // Upstream nginx uses file mod timestamp and content-length for Etag, but ++ // files in the Nix store have their timestamps reset, so that doesn't work. ++ // Instead, when serving from the Nix store, we use the hash from the store ++ // path and content-length. ++ // ++ // Every file in under the given store path will share the same store path ++ // hash. It is fine to serve different resources with the same Etag, but ++ // different representations of the same resource (eg the same file, but ++ // gzip-compressed) should have different Etags. Thus, we also append ++ // content-length, which should be different when the response is compressed ++ ++ err = ngx_errno; (((3))) ++ real = ngx_realpath(clcf->root.data, NULL); (((4))) ++ ngx_set_errno(err); (((5))) ++ ++ #define NIX_STORE_DIR "@nixStoreDir@" (((6))) ++ #define NIX_STORE_LEN @nixStoreDirLen@ ++ ++ if (r->headers_out.last_modified_time == 1 (((7))) ++ && real != NULL ++ && !ngx_strncmp(real, NIX_STORE_DIR, NIX_STORE_LEN) ++ && real[NIX_STORE_LEN] == '/' ++ && real[NIX_STORE_LEN + 1] != '\0') ++ { ++ // extract the hash from a path formatted like ++ // /nix/store/hashhere1234-pname-1.0.0 ++ // +1 to skip the leading / ++ ptr1 = real + NIX_STORE_LEN + 1; (((8))) ++ ++ ptr2 = (u_char *) ngx_strchr(ptr1, '-'); ++ ++ if (ptr2 == NULL) { ++ ngx_free(real); ++ etag->hash = 0; ++ return NGX_ERROR; ++ } ++ ++ *ptr2 = '\0'; (((9))) ++ ++ // hash + content-length + quotes and hyphen. Note that the ++ // content-length part of the string can vary in length. ++ etag->value.data = ngx_pnalloc(r->pool, ngx_strlen(ptr1) + NGX_OFF_T_LEN + 3); ++ ++ if (etag->value.data == NULL) { ++ ngx_free(real); ++ etag->hash = 0; ++ return NGX_ERROR; ++ } ++ ++ ++ // set value.data content to "{hash}-{content-length}" (including quote ++ // marks), and set value.len to the length of the resulting string ++ etag->value.len = ngx_sprintf(etag->value.data, "\"\%s-%xO\"", ++ ptr1, ++ r->headers_out.content_length_n) ++ - etag->value.data; ++ ++ ngx_http_clear_last_modified(r); ++ } else { (((10))) ++ // outside of Nix store, use the upstream Nginx logic for etags ++ ++ etag->value.data = ngx_pnalloc(r->pool, NGX_OFF_T_LEN + NGX_TIME_T_LEN + 3); ++ ++ if (etag->value.data == NULL) { ++ ngx_free(real); ++ etag->hash = 0; ++ return NGX_ERROR; ++ } ++ ++ etag->value.len = ngx_sprintf(etag->value.data, "\"%xT-%xO\"", ++ r->headers_out.last_modified_time, ++ r->headers_out.content_length_n) ++ - etag->value.data; + } + +- etag->value.len = ngx_sprintf(etag->value.data, "\"%xT-%xO\"", +- r->headers_out.last_modified_time, +- r->headers_out.content_length_n) +- - etag->value.data; ++ ngx_free(real); + + r->headers_out.etag = etag; +``` + +The way this works is as follows: + +1. The old logic, explained above is deleted. + +2. We create 4 new variables, 3 used for path manipulation and 1 for storing + `errno`. + +3. Save the current value of `errno` (ugh) to a local. + +4. Then try to resolve `clcf->root.data`. `clcf->root.data` is the path + specified in the root directive of the Nginx configuration and could be a + symbolic link (e.g., it could be `/var/www` which might be a symlink to a Nix + store path[^Please don't do this.]). `ngx_realpath` resolves all symbolic + links and returns the canonical, absolute path. This is necessary to reliably + check if the path is inside the Nix store. + +5. Restore the value of `errno`, as the previous function call might have + changed it. We handle the error case of `ngx_realpath` later. + +6. These are **Nix placeholders**, aka they get replaced by the path of the Nix + store at build time. The reason we do not hardcode `/nix/store` is because + [Nix's store path, when on the local filesystem, is arbitrary](https://nix.dev/manual/nix/2.29/store/store-path.html#store-directory). + This makes our patch portable. + +7. **The main condition:** We check if the `r->headers_out.last_modified_time` + is `1`, which is what Nix sets file's last modified times to, we then check + if the canonical path resolution has succeeded (`real != NULL`), if the + canonical path starts with the Nix store path (`NIX_STORE_DIR`), and if it + has anything file after it (`@[next] != '/' && @[next + 1] != NULL`). + +8. (If the condition is true) We then extract the hash. A path like + `/nix/store/6bxcxc6xvg5xv70z55adcwhgik5m41a0-package-1.0.0/bin/package` gets + turned into `6bxcxc6xvg5xv70z55adcwhgik5m41a0`. + +9. (If the condition is true) _SIDENOTE:_ This is a nice hack for one of many + design failures in C, this one being NULL-terminated strings. We just place a + `0` byte at the index we want to end our string out for efficiency, instead + of creating a copy. + +10. (If the condition is false) We continue on with the old logic, as explained + at the top. + +We then set the value of the `ETag` header to `"{hash}-{content-length}"`, the +reason for the content length being explained in the comment in the patch: + +> It is fine to serve different resources with the same Etag, but different +> representations of the same resource (eg the same file, but gzip-compressed) +> should have different Etags. Thus, we also append content-length, which should +> be different when the response is compressed + +This way, by depending on the Nix store path instead of the last modification +date, we can serve files more reliably and with less[^No, not none. You can +still cause unreproducible builds with Nix and have the store path not change, +at least with input-addressed derivations (derivation = the smallest Nix build +unit).] issues. + +I hope you learned something in this blog post, and perhaps a new way to +fingerprint Nginx installs :).