feat: employ cobalt to build notebook

This commit is contained in:
Nat 2024-06-11 17:00:42 -07:00
parent 50dc5dfe40
commit fa9daa33fe
Signed by: nat
GPG Key ID: B53AB05285D710D6
17 changed files with 509 additions and 64 deletions

1
.gitignore vendored Normal file
View File

@ -0,0 +1 @@
public/

18
_cobalt.yml Normal file
View File

@ -0,0 +1,18 @@
source: .
destination: public
site:
title: nats.solutions
description: What Nat's working on
base_url: https://nats.solutions
posts:
rss: rss.xml
default:
is_draft: false
ignore:
- "*~"
- "*.swp"
- ".*.swp"
- "*.swo"
- ".*.swo"
- "!.domains"
- "deploy"

10
_defaults/pages.md Normal file
View File

@ -0,0 +1,10 @@
---
layout: default.liquid
---
## Blog!
{% for post in collections.posts.pages %}
#### {{post.title}}
[{{ post.title }}]({{ post.permalink }})
{% endfor %}

7
_defaults/posts.md Normal file
View File

@ -0,0 +1,7 @@
---
layout: post.liquid
title: Entry
is_draft: true
---

22
_includes/footer.liquid Normal file
View File

@ -0,0 +1,22 @@
<footer>
<p class="dinkus" aria-hidden="true">* * *</p>
<p>
Hand-crafted on the land of the sovereign
<a href="https://www.syilx.org/">Syilx Okanagan Nation</a>.
</p>
<p class="license">
Unless otherwise noted, content is shared under the
<a href="http://creativecommons.org/licenses/by-nc-sa/4.0/"
rel="license">
CC-BY-NC-SA 4.0
</a>.<br>
All code is shared under the
<a href="https://www.gnu.org/licenses/agpl-3.0.en.html"
rel="license">
AGPL
</a> (<a href="https://git.nats.solutions/nat/nats.solutions">source</a>).
<img src="/assets/img/crt.png" alt="Isometric illustration of me sitting at a desk working on a desktop computer with a CRT monitor">
</p>
</footer>

8
_includes/head.liquid Normal file
View File

@ -0,0 +1,8 @@
<title>nats.solutions</title>
<meta charset="utf-8">
<meta http-equiv="X-UA-Compatible" content="IE=edge">
<meta name="viewport" content="width=device-width, initial-scale=1">
<link rel="stylesheet" type="text/css" href="/assets/css/styles.css">
<!-- Search engine considerations -->
<meta name="googlebot" content="noindex">

14
_layouts/default.liquid Normal file
View File

@ -0,0 +1,14 @@
<!DOCTYPE html>
<html>
<head>
{% include 'head.liquid' %}
</head>
<body>
<div class="main-container">
<main>
{{ page.content }}
{% include 'footer.liquid' %}
</main>
</div>
</body>
</html>

20
_layouts/post.liquid Normal file
View File

@ -0,0 +1,20 @@
<!DOCTYPE html>
<html>
<head>
{% include 'head.liquid' %}
</head>
<body>
<div class="main-container">
<main>
<nav>
<a href="/">&larr; return</a>
</nav>
<h1>{{ page.title }}</h1>
<p><em>Published on {{ page.published_date | date: '%Y-%m-%d' }}</em></p>
{{ page.content }}
{% include 'footer.liquid' %}
</main>
</div>
</body>
</html>

97
assets/css/styles.css Normal file
View File

@ -0,0 +1,97 @@
* {
font-family: "Lucida Sans Unicode", "Lucida Grande", sans-serif;
}
h1,h2,h3,h4,h5,h6 {
font-family: "Lucida Console", Monaco, monospace;
}
code {
font-family: "Lucida Console", Monaco, monospace;
}
a { color: #059341; text-decoration: none; }
a:hover { color: #27ae60; text-decoration: underline; }
a.calathea-404 { color: red; }
a.calathea-404:hover { color: darkred; }
img {
height: auto;
margin-left: auto;
margin-right: auto;
display: block;
}
img.hero-img {
width: 50%;
}
pre {
border: 2px solid black;
border-radius: 5px;
overflow-y: scroll;
padding: 10px;
}
.main-container {
width: 60%;
margin:auto;
margin-top: 5em;
border: 5px double black;
}
main {
padding: 40px;
}
.dinkus {
margin-top: 2em;
}
footer {
text-align: center;
}
.incoming-container {
width: 100%;
display: flex;
justify-content: center;
flex-direction: row;
}
.calathea-incoming {
list-style: none;
padding-left: 0;
display: inline;
}
.calathea-incoming > li {
display: inline;
padding-right: 10px;
}
.calathea-incoming > li:first-child {
padding-left: 10px;
}
@media screen and (max-width: 1000px) {
.main-container {
width: 90%;
}
}
@media screen and (max-width: 768px) {
img.hero-img {
width: 80%;
}
.main-container {
border: none;
width: 100%;
margin-top: 2.5em;
}
main {
padding: 20px;
}
}

BIN
assets/img/crt.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 7.9 KiB

BIN
assets/img/day.kra Normal file

Binary file not shown.

BIN
assets/img/day.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 144 KiB

28
atom.liquid Normal file
View File

@ -0,0 +1,28 @@
---
permalink: /atom.xml
---
<?xml version="1.0" encoding="utf-8"?>
<feed xmlns="http://www.w3.org/2005/Atom">
<title>Nat's Notebook</title>
<link href="https://www.nats.solutions/"/>
<author>
<name>Nat</name>
<uri>https://www.nats.solutions</uri>
</author>
<link rel="self" href="https://www.nats.solutions/"/>
<updated>{{ collections.posts.pages[0].published_date | date: "%FT%TZ" }}</updated>
<id>https://www.nats.solutions</id>
{% for post in collections.posts.pages %}
<entry>
<title>{{ post.title }}</title>
<link rel="alternate" href="https://www.nats.solutions/{{ post.permalink }}"/>
<id>https://www.nats.solutions/{{ post.permalink }}</id>
<updated>{{ post.published_date | date: "%FT%TZ" }}</updated>
<content src="https://www.nats.solutions/{{ post.permalink }}"></content>
<summary type="html">
{{ post.description }}
</summary>
<category term="{{ post.categories | join: "/" }}" />
</entry>
{% endfor %}
</feed>

1
deploy
View File

@ -1,3 +1,4 @@
#!/bin/bash
cobalt build
scp -rp public/* root@nats.solutions:/var/www/nats.solutions/

28
index.md Normal file
View File

@ -0,0 +1,28 @@
---
layout: default.liquid
---
<img
src="/assets/img/day.png"
class="hero-img"
alt="An illustration of a smiling figure (me) sitting beside a pond with a duck, under a tree with no leaves"/>
# Hi, my name is Nat
I'm a computer and environmental scientist.
I [build software](https://git.nats.solutions/nat) and computer systems. Interested in working with me? I'd be happy to provide a resume on request. I also do web development and system administration pro bono for certain causes. If you need help with your next computer thing, send me an email at natscott [at] nats [dot] solutions!
## Principles
- Building complex systems means having the humility to learn from nature
- Choosing the path of least resistance is always a weakness to be negotiated with
- Computers should be designed by and made for all people
- Good complexity is the emergent property of simple systems
## Virtual lab notebook
I practice [open-notebook science](https://en.wikipedia.org/wiki/Open-notebook_science). As I tackle problems and learn new things, I strive to write down my experiences, roughly following the scientific method, and share them here. You can [subscribe to my Atom feed](/atom.xml) to follow updates.
{% for post in collections.posts.pages %}
- {{ post.published_date | date: '%Y-%m-%d' }} - [{{ post.title }}]({{ post.permalink }})
{% endfor %}

View File

@ -0,0 +1,255 @@
---
layout: post.liquid
title: "Correcting an incorrectly georeferenced raster dataset"
published_date: 2024-06-11 12:00:00 +0000
categories:
- notebook
tags:
- gis
---
## Objective
To extend the [`fccsmap`](https://github.com/pnwairfire/fccsmap) Python package to support looking up FCCS fuelbed information within Canada.
### Context
`fccsmap` is a Python package developed by Pacific North West Airfire used to look up FCCS fuelbed data within continental United States and Alaska. It does this by loading a georeferenced raster dataset and checking what value sits at the relevant coordinates.
Prior work includes Miles Epstein adding the dataset `fccs_canada.nc` to `fccsmap` and wiring it up for use. However, `fccsmap` fails to use the dataset; when it's loaded, dependencies complain that it lacks a geotransform matrix. It does indeed have a geotransform matrix, however, it doesn't appear to be recognized by any GIS software I've used. Namely, the dataset has the following property in its header:
```
Band1#transform={1000,0,-2341249.999999993,0,-1000,3833423.97736665}
```
Clearly, `fccsmap` fails to recognize the geotransformation of the dataset, while it does recognize it for the American datasets. Interestingly, PanopolyJ fails to recognize the geotransformations for both the American and Canadian datasets. Meanwhile, QGIS does recognize the geotransformation, however, the coordinates it reports are well off where they should be (the southern tip of Vancouver Island is listed as being almost 30 degrees east of where it's expected to be). Notably, the American FCCS datasets do not have this issue
### Definitions and ideas
* Raster data: data in "raster" form; think raster images. Data is layed out in a grid of "pixels," where each pixel contains or references some data.
* Georeferencing and geotransformation: the process of applying a matrix transformation (the "geotransformation") to the raster grid in order to make the data correspond to geographic coordinates. Raster data is essentially a matrix, and we can apply matrix transformations to it accordingly.
## Approach 1: Upgrading to a later version of GDAL
When using `fccs_canada.nc`, you get the following error:
```
>>> ca_lookup = FccsLookUp(is_canada=True)
>>> ca_lookup.filename
'/usr/local/lib/python3.8/dist-packages/fccsmap/data/fccs_canada.nc'
>>> ca_lookup.look_up({'type': 'MultiPoint', 'coordinates':[[51, -116]]})
/usr/local/lib/python3.8/dist-packages/rasterio/__init__.py:304: NotGeoreferencedWarning: Dataset has no geotransform, gcps, or rpcs. The identity matrix will be returned.
dataset = DatasetReader(path, driver=driver, sharing=sharing, **kwargs)
Traceback (most recent call last):
File "<stdin>", line 1, in <module>
File "/usr/local/lib/python3.8/dist-packages/fccsmap/lookup.py", line 179, in look_up
stats = self._look_up(new_geo_data)
File "/usr/local/lib/python3.8/dist-packages/fccsmap/lookup.py", line 305, in _look_up
stats = zonal_stats(s, self.gridfile_specifier,
File "/usr/local/lib/python3.8/dist-packages/rasterstats/main.py", line 31, in zonal_stats
return list(gen_zonal_stats(*args, **kwargs))
File "/usr/local/lib/python3.8/dist-packages/rasterstats/main.py", line 156, in gen_zonal_stats
fsrc = rast.read(bounds=geom_bounds)
File "/usr/local/lib/python3.8/dist-packages/rasterstats/io.py", line 289, in read
win = bounds_window(bounds, self.affine)
File "/usr/local/lib/python3.8/dist-packages/rasterstats/io.py", line 150, in bounds_window
row_start, col_start = rowcol(w, n, affine)
File "/usr/local/lib/python3.8/dist-packages/rasterstats/io.py", line 141, in rowcol
r = int(op((y - affine.f) / affine.e))
OverflowError: cannot convert float infinity to integer
```
This error is reminiscent of [this issue](https://github.com/corteva/rioxarray/discussions/605) in the rioxarray package, which the developers identified as being a bug with gdal, fixed in [this pull request](https://github.com/OSGeo/gdal/pull/6663). It's possible updating to gdal version 3.6.1 would fix the issue. I am indeed using an outdated version of GDAL.
To test this, I decided to install the latest version of `fccsmap` using the latest version of GDAL in a Docker container, using the following Dockerfile:
```
FROM ubuntu:22.04
RUN cd /root
# installing tzdata, which is a dependency of one of the following packages,
# needs a little configuration to be run non-interactively. This manually
# sets the timezone data it expects
RUN ln -fs /usr/share/zoneinfo/America/Vancouver /etc/localtime
# Setting DEBIAN_FRONTEND here prevents undesired interactive prompts when
# installing certain packages (namely tzdata)
RUN apt-get update && DEBIAN_FRONTEND=noninteractive \
apt-get install -y \
python3 python3-dev python3-pip python3-venv\
libnetcdf-dev libproj-dev libgdal-dev \
python3-numpy python3-gdal \
libxml2-dev libxslt1-dev
# Create and activate python virtual environment
RUN python3 -m venv /root/venv
ENV PATH="/root/venv/bin/activate:$PATH"
# Install pip dependencies
RUN pip install numpy
RUN export CPLUS_INCLUDE_PATH=/usr/include/gdal
RUN export CPLUS_INCLUDE_PATH=/usr/include/gdal
RUN pip install gdal==`gdal-config --version`
RUN pip install --no-binary gdal --extra-index https://pypi.airfire.org/simple fccsmap==4.1.2
```
Unfortunately, using the latest version of GDAL didn't resolve the issue, and it failed with a similar error.
## Approach 2: Identifying why the geotransformation isn't recognized
Why isn't GDAL recognizing the geotransformation? I suspect it's because the netCDF driver is searching for it somewhere it's not. To find out, we can trace the callback stack to see where specifically it's looking. Recall from earlier:
```
>>> ca_lookup.look_up({'type': 'MultiPoint', 'coordinates':[[51, -116]]})
/usr/local/lib/python3.8/dist-packages/rasterio/__init__.py:304: NotGeoreferencedWarning: Dataset has no geotransform, gcps, or rpcs. The identity matrix will be returned.
```
Following the chain of dependencies, we find [this line](https://github.com/rasterio/rasterio/blob/1.3.10/rasterio/__init__.py#L304) which seems to be throwing the warning:
```
# rasterio_v1.3/__init__.py
dataset = DatasetReader(path, driver=driver, sharing=sharing, **kwargs)
```
The DatasetReader class can in turn be found in [`rasterio/io.py`](https://github.com/rasterio/rasterio/blob/1.3.10/rasterio/io.py), which extends DatasetReaderBase in [`rasterio/_io.pxd`](https://github.com/rasterio/rasterio/blob/1.3.10/rasterio/_io.pxd). DatasetReaderBase extends DatasetBase and this is pretty much where our journey in Pythonland ends because from here on out everything's defined in the C++ GDAL library.
We can get a bit closer by searching the codebase for `NotGeoreferencedWarning`, which takes us to `rasterio/_base.pyx` and the `read_transform` method:
```
def read_transform(self):
"""Return the stored GDAL GeoTransform"""
cdef double gt[6]
if self._hds == NULL:
raise ValueError("Null dataset")
err = GDALGetGeoTransform(self._hds, gt)
if err == GDALError.failure and not self._has_gcps_or_rpcs():
warnings.warn(
("Dataset has no geotransform, gcps, or rpcs. "
"The identity matrix will be returned."),
NotGeoreferencedWarning)
return [gt[i] for i in range(6)]
```
Some earlier investigation suggests that bypassing this issue by georeferencing the data with GCPs isn't an option, but maybe it's worth looking into RPCs if this rabbit hole doesn't take us anywhere
The key here is `GDALGetGeoTransform`, which is in fact a reference to a C function; one that takes a representation of a dataset and a pointer to a location in memory where the geotransform matrix will be loaded. It's defined in the [`gdal.h` header](https://github.com/OSGeo/gdal/blob/99870b9e9352f31a71d29ff4f9e5a8ae4c866891/gcore/gdal.h#L1186)
This is effectively a dead end for me; tracing the GDAL codebase adds a lot of complexity to this project, particularly now that I have a few other leads that might work better
## Approach 3: Manually configuring the GeoTransform
Approach 2 taught me a lot about how rasterio and GDAL work under the hood. `fccsmap` exposes the GDAL dataset reader, and I might be able to leverage that to dynamically configure the dataset. Having done that, I might be able to export it as well.
```
import rioxarray
import rasterio
from fccsmap.lookup import FccsLookUp
old_ds_filename = FccsLookUp(is_canada=True)._filename
old_ds = rasterio.open(old_ds_filename)
## /usr/local/lib/python3.10/dist-packages/rasterio/__init__.py:304: NotGeoreferencedWarning: Dataset has no geotransform, gcps, or rpcs. The identity matrix will be returned.
## dataset = DatasetReader(path, driver=driver, sharing=sharing, **kwargs)
# One-liner:
# new_ds = rasterio.open('./modified_fccs_canada.nc', 'w', width=old_ds.width, height=old_ds.height, count=old_ds.count, nodata=0, dtype='float32', crs=old_ds.crs, transform=rasterio.transform.Affine(1000,0,-2341249.999999993,0,-1000,3833423.97736665))
new_ds = rasterio.open('./modified_fccs_canada.nc', 'w',
driver='netCDF',
dtype='float32',
width=old_ds.width,
height=old_ds.height,
crs=old_ds.crs,
transform=rasterio.transform.Affine(1000,0,-2341249.999999993,0,-1000,3833423.97736665),
count=old_ds.count,
nodata=0)
## Traceback (most recent call last):
## File "<stdin>", line 1, in <module>
## File "/usr/local/lib/python3.10/dist-packages/rasterio/env.py", line 451, in wrapper
## return f(*args, **kwds)
## File "/usr/local/lib/python3.10/dist-packages/rasterio/__init__.py", line 230, in open
## raise RasterioIOError(
## rasterio.errors.RasterioIOError: Blacklisted: file cannot be opened by driver 'netCDF' in 'w' mode
```
`rasterio` raises such a `RasterioIOError` when the driver doesn't support writing as described in the [Writing Datasets section of the `rasterio` documentation](https://rasterio.readthedocs.io/en/latest/topics/writing.html). The implications of this aren't immediately obvious; if you leave off the `driver='netCDF'` line, this code works fine. Presumably, in this case, it's not writing in NetCDF format. Instead, it's probably writing in GeoTIFF, which is better supported (new lead: can we use GTIFF instead of NetCDF?).
To quote the documentation:
> Some other formats that are writable by GDAL can also be written by Rasterio. These use an `IndirectRasterUpdater` which does not create directly but uses a temporary in-memory dataset and `GDALCreateCopy()` to produce the final output.
The [Raster drivers page in the GDAL documentation](https://gdal.org/drivers/raster/index.html) lists the NetCDF driver as one supporting "Creation," though it doesn't elaborate on what that means. The documentation for the driver itself claims the NetCDF driver supports both reading and writing.
If you run the same command, dropping the `driver` kwarg, you get:
```
new_ds = rasterio.open('./modified_fccs_canada.nc', 'w', width=old_ds.width, height=old_ds.height, crs=old_ds.crs, count=old_ds.count, nodata=0, dtype='float32', transform=rasterio.transform.Affine(1000,0,-2341249.999999993,0,-1000,3833423.97736665))
new_ds.driver
## 'netCDF'
```
Maybe this is a bug? We're going to run with this for now.
From here, we can get the raster data of `fccs_canada.nc` and write it to our new dataset:
```
raster = rioxarray.open_rasterio(old_ds)
new_ds.write(raster)
new_ds.close()
l = FccsLookUp(fccs_fuelload_file='./modified_fccs_canada.nc')
l.look_up({'type': 'MultiPoint', 'coordinates':[[51, -116]]})
## Traceback (most recent call last):
## [...]
## OverflowError: cannot convert float infinity to integer
```
This has brought us back to where we started. However, I lucked out and made a discovery after landing here. I remembered that this error is otherwise a result of looking up a value outside the domain of the dataset. For example, let's take the known working American continental dataset:
```
us = FccsLookUp()
us.look_up({'type': 'MultiPoint', 'coordinates':[[51, -116]]})
## Traceback (most recent call last):
## [...]
## OverflowError: cannot convert float infinity to integer
```
This fails because the request is being made for a point in Canada—a point in space outside continental US.
Previously, I'd noticed that when I loaded the dataset in QGIS, it seemed like the coordinates more eastern than they should have been. In light of this, I tried:
```
l.look_up({'type': 'MultiPoint', 'coordinates':[[51, -80]]})
{'fuelbeds': OrderedDict([('0.0', {'percent': 100.0, 'grid_cells': 480249})]), 'units': 'm^2', 'sampled_grid_cells': 480249, 'sampled_area': 240128428465.43958}
```
Of course, the next thing that came to mind was to check this on the existinng `fccs_canada.nc` dataset:
```
ca = FccsLookUp(is_canada=True)
ca.look_up({'type': 'MultiPoint', 'coordinates':[[51, -80]]})
{'fuelbeds': OrderedDict([('0.0', {'percent': 100.0, 'grid_cells': 480249})]), 'units': 'm^2', 'sampled_grid_cells': 480249, 'sampled_area': 240128428465.43958}
## Traceback (most recent call last):
## [...]
## OverflowError: cannot convert float infinity to integer
```
In the end, finicking with the dataset was not in fact a red herring. We've marshalled the dataset into working with `fccsmap`. For completeness, here's the code to generate it:
```
import rioxarray
import rasterio
from fccsmap.lookup import FccsLookUp
old_ds_filename = FccsLookUp(is_canada=True)._filename
old_ds = rasterio.open(old_ds_filename)
new_ds = rasterio.open('./modified_fccs_canada.nc', 'w', width=old_ds.width, height=old_ds.height, crs=old_ds.crs, count=old_ds.count, nodata=0, dtype='float32', transform=rasterio.transform.Affine(1000,0,-2341249.999999993,0,-1000,3833423.97736665))
raster = rioxarray.open_rasterio(old_ds)
new_ds.write(raster)
old_ds.close()
new_ds.close()
```
Remaining question: what will it take to fix the offset? Resolving this and patching it into `fccsmap` will completely resolve the problem.

View File

@ -1,64 +0,0 @@
<!DOCTYPE html>
<html lang="en">
<head>
<title>nats.solutions</title>
<meta charset="utf-8">
<meta http-equiv="X-UA-Compatible" content="IE=edge">
<meta name="viewport" content="width=device-width, initial-scale=1">
<link rel="stylesheet" type="text/css" href="/assets/css/styles.css">
<!-- Search engine considerations -->
<meta name="googlebot" content="noindex">
</head>
<body>
<div class="main-container">
<main>
<img
src="/assets/img/day.png"
class="hero-img"
alt="An illustration of a smiling figure (me) sitting beside a pond with a duck, under a tree with no leaves"/>
<h1>Hi, my name is Nat</h1>
<p>
I'm a computer and environmental scientist
</p>
<p>
I <a href="https://git.nats.solutions/nat">build software</a> and computer systems. Interested in working with me? I'd be happy to provide a resume on request. I also do web development and system administration pro bono for certain causes. If you need help with your next computer thing, send me an email at natscott [at] nats [dot] solutions!
</p>
<h2>Principles</h2>
<ul>
<li>Building complex systems means having the humility to learn from nature</li>
<li>Choosing the path of least resistance is always a weakness to be negotiated with</li>
<li>Computers should be designed by and made for all people</li>
<li>Good complexity is the emergent property of simple systems</li>
</ul>
</main>
<footer>
<p class="dinkus" aria-hidden="true">* * *</p>
<p>
Hand-crafted on the land of the sovereign
<a href="https://www.syilx.org/">Syilx Okanagan Nation</a>.
</p>
<p class="license">
Unless otherwise noted, content is shared under the
<a href="http://creativecommons.org/licenses/by-nc-sa/4.0/"
rel="license">
CC-BY-NC-SA 4.0
</a>.<br>
All code is shared under the
<a href="https://www.gnu.org/licenses/agpl-3.0.en.html"
rel="license">
AGPL
</a> (<a href="https://git.nats.solutions/nat/nats.solutions">source</a>).
<img src="/assets/img/crt.png" alt="Isometric illustration of me sitting at a desk working on a desktop computer with a CRT monitor">
</p>
</footer>
</div>
</body>
</html>