Skip to content

Commit 0357606

Browse files
authored
NEWDATA(CAVM-2-0): Add var landCoverCat with attrs flag_values, flag_meanings, flag_descriptions, flag_colors; v20260331.nc
2 parents f0e1276 + 0d03dde commit 0357606

14 files changed

Lines changed: 522 additions & 117 deletions

File tree

data/CAVM-2-0/convert.py

Lines changed: 218 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,218 @@
1+
import time
2+
from pathlib import Path
3+
4+
import matplotlib.pyplot as plt
5+
import numpy as np
6+
import pandas as pd
7+
import rioxarray as rxr
8+
from matplotlib.colors import BoundaryNorm, ListedColormap
9+
from rasterio.enums import Resampling
10+
11+
import ilamb3_data as ild
12+
13+
VAR = "landCoverCat"
14+
15+
RAW_PATH = Path("_raw")
16+
if not RAW_PATH.is_dir():
17+
RAW_PATH.mkdir()
18+
19+
20+
# --------------------------------------------------------------------------------------
21+
# 1) Download
22+
# --------------------------------------------------------------------------------------
23+
24+
25+
# Download CAVM 2.0 data from Mendeley Data HTML and unzip
26+
remote_source = "https://data.mendeley.com/public-files/datasets/c4xj5rv6kv/files/5223c414-234a-498c-ae08-3100cb38510f/file_downloaded"
27+
local_source = RAW_PATH / "Raster CAVM GIS data.zip"
28+
local_source = ild.download_from_html(remote_source, local_source)
29+
download_stamp = ild.gen_utc_timestamp(local_source.stat().st_mtime)
30+
31+
# --------------------------------------------------------------------------------------
32+
# 2) Read Data
33+
# --------------------------------------------------------------------------------------
34+
35+
36+
# Read TIF and legend CSV
37+
ds = rxr.open_rasterio(local_source / "raster_cavm_v1.tif", band_as_variable=True)
38+
39+
# --------------------------------------------------------------------------------------
40+
# 3) Manual cleaning/formatting
41+
# --------------------------------------------------------------------------------------
42+
43+
44+
# Helper function for later
45+
def _clean_flag_description(s: str) -> str:
46+
s = s.strip().lower()
47+
s = s.replace(",", "")
48+
s = s.replace("/", "-")
49+
s = s.replace(" ", "_")
50+
return s
51+
52+
53+
# Reproject to lat/lon and rename dimensions/variables
54+
ds = ds.rio.reproject("EPSG:4326", resampling=Resampling.nearest)
55+
ds = ds.rename({"band_1": VAR, "x": "lon", "y": "lat"})
56+
57+
# Clip CAVM extent to non-null bounding box
58+
valid = ds[VAR] != 127
59+
ds = ds.sel(
60+
lat=ds.lat[valid.any(dim="lon")],
61+
lon=ds.lon[valid.any(dim="lat")],
62+
)
63+
64+
# Read CAVM legend and use it to set flag values/meanings and descriptions
65+
legend = local_source / "Raster CAVM legend.csv"
66+
df = pd.read_csv(
67+
legend, skiprows=[0, 1], na_filter=False, dtype={"Raster code": np.int8}
68+
)
69+
df = df.sort_values("Raster code").reset_index(drop=True)
70+
# Fill in some missing short descriptions manually
71+
desc_fill = {
72+
"FW": "Fresh water",
73+
"SW": "Salt water",
74+
"GL": "Glacier",
75+
"NA": "Non-Arctic",
76+
}
77+
mask = df["Short Description"].str.strip() == ""
78+
df.loc[mask, "Short Description"] = df.loc[mask, "Vegetation Unit"].map(desc_fill)
79+
80+
# Palette for CAVM visualization
81+
palette = {
82+
1: "#d7d7b3",
83+
2: "#a8a802",
84+
3: "#a68282",
85+
4: "#8282a0",
86+
5: "#cdcd66",
87+
21: "#ffebaf",
88+
22: "#ffd37f",
89+
23: "#e6e600",
90+
24: "#ffff00",
91+
31: "#dfb0b0",
92+
32: "#db949e",
93+
33: "#97e602",
94+
34: "#38a802",
95+
41: "#9eedbd",
96+
42: "#73ffdf",
97+
43: "#04e6a9",
98+
91: "#0070ff",
99+
92: "#e0f2ff",
100+
93: "#ffffff",
101+
99: "#cccccc",
102+
}
103+
104+
# Merge palette into df and filter to codes present in the raster so that
105+
# flag_values, flag_meanings, flag_descriptions, and flag_colors all align.
106+
df["color"] = df["Raster code"].map(palette)
107+
raster_codes = set(
108+
np.unique(ds[VAR].values[(~np.isnan(ds[VAR].values)) & (ds[VAR].values != 127)])
109+
)
110+
df = df[df["Raster code"].isin(raster_codes)].copy()
111+
112+
113+
# --------------------------------------------------------------------------------------
114+
# 4) Automatic cleaning/formatting using ILAMB utilities
115+
# --------------------------------------------------------------------------------------
116+
117+
# Set lat/lon/var attributes
118+
ds = ild.set_lat_attrs(ds)
119+
ds = ild.set_lon_attrs(ds)
120+
ds = ild.set_coord_bounds(ds, "lat")
121+
ds = ild.set_coord_bounds(ds, "lon")
122+
123+
# Decisions about naming derived from https://raw.githubusercontent.com/PCMDI/mip-cmor-tables/refs/heads/main/MIP_variables.json
124+
# Search for "landCoverFrac" to see how I modeled this
125+
ds = ild.set_var_attrs(
126+
ds,
127+
VAR,
128+
units="",
129+
standard_name="cover_category",
130+
long_name="Vegetation or Land-Cover Category",
131+
flag_values=np.asarray(df["Raster code"], dtype=np.int8),
132+
flag_meanings=df["Vegetation Unit"].to_list(),
133+
extra_attrs={
134+
"flag_descriptions": " ".join(
135+
_clean_flag_description(s) for s in df["Short Description"]
136+
),
137+
"flag_colors": " ".join(df["color"]),
138+
},
139+
target_dtype=np.dtype("int8"),
140+
compression={"zlib": True, "complevel": 4, "shuffle": True},
141+
overwrite=True,
142+
)
143+
144+
# Final cleanup
145+
ds = ds.drop_vars("spatial_ref")
146+
147+
# --------------------------------------------------------------------------------------
148+
# 5) Set Obs4MIPs-compliant global attributes and write NetCDF
149+
# --------------------------------------------------------------------------------------
150+
151+
# Set global attributes and write NetCDF
152+
generate_stamp = time.strftime("%Y%m%d")
153+
tracking_id = ild.gen_trackingid()
154+
ds = ild.set_ods26_global_attrs(
155+
ds,
156+
activity_id="ILAMB",
157+
contact="Martha Raynolds (mkraynolds@alaska.edu)",
158+
creation_date=generate_stamp,
159+
dataset_contributor="Morgan Steckler",
160+
doi="https://doi.org/10.17632/c4xj5rv6kv.2",
161+
frequency="fx",
162+
grid="1x1 km Lambert Azimuthal Equal Area reprojected to latitude x longitude",
163+
grid_label="gr",
164+
has_aux_unc="FALSE",
165+
history=f"""
166+
{generate_stamp}: "CMORized" data from Raster CAVM 2.0 (downloaded from Mendeley Data)\n
167+
{generate_stamp}: Reprojected to lat/lon and added metadata using ILAMB utilities
168+
""",
169+
institution="University of Alaska, Fairbanks, USA",
170+
institution_id="UAF",
171+
license="https://creativecommons.org/licenses/by-nc/3.0/deed.en",
172+
nominal_resolution="1 km",
173+
processing_code_location="https://github.com/rubisco-sfa/ilamb3-data/tree/main/data/CAVM-2-0/convert.py",
174+
product="derived",
175+
realm="land",
176+
references="Raynolds, M.K., et al. (2019). A raster version of the Circumpolar Arctic Vegetation Map (CAVM). Remote Sensing of Environment, 232, 111297. https://doi.org/10.1016/j.rse.2019.111297",
177+
region="panarctic",
178+
source="Unsupervised classifications of seventeen geographic/floristic sub-sections of the Arctic, using AVHRR and MODIS data (reflectance and NDVI) and elevation data",
179+
source_id="CAVM-2-0",
180+
source_data_retrieval_date=download_stamp,
181+
source_data_url=str(remote_source),
182+
source_label="CAVM",
183+
source_type="satellite_retrieval",
184+
source_version_number="2.0",
185+
title="Raster Circumpolar Arctic Vegetation Map",
186+
tracking_id=tracking_id,
187+
variable_id=VAR,
188+
variant_label="ILAMB",
189+
variant_info="CMORized product prepared by ILAMB",
190+
version=f"v{generate_stamp}",
191+
)
192+
out_path = ild.create_output_filename(ds.attrs)
193+
ds.to_netcdf(out_path)
194+
195+
# --------------------------------------------------------------------------------------
196+
# Plotting verification
197+
# --------------------------------------------------------------------------------------
198+
199+
codes = ds[VAR].attrs["flag_values"]
200+
colors = ds[VAR].attrs["flag_colors"].split()
201+
labels = ds[VAR].attrs["flag_meanings"].split()
202+
203+
idx_map = {c: i for i, c in enumerate(codes)}
204+
mapped = np.vectorize(lambda x: idx_map.get(x, np.nan), otypes=[float])(ds[VAR].values)
205+
206+
fig, ax = plt.subplots(figsize=(10, 5))
207+
ax.pcolormesh(
208+
ds["lon"],
209+
ds["lat"],
210+
mapped,
211+
cmap=ListedColormap(colors),
212+
norm=BoundaryNorm(np.arange(-0.5, len(codes)), len(codes)),
213+
)
214+
cbar = plt.colorbar(ax.collections[0], ax=ax, ticks=range(len(codes)))
215+
cbar.ax.set_yticklabels(labels)
216+
plt.tight_layout()
217+
plt.savefig(f"{VAR}.png", dpi=300)
218+
plt.close()

data/CERES-EBAF-4-2-1/convert.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -58,10 +58,10 @@
5858
ds = set_var_attrs(
5959
ds,
6060
var,
61-
ds[var].attrs["units"],
62-
var_info["cf_standard_name"],
63-
var_info["variable_long_name"],
64-
target_dtype=np.float32,
61+
units=ds[var].attrs["units"],
62+
standard_name=var_info["cf_standard_name"],
63+
long_name=var_info["variable_long_name"],
64+
target_dtype=np.dtype("float32"),
6565
)
6666

6767
# Remove some straggling var attrs

data/CLASS-1-1/convert.py

Lines changed: 13 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,7 @@
2727
for remote_source, local_source in zip(remote_sources, local_sources):
2828
if not Path(local_source).is_file():
2929
ild.download_from_html(remote_source, local_source)
30-
download_stamp = ild.gen_utc_timestamp(Path(local_source).stat().st_mtime)
30+
download_stamp = ild.gen_utc_timestamp(Path(local_sources[0]).stat().st_mtime)
3131

3232
# open and rename some variables
3333
ds = xr.open_mfdataset(local_sources)
@@ -54,30 +54,30 @@
5454
)
5555

5656
# get standard/long name info and manage when it's not in cmip6 CV or is uncertainty
57+
var_info = {
58+
"cf_standard_name": out[var].attrs.get("standard_name", var),
59+
"variable_long_name": out[var].attrs.get(
60+
"long_name", var.replace("_", " ").title()
61+
),
62+
"variable_units": out[var].attrs.get("units", ""),
63+
}
5764
if "_sd" not in var:
5865
try:
5966
var_info = ild.get_cmip6_variable_info(var, var)
6067
except Exception:
61-
var_info = {
62-
"cf_standard_name": out[var].attrs.get("standard_name", var),
63-
"variable_long_name": out[var].attrs.get(
64-
"long_name", var.replace("_", " ").title()
65-
),
66-
"variable_units": out[var].attrs.get("units", ""),
67-
}
6868
var_info["variable_long_name"] = (
6969
var_info["variable_long_name"].replace("_", " ").title()
7070
)
7171

7272
# format the var attrs
7373
out = ild.set_var_attrs(
7474
out,
75-
var=var,
76-
cmip6_units=var_info["variable_units"],
77-
cmip6_standard_name=var_info["cf_standard_name"],
78-
cmip6_long_name=var_info["variable_long_name"],
75+
var,
76+
units=var_info["variable_units"],
77+
standard_name=var_info["cf_standard_name"],
78+
long_name=var_info["variable_long_name"],
7979
ancillary_variables=uncert,
80-
target_dtype=np.float32,
80+
target_dtype=np.dtype("float32"),
8181
convert=False,
8282
)
8383

data/GEWEX-SRB-4-IP/convert.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -86,10 +86,10 @@
8686
ds = set_var_attrs(
8787
ds,
8888
var,
89-
ds[var].attrs["units"],
90-
var_info[CFAttr.cf_standard_name.value],
91-
var_info[CFAttr.variable_long_name.value],
92-
target_dtype=np.float32,
89+
units=ds[var].attrs["units"],
90+
standard_name=var_info[CFAttr.cf_standard_name.value],
91+
long_name=var_info[CFAttr.variable_long_name.value],
92+
target_dtype=np.dtype("float32"),
9393
)
9494

9595
# Remove some straggling var attrs

data/GFED-5-0/convert.py

Lines changed: 4 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -167,11 +167,10 @@
167167
ds = set_var_attrs(
168168
ds,
169169
var="burntFractionAll",
170-
cmip6_units="%",
171-
cmip6_standard_name=var_info["cf_standard_name"],
172-
cmip6_long_name=var_info["variable_long_name"],
173-
target_dtype=np.float32,
174-
convert=False,
170+
units="%",
171+
standard_name=var_info["cf_standard_name"],
172+
long_name=var_info["variable_long_name"],
173+
target_dtype=np.dtype("float32"),
175174
)
176175

177176
# Set global attributes and export

data/HWSD-2-0/convert.py

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -295,11 +295,11 @@ def create_netcdf(
295295
# set variable attributes
296296
ds = set_var_attrs(
297297
ds,
298-
var=var,
299-
cmip6_units=info["variable_units"],
300-
cmip6_standard_name=info["cf_standard_name"],
301-
cmip6_long_name=info["variable_long_name"],
302-
target_dtype=np.float32,
298+
var,
299+
units=info["variable_units"],
300+
standard_name=info["cf_standard_name"],
301+
long_name=info["variable_long_name"],
302+
target_dtype=np.dtype("float32"),
303303
)
304304

305305
# create history variable

data/Hoffman-1-0/convert.py

Lines changed: 5 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -49,13 +49,12 @@
4949
# Set correct attribute information for the vars
5050
ds = set_var_attrs(
5151
ds,
52-
var=var,
53-
cmip6_units=var_info["variable_units"],
54-
cmip6_standard_name=var_info["cf_standard_name"],
55-
cmip6_long_name=var_info["variable_long_name"],
52+
var,
53+
units=var_info["variable_units"],
54+
standard_name=var_info["cf_standard_name"],
55+
long_name=var_info["variable_long_name"],
5656
ancillary_variables=f"{var}_{UNC}",
57-
target_dtype=np.float32,
58-
convert=False,
57+
target_dtype=np.dtype("float32"),
5958
)
6059

6160
# Assign ancillary variables

data/LORA-1-0/convert.py

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -49,12 +49,12 @@
4949
# Set correct attribute information for the vars
5050
ds = set_var_attrs(
5151
ds,
52-
var="mrro",
53-
cmip6_units=mrro_info["variable_units"],
54-
cmip6_standard_name=mrro_info["cf_standard_name"],
55-
cmip6_long_name=mrro_info["variable_long_name"],
52+
"mrro",
53+
units=mrro_info["variable_units"],
54+
standard_name=mrro_info["cf_standard_name"],
55+
long_name=mrro_info["variable_long_name"],
5656
ancillary_variables="mrro_sd",
57-
target_dtype=np.float32,
57+
target_dtype=np.dtype("float32"),
5858
convert=True,
5959
)
6060

@@ -91,7 +91,7 @@
9191
grid_label="gn",
9292
has_aux_unc="TRUE",
9393
history=f"""
94-
{download_stamp}: downloaded {remote_source};
94+
{download_stamp}: downloaded {remote_sources};
9595
{creation_stamp}: converted to obs4MIPs format""",
9696
institution="ARC Centre of Excellence for Climate System Science, NSW, Australia",
9797
institution_id="ARCCSS",

data/RAPID-2023-1a/convert.py

Lines changed: 5 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -71,12 +71,11 @@
7171
var_info = get_cmip6_variable_info("msftmz", "msftmz")
7272
ds = set_var_attrs(
7373
ds,
74-
var="msftmz",
75-
cmip6_units=var_info["variable_units"],
76-
cmip6_standard_name=var_info["cf_standard_name"],
77-
cmip6_long_name=var_info["variable_long_name"],
78-
target_dtype=np.float32,
79-
convert=False,
74+
"msftmz",
75+
units=var_info["variable_units"],
76+
standard_name=var_info["cf_standard_name"],
77+
long_name=var_info["variable_long_name"],
78+
target_dtype=np.dtype("float32"),
8079
)
8180

8281
# Clean up attrs

0 commit comments

Comments
 (0)