|
| 1 | +import time |
| 2 | +from pathlib import Path |
| 3 | + |
| 4 | +import matplotlib.pyplot as plt |
| 5 | +import numpy as np |
| 6 | +import pandas as pd |
| 7 | +import rioxarray as rxr |
| 8 | +from matplotlib.colors import BoundaryNorm, ListedColormap |
| 9 | +from rasterio.enums import Resampling |
| 10 | + |
| 11 | +import ilamb3_data as ild |
| 12 | + |
| 13 | +VAR = "landCoverCat" |
| 14 | + |
| 15 | +RAW_PATH = Path("_raw") |
| 16 | +if not RAW_PATH.is_dir(): |
| 17 | + RAW_PATH.mkdir() |
| 18 | + |
| 19 | + |
| 20 | +# -------------------------------------------------------------------------------------- |
| 21 | +# 1) Download |
| 22 | +# -------------------------------------------------------------------------------------- |
| 23 | + |
| 24 | + |
| 25 | +# Download CAVM 2.0 data from Mendeley Data HTML and unzip |
| 26 | +remote_source = "https://data.mendeley.com/public-files/datasets/c4xj5rv6kv/files/5223c414-234a-498c-ae08-3100cb38510f/file_downloaded" |
| 27 | +local_source = RAW_PATH / "Raster CAVM GIS data.zip" |
| 28 | +local_source = ild.download_from_html(remote_source, local_source) |
| 29 | +download_stamp = ild.gen_utc_timestamp(local_source.stat().st_mtime) |
| 30 | + |
| 31 | +# -------------------------------------------------------------------------------------- |
| 32 | +# 2) Read Data |
| 33 | +# -------------------------------------------------------------------------------------- |
| 34 | + |
| 35 | + |
| 36 | +# Read TIF and legend CSV |
| 37 | +ds = rxr.open_rasterio(local_source / "raster_cavm_v1.tif", band_as_variable=True) |
| 38 | + |
| 39 | +# -------------------------------------------------------------------------------------- |
| 40 | +# 3) Manual cleaning/formatting |
| 41 | +# -------------------------------------------------------------------------------------- |
| 42 | + |
| 43 | + |
| 44 | +# Helper function for later |
| 45 | +def _clean_flag_description(s: str) -> str: |
| 46 | + s = s.strip().lower() |
| 47 | + s = s.replace(",", "") |
| 48 | + s = s.replace("/", "-") |
| 49 | + s = s.replace(" ", "_") |
| 50 | + return s |
| 51 | + |
| 52 | + |
| 53 | +# Reproject to lat/lon and rename dimensions/variables |
| 54 | +ds = ds.rio.reproject("EPSG:4326", resampling=Resampling.nearest) |
| 55 | +ds = ds.rename({"band_1": VAR, "x": "lon", "y": "lat"}) |
| 56 | + |
| 57 | +# Clip CAVM extent to non-null bounding box |
| 58 | +valid = ds[VAR] != 127 |
| 59 | +ds = ds.sel( |
| 60 | + lat=ds.lat[valid.any(dim="lon")], |
| 61 | + lon=ds.lon[valid.any(dim="lat")], |
| 62 | +) |
| 63 | + |
| 64 | +# Read CAVM legend and use it to set flag values/meanings and descriptions |
| 65 | +legend = local_source / "Raster CAVM legend.csv" |
| 66 | +df = pd.read_csv( |
| 67 | + legend, skiprows=[0, 1], na_filter=False, dtype={"Raster code": np.int8} |
| 68 | +) |
| 69 | +df = df.sort_values("Raster code").reset_index(drop=True) |
| 70 | +# Fill in some missing short descriptions manually |
| 71 | +desc_fill = { |
| 72 | + "FW": "Fresh water", |
| 73 | + "SW": "Salt water", |
| 74 | + "GL": "Glacier", |
| 75 | + "NA": "Non-Arctic", |
| 76 | +} |
| 77 | +mask = df["Short Description"].str.strip() == "" |
| 78 | +df.loc[mask, "Short Description"] = df.loc[mask, "Vegetation Unit"].map(desc_fill) |
| 79 | + |
| 80 | +# Palette for CAVM visualization |
| 81 | +palette = { |
| 82 | + 1: "#d7d7b3", |
| 83 | + 2: "#a8a802", |
| 84 | + 3: "#a68282", |
| 85 | + 4: "#8282a0", |
| 86 | + 5: "#cdcd66", |
| 87 | + 21: "#ffebaf", |
| 88 | + 22: "#ffd37f", |
| 89 | + 23: "#e6e600", |
| 90 | + 24: "#ffff00", |
| 91 | + 31: "#dfb0b0", |
| 92 | + 32: "#db949e", |
| 93 | + 33: "#97e602", |
| 94 | + 34: "#38a802", |
| 95 | + 41: "#9eedbd", |
| 96 | + 42: "#73ffdf", |
| 97 | + 43: "#04e6a9", |
| 98 | + 91: "#0070ff", |
| 99 | + 92: "#e0f2ff", |
| 100 | + 93: "#ffffff", |
| 101 | + 99: "#cccccc", |
| 102 | +} |
| 103 | + |
| 104 | +# Merge palette into df and filter to codes present in the raster so that |
| 105 | +# flag_values, flag_meanings, flag_descriptions, and flag_colors all align. |
| 106 | +df["color"] = df["Raster code"].map(palette) |
| 107 | +raster_codes = set( |
| 108 | + np.unique(ds[VAR].values[(~np.isnan(ds[VAR].values)) & (ds[VAR].values != 127)]) |
| 109 | +) |
| 110 | +df = df[df["Raster code"].isin(raster_codes)].copy() |
| 111 | + |
| 112 | + |
| 113 | +# -------------------------------------------------------------------------------------- |
| 114 | +# 4) Automatic cleaning/formatting using ILAMB utilities |
| 115 | +# -------------------------------------------------------------------------------------- |
| 116 | + |
| 117 | +# Set lat/lon/var attributes |
| 118 | +ds = ild.set_lat_attrs(ds) |
| 119 | +ds = ild.set_lon_attrs(ds) |
| 120 | +ds = ild.set_coord_bounds(ds, "lat") |
| 121 | +ds = ild.set_coord_bounds(ds, "lon") |
| 122 | + |
| 123 | +# Decisions about naming derived from https://raw.githubusercontent.com/PCMDI/mip-cmor-tables/refs/heads/main/MIP_variables.json |
| 124 | +# Search for "landCoverFrac" to see how I modeled this |
| 125 | +ds = ild.set_var_attrs( |
| 126 | + ds, |
| 127 | + VAR, |
| 128 | + units="", |
| 129 | + standard_name="cover_category", |
| 130 | + long_name="Vegetation or Land-Cover Category", |
| 131 | + flag_values=np.asarray(df["Raster code"], dtype=np.int8), |
| 132 | + flag_meanings=df["Vegetation Unit"].to_list(), |
| 133 | + extra_attrs={ |
| 134 | + "flag_descriptions": " ".join( |
| 135 | + _clean_flag_description(s) for s in df["Short Description"] |
| 136 | + ), |
| 137 | + "flag_colors": " ".join(df["color"]), |
| 138 | + }, |
| 139 | + target_dtype=np.dtype("int8"), |
| 140 | + compression={"zlib": True, "complevel": 4, "shuffle": True}, |
| 141 | + overwrite=True, |
| 142 | +) |
| 143 | + |
| 144 | +# Final cleanup |
| 145 | +ds = ds.drop_vars("spatial_ref") |
| 146 | + |
| 147 | +# -------------------------------------------------------------------------------------- |
| 148 | +# 5) Set Obs4MIPs-compliant global attributes and write NetCDF |
| 149 | +# -------------------------------------------------------------------------------------- |
| 150 | + |
| 151 | +# Set global attributes and write NetCDF |
| 152 | +generate_stamp = time.strftime("%Y%m%d") |
| 153 | +tracking_id = ild.gen_trackingid() |
| 154 | +ds = ild.set_ods26_global_attrs( |
| 155 | + ds, |
| 156 | + activity_id="ILAMB", |
| 157 | + contact="Martha Raynolds (mkraynolds@alaska.edu)", |
| 158 | + creation_date=generate_stamp, |
| 159 | + dataset_contributor="Morgan Steckler", |
| 160 | + doi="https://doi.org/10.17632/c4xj5rv6kv.2", |
| 161 | + frequency="fx", |
| 162 | + grid="1x1 km Lambert Azimuthal Equal Area reprojected to latitude x longitude", |
| 163 | + grid_label="gr", |
| 164 | + has_aux_unc="FALSE", |
| 165 | + history=f""" |
| 166 | +{generate_stamp}: "CMORized" data from Raster CAVM 2.0 (downloaded from Mendeley Data)\n |
| 167 | +{generate_stamp}: Reprojected to lat/lon and added metadata using ILAMB utilities |
| 168 | +""", |
| 169 | + institution="University of Alaska, Fairbanks, USA", |
| 170 | + institution_id="UAF", |
| 171 | + license="https://creativecommons.org/licenses/by-nc/3.0/deed.en", |
| 172 | + nominal_resolution="1 km", |
| 173 | + processing_code_location="https://github.com/rubisco-sfa/ilamb3-data/tree/main/data/CAVM-2-0/convert.py", |
| 174 | + product="derived", |
| 175 | + realm="land", |
| 176 | + references="Raynolds, M.K., et al. (2019). A raster version of the Circumpolar Arctic Vegetation Map (CAVM). Remote Sensing of Environment, 232, 111297. https://doi.org/10.1016/j.rse.2019.111297", |
| 177 | + region="panarctic", |
| 178 | + source="Unsupervised classifications of seventeen geographic/floristic sub-sections of the Arctic, using AVHRR and MODIS data (reflectance and NDVI) and elevation data", |
| 179 | + source_id="CAVM-2-0", |
| 180 | + source_data_retrieval_date=download_stamp, |
| 181 | + source_data_url=str(remote_source), |
| 182 | + source_label="CAVM", |
| 183 | + source_type="satellite_retrieval", |
| 184 | + source_version_number="2.0", |
| 185 | + title="Raster Circumpolar Arctic Vegetation Map", |
| 186 | + tracking_id=tracking_id, |
| 187 | + variable_id=VAR, |
| 188 | + variant_label="ILAMB", |
| 189 | + variant_info="CMORized product prepared by ILAMB", |
| 190 | + version=f"v{generate_stamp}", |
| 191 | +) |
| 192 | +out_path = ild.create_output_filename(ds.attrs) |
| 193 | +ds.to_netcdf(out_path) |
| 194 | + |
| 195 | +# -------------------------------------------------------------------------------------- |
| 196 | +# Plotting verification |
| 197 | +# -------------------------------------------------------------------------------------- |
| 198 | + |
| 199 | +codes = ds[VAR].attrs["flag_values"] |
| 200 | +colors = ds[VAR].attrs["flag_colors"].split() |
| 201 | +labels = ds[VAR].attrs["flag_meanings"].split() |
| 202 | + |
| 203 | +idx_map = {c: i for i, c in enumerate(codes)} |
| 204 | +mapped = np.vectorize(lambda x: idx_map.get(x, np.nan), otypes=[float])(ds[VAR].values) |
| 205 | + |
| 206 | +fig, ax = plt.subplots(figsize=(10, 5)) |
| 207 | +ax.pcolormesh( |
| 208 | + ds["lon"], |
| 209 | + ds["lat"], |
| 210 | + mapped, |
| 211 | + cmap=ListedColormap(colors), |
| 212 | + norm=BoundaryNorm(np.arange(-0.5, len(codes)), len(codes)), |
| 213 | +) |
| 214 | +cbar = plt.colorbar(ax.collections[0], ax=ax, ticks=range(len(codes))) |
| 215 | +cbar.ax.set_yticklabels(labels) |
| 216 | +plt.tight_layout() |
| 217 | +plt.savefig(f"{VAR}.png", dpi=300) |
| 218 | +plt.close() |
0 commit comments