From 4776d0bc0604c8509fd78532f026d35a5ed1d551 Mon Sep 17 00:00:00 2001 From: Felix Cremer Date: Sat, 4 Oct 2025 12:17:12 +0300 Subject: [PATCH 1/4] Add spectraldiversity example into docs/src --- docs/src/spectraldiversity.jl | 47 +++++++++++++++++++++++++++++++++++ 1 file changed, 47 insertions(+) create mode 100644 docs/src/spectraldiversity.jl diff --git a/docs/src/spectraldiversity.jl b/docs/src/spectraldiversity.jl new file mode 100644 index 0000000..b366a38 --- /dev/null +++ b/docs/src/spectraldiversity.jl @@ -0,0 +1,47 @@ +using SentinelDataSource +using YAXArrays +using Distances +using DimensionalData +using AWS + +struct AnonymousGCS <:AbstractAWSConfig end +struct NoCredentials end +AWS.region(aws::AnonymousGCS) = "" # No region +AWS.credentials(aws::AnonymousGCS) = NoCredentials() # No credentials +AWS.check_credentials(c::NoCredentials) = c # Skip credentials check +AWS.sign!(aws::AnonymousGCS, ::AWS.Request) = nothing # Don't sign request +function AWS.generate_service_url(aws::AnonymousGCS, service::String, resource::String) + service == "s3" || throw(ArgumentError("Can only handle s3 requests to GCS")) + awsurl = string("https://objects.eodc.eu:2222", resource) + return awsurl +end +AWS.global_aws_config(AnonymousGCS()) +path = "s3://e05ab01a9d56408d82ac32d69a5aae2a:sample-data/tutorial_data/cpm_v253/S2A_MSIL2A_20180601T102021_N0500_R065_T32UPC_20230902T045008.zarr/measurements/reflectance/r20m" + +pathbase = "s3://e05ab01a9d56408d82ac32d69a5aae2a:sample-data/eopf-sample-output/" +zopen(path) +c = Cube(open_dataset(zopen(path))) +yax = YAXArray((Dim{:x}(1:500), Dim{:y}(500:-1:1), Dim{:Variables}(1:10)), rand(500,500,10)) +c = yax +function spectraldiversity(c) + dlat = dims(c, :x) + dlon = dims(c, :y) + dband = dims(c, :Variables) + # This needs a better interface for within index space + latinterval = MovingIntervals(center=dlat.val, width=3*step(dlat), n=length(dlat)-1, step=step(dlat)) + loninterval = MovingIntervals(center=dlon.val, width=-3*step(dlon), n=length(dlon)-1, step=-step(dlon)) + bandinterval = MovingIntervals(:closed, :closed, left=first(dband.val), right=last(dband.val), n=1) + meanwindows = windows(c, :y=>loninterval, :Variables => Whole()) + @show meanwindows[10,10,1] + @time xmap(innerdiversity, meanwindows) +end +spectraldiversity(c) +function innerdiversity(xout, arr) + winsize = prod(size(arr)) รท size(arr,3) + arrreshaped = reshape(arr, (winsize, size(arr,3))) + tslist = eachrow(arrreshaped) + dists = pairwise(Euclidean(), tslist) .* 2 ./ winsize .^ 4 + xout .= sum(dists) ./ 2 +end + +spectraldiversity(c) \ No newline at end of file From 77b38fdb8cf882fff624742c4757a856f3eb9674 Mon Sep 17 00:00:00 2001 From: Felix Cremer Date: Fri, 7 Nov 2025 16:25:49 +0100 Subject: [PATCH 2/4] Fix loading of the data so that we get a functioning DimArray at the leaves --- src/SentinelDataSource.jl | 24 +++++++++++++++--------- 1 file changed, 15 insertions(+), 9 deletions(-) diff --git a/src/SentinelDataSource.jl b/src/SentinelDataSource.jl index 34618e2..c919543 100644 --- a/src/SentinelDataSource.jl +++ b/src/SentinelDataSource.jl @@ -1,23 +1,29 @@ module SentinelDataSource -using DimensionalData: DimTree +using DimensionalData: DimTree, DimArray using Zarr: zopen using ZarrDatasets: ZarrDataset using CommonDataModel: CommonDataModel as CDM -using Rasters:Raster +using Rasters: Rasters export open_tree open_tree(path::AbstractString) = open_tree(ZarrDataset(path)) function open_tree(dataset::ZarrDataset) - stem = DimTree() - groupnames = CDM.groupnames(dataset) - varnames = CDM.varnames(dataset) - alldimnames = nesteddimnames(dataset) - for v in setdiff(varnames, alldimnames) - setindex!(stem, Raster(CDM.variable(dataset, v), lazy=true),Symbol(v)) + @time "stem" stem = DimTree() + @time "groups" groupnames = CDM.groupnames(dataset) + @time "vars" varnames = CDM.varnames(dataset) + @time "dims" alldimnames = nesteddimnames(dataset) + @time "forvar" for v in setdiff(varnames, alldimnames) + @time "var" var = CDM.variable(dataset, v) + @time "vardims" vardims = Rasters._dims(var) + @time "meta" metadata_out = Rasters._metadata(var) + @time "missing" missingval_out = Rasters._read_missingval_pair(var, metadata_out, Rasters.nokw) + @time "mod" mod = Rasters._mod(eltype(var), metadata_out, missingval_out;scaled=true, coerce=true) + @time "vardata" vardata = Rasters._maybe_modify(var, mod) + @time "set" setindex!(stem, DimArray(vardata, vardims),Symbol(v)) end - for g in groupnames + @time "forg" for g in groupnames setindex!(stem, open_tree(CDM.group(dataset, g)),Symbol(g)) end stem From 44fa81f6e8b45af748ddcc697405fa0cab9c8c6f Mon Sep 17 00:00:00 2001 From: Felix Cremer Date: Mon, 4 May 2026 09:39:17 +0200 Subject: [PATCH 3/4] Insert timeit_debug to measure the timing of loading a DimTree --- Project.toml | 8 +++++--- src/SentinelDataSource.jl | 40 +++++++++++++++++++++++---------------- 2 files changed, 29 insertions(+), 19 deletions(-) diff --git a/Project.toml b/Project.toml index da1f611..fa3d1ad 100644 --- a/Project.toml +++ b/Project.toml @@ -1,7 +1,7 @@ name = "SentinelDataSource" uuid = "afd1bdb2-e5b9-463c-8ecd-ef9b9458f1b0" -authors = ["Felix Cremer and contributors"] version = "0.1.0-DEV" +authors = ["Felix Cremer and contributors"] [deps] CFTime = "179af706-886a-5703-950a-314cd64e0468" @@ -10,6 +10,7 @@ DimensionalData = "0703355e-b756-11e9-17c0-8b28908087d0" DiskArrayTools = "fcd2136c-9f69-4db6-97e5-f31981721d63" DiskArrays = "3c3547ce-8d99-4f5e-a174-61eb10b00ae3" Rasters = "a3a2b9e3-a471-40c9-b274-f788e487c689" +TimerOutputs = "a759f4b9-e2f1-59dc-863e-4aeb61b1ea8f" YAXArrayBase = "90b8fcef-0c2d-428d-9c56-5f86629e9d14" YAXArrays = "c21b50f5-aa40-41ea-b809-c0f5e47bfa5c" Zarr = "0a941bbe-ad1d-11e8-39d9-ab76183a1d99" @@ -17,13 +18,14 @@ ZarrDatasets = "519a4cdf-1362-424a-9ea1-b1d782dbb24b" [compat] CFTime = "0.2.1" -CommonDataModel = "0.3.8" +CommonDataModel = "0.3.8, 0.4" DimensionalData = "0.29.12" DiskArrayTools = "0.1.12" DiskArrays = "0.4.11" Rasters = "0.14.4" +TimerOutputs = "0.5.29" YAXArrayBase = "0.7.5" -YAXArrays = "0.6.1" +YAXArrays = "0.7" Zarr = "0.9.4" ZarrDatasets = "0.1.3" julia = "1.6.7" diff --git a/src/SentinelDataSource.jl b/src/SentinelDataSource.jl index c919543..6720b0e 100644 --- a/src/SentinelDataSource.jl +++ b/src/SentinelDataSource.jl @@ -1,30 +1,38 @@ module SentinelDataSource -using DimensionalData: DimTree, DimArray +using DimensionalData: DimTree, DimArray, DimensionalData using Zarr: zopen using ZarrDatasets: ZarrDataset using CommonDataModel: CommonDataModel as CDM using Rasters: Rasters - +using TimerOutputs export open_tree open_tree(path::AbstractString) = open_tree(ZarrDataset(path)) function open_tree(dataset::ZarrDataset) - @time "stem" stem = DimTree() - @time "groups" groupnames = CDM.groupnames(dataset) - @time "vars" varnames = CDM.varnames(dataset) - @time "dims" alldimnames = nesteddimnames(dataset) - @time "forvar" for v in setdiff(varnames, alldimnames) - @time "var" var = CDM.variable(dataset, v) - @time "vardims" vardims = Rasters._dims(var) - @time "meta" metadata_out = Rasters._metadata(var) - @time "missing" missingval_out = Rasters._read_missingval_pair(var, metadata_out, Rasters.nokw) - @time "mod" mod = Rasters._mod(eltype(var), metadata_out, missingval_out;scaled=true, coerce=true) - @time "vardata" vardata = Rasters._maybe_modify(var, mod) - @time "set" setindex!(stem, DimArray(vardata, vardims),Symbol(v)) + @timeit_debug "stem" stem = DimTree() + @timeit_debug "groups" groupnames = CDM.groupnames(dataset) + @timeit_debug "vars" varnames = CDM.varnames(dataset) + @timeit_debug "dims" alldimnames = nesteddimnames(dataset) + diffnames = setdiff(varnames, alldimnames) + for v in diffnames + @timeit_debug "var $v" begin + var = CDM.variable(dataset, v) + vardims = Rasters._dims(var) + metadata_out = Rasters._metadata(var) + missingval_out = Rasters._read_missingval_pair(var, metadata_out, Rasters.nokw) + mod = Rasters._mod(eltype(var), metadata_out, missingval_out;scaled=true, coerce=true) + #= + Rasters.FileArray{ZarrDataset}(var, filename; + name=v, Rasters.nokw, mod, write=false + ) + =# + vardata = Rasters._maybe_modify(var, mod) + setindex!(stem, DimArray(vardata, vardims),Symbol(v)) + end end - @time "forg" for g in groupnames - setindex!(stem, open_tree(CDM.group(dataset, g)),Symbol(g)) + for g in groupnames + @timeit_debug "forg $g" setindex!(stem, open_tree(CDM.group(dataset, g)),Symbol(g)) end stem end From af5362a7ce88bbabaf9997c61bb1c0532ac89958 Mon Sep 17 00:00:00 2001 From: Felix Cremer Date: Mon, 4 May 2026 09:51:16 +0200 Subject: [PATCH 4/4] Remove Stale deps --- Project.toml | 6 ------ 1 file changed, 6 deletions(-) diff --git a/Project.toml b/Project.toml index fa3d1ad..0599047 100644 --- a/Project.toml +++ b/Project.toml @@ -7,12 +7,9 @@ authors = ["Felix Cremer and contributors"] CFTime = "179af706-886a-5703-950a-314cd64e0468" CommonDataModel = "1fbeeb36-5f17-413c-809b-666fb144f157" DimensionalData = "0703355e-b756-11e9-17c0-8b28908087d0" -DiskArrayTools = "fcd2136c-9f69-4db6-97e5-f31981721d63" DiskArrays = "3c3547ce-8d99-4f5e-a174-61eb10b00ae3" Rasters = "a3a2b9e3-a471-40c9-b274-f788e487c689" TimerOutputs = "a759f4b9-e2f1-59dc-863e-4aeb61b1ea8f" -YAXArrayBase = "90b8fcef-0c2d-428d-9c56-5f86629e9d14" -YAXArrays = "c21b50f5-aa40-41ea-b809-c0f5e47bfa5c" Zarr = "0a941bbe-ad1d-11e8-39d9-ab76183a1d99" ZarrDatasets = "519a4cdf-1362-424a-9ea1-b1d782dbb24b" @@ -20,12 +17,9 @@ ZarrDatasets = "519a4cdf-1362-424a-9ea1-b1d782dbb24b" CFTime = "0.2.1" CommonDataModel = "0.3.8, 0.4" DimensionalData = "0.29.12" -DiskArrayTools = "0.1.12" DiskArrays = "0.4.11" Rasters = "0.14.4" TimerOutputs = "0.5.29" -YAXArrayBase = "0.7.5" -YAXArrays = "0.7" Zarr = "0.9.4" ZarrDatasets = "0.1.3" julia = "1.6.7"