diff --git a/fe/fe-core/src/main/java/org/apache/doris/backup/RestoreJob.java b/fe/fe-core/src/main/java/org/apache/doris/backup/RestoreJob.java index 29ae36d53765fc..531176ad50c2d8 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/backup/RestoreJob.java +++ b/fe/fe-core/src/main/java/org/apache/doris/backup/RestoreJob.java @@ -34,6 +34,7 @@ import org.apache.doris.catalog.MaterializedIndex; import org.apache.doris.catalog.MaterializedIndex.IndexExtState; import org.apache.doris.catalog.MaterializedIndexMeta; +import org.apache.doris.catalog.MediumAllocationMode; import org.apache.doris.catalog.OdbcCatalogResource; import org.apache.doris.catalog.OdbcTable; import org.apache.doris.catalog.OlapTable; @@ -1539,7 +1540,8 @@ protected Partition resetTabletForRestore(OlapTable localTbl, OlapTable remoteTb // replicas try { Pair>, TStorageMedium> beIdsAndMedium = Env.getCurrentSystemInfo() - .selectBackendIdsForReplicaCreation(replicaAlloc, nextIndexes, null, false, false); + .selectBackendIdsForReplicaCreation(replicaAlloc, nextIndexes, null, + MediumAllocationMode.ADAPTIVE, false); Map> beIds = beIdsAndMedium.first; for (Map.Entry> entry : beIds.entrySet()) { for (Long beId : entry.getValue()) { diff --git a/fe/fe-core/src/main/java/org/apache/doris/catalog/DataProperty.java b/fe/fe-core/src/main/java/org/apache/doris/catalog/DataProperty.java index 9d17e7f4dee857..dad7cf6c863750 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/catalog/DataProperty.java +++ b/fe/fe-core/src/main/java/org/apache/doris/catalog/DataProperty.java @@ -43,7 +43,17 @@ public class DataProperty implements GsonPostProcessable { private String storagePolicy; @SerializedName(value = "isMutable") private boolean isMutable = true; - private boolean storageMediumSpecified; + // Whether the medium was explicitly requested by the user at CREATE TABLE time. + // ADAPTIVE: default / auto - placement may pick any available medium + // STRICT : user explicitly asked for a specific medium (e.g. storage_medium=ssd) + // + // NOTE: on master this signal lived in a transient `storageMediumSpecified` boolean + // that was never persisted (missing @SerializedName), so it silently reverted to false + // after an FE restart. By persisting it here we both (a) carry the signal across + // restarts correctly and (b) get a typed home for the upcoming restore-side medium + // decision logic. See MediumAllocationMode for semantics. + @SerializedName(value = "mediumAllocationMode") + private MediumAllocationMode mediumAllocationMode = MediumAllocationMode.ADAPTIVE; private DataProperty() { // for persist @@ -53,6 +63,7 @@ public DataProperty(TStorageMedium medium) { this.storageMedium = medium; this.cooldownTimeMs = MAX_COOLDOWN_TIME_MS; this.storagePolicy = ""; + this.mediumAllocationMode = MediumAllocationMode.ADAPTIVE; } public DataProperty(DataProperty other) { @@ -60,6 +71,7 @@ public DataProperty(DataProperty other) { this.cooldownTimeMs = other.cooldownTimeMs; this.storagePolicy = other.storagePolicy; this.isMutable = other.isMutable; + this.mediumAllocationMode = other.mediumAllocationMode; } /** @@ -78,6 +90,7 @@ public DataProperty(TStorageMedium medium, long cooldown, String storagePolicy, this.cooldownTimeMs = cooldown; this.storagePolicy = storagePolicy; this.isMutable = isMutable; + this.mediumAllocationMode = MediumAllocationMode.ADAPTIVE; } public TStorageMedium getStorageMedium() { @@ -96,8 +109,35 @@ public void setStoragePolicy(String storagePolicy) { this.storagePolicy = storagePolicy; } + public MediumAllocationMode getMediumAllocationMode() { + return mediumAllocationMode; + } + + public void setMediumAllocationMode(MediumAllocationMode mode) { + this.mediumAllocationMode = (mode == null ? MediumAllocationMode.ADAPTIVE : mode); + } + + /** + * Legacy alias kept so that callers migrating in follow-up commits do not all + * need to change at once. Prefer {@link #getMediumAllocationMode()} in new code. + * + * @deprecated use {@link #getMediumAllocationMode()} and compare with + * {@link MediumAllocationMode#STRICT}. + */ + @Deprecated public boolean isStorageMediumSpecified() { - return storageMediumSpecified; + return mediumAllocationMode == MediumAllocationMode.STRICT; + } + + /** + * Legacy setter kept so that callers migrating in follow-up commits do not all + * need to change at once. Prefer {@link #setMediumAllocationMode(MediumAllocationMode)}. + * + * @deprecated use {@link #setMediumAllocationMode(MediumAllocationMode)}. + */ + @Deprecated + public void setStorageMediumSpecified(boolean isSpecified) { + this.mediumAllocationMode = isSpecified ? MediumAllocationMode.STRICT : MediumAllocationMode.ADAPTIVE; } public boolean isMutable() { @@ -108,17 +148,13 @@ public void setMutable(boolean mutable) { isMutable = mutable; } - public void setStorageMediumSpecified(boolean isSpecified) { - storageMediumSpecified = isSpecified; - } - public void setStorageMedium(TStorageMedium medium) { this.storageMedium = medium; } @Override public int hashCode() { - return Objects.hash(storageMedium, cooldownTimeMs, storagePolicy); + return Objects.hash(storageMedium, cooldownTimeMs, storagePolicy, mediumAllocationMode); } @Override @@ -136,7 +172,8 @@ public boolean equals(Object obj) { return this.storageMedium == other.storageMedium && this.cooldownTimeMs == other.cooldownTimeMs && Strings.nullToEmpty(this.storagePolicy).equals(Strings.nullToEmpty(other.storagePolicy)) - && this.isMutable == other.isMutable; + && this.isMutable == other.isMutable + && this.mediumAllocationMode == other.mediumAllocationMode; } @Override @@ -145,6 +182,7 @@ public String toString() { sb.append("Storage medium[").append(this.storageMedium).append("]. "); sb.append("cool down[").append(TimeUtils.longToTimeString(cooldownTimeMs)).append("]. "); sb.append("remote storage policy[").append(this.storagePolicy).append("]. "); + sb.append("medium allocation mode[").append(this.mediumAllocationMode).append("]. "); return sb.toString(); } @@ -152,6 +190,9 @@ public String toString() { public void gsonPostProcess() throws IOException { // storagePolicy is a newly added field, it may be null when replaying from old version. this.storagePolicy = Strings.nullToEmpty(this.storagePolicy); + // mediumAllocationMode is a newly added field; old images won't contain it, so default to ADAPTIVE. + if (this.mediumAllocationMode == null) { + this.mediumAllocationMode = MediumAllocationMode.ADAPTIVE; + } } - } diff --git a/fe/fe-core/src/main/java/org/apache/doris/catalog/MediumAllocationMode.java b/fe/fe-core/src/main/java/org/apache/doris/catalog/MediumAllocationMode.java new file mode 100644 index 00000000000000..7bb93f9566cebf --- /dev/null +++ b/fe/fe-core/src/main/java/org/apache/doris/catalog/MediumAllocationMode.java @@ -0,0 +1,83 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +package org.apache.doris.catalog; + +import org.apache.doris.common.AnalysisException; + +import com.google.common.base.Strings; + +/** + * Defines how Doris decides the storage medium of a data property + * (partition-level / table-level). + * + *

Hard-binding semantics with CREATE TABLE: + *

    + *
  • {@code PROPERTIES("storage_medium"="ssd|hdd")} -> {@link #STRICT}
  • + *
  • no {@code storage_medium} property -> {@link #ADAPTIVE}
  • + *
+ * + *

The mode drives replica placement: + *

    + *
  • {@link #STRICT}: user explicitly requested a medium; placement must + * honour it and fail if the requested medium cannot be satisfied.
  • + *
  • {@link #ADAPTIVE}: medium is a hint; placement may pick any available + * medium according to cluster capacity.
  • + *
+ */ +public enum MediumAllocationMode { + STRICT("strict"), + ADAPTIVE("adaptive"); + + private final String value; + + MediumAllocationMode(String value) { + this.value = value; + } + + public String getValue() { + return value; + } + + public boolean isStrict() { + return this == STRICT; + } + + public boolean isAdaptive() { + return this == ADAPTIVE; + } + + /** + * Parse a user-provided string (case-insensitive, trimmed) into the enum. + * + * @throws AnalysisException if the value is null, blank or unrecognised. + */ + public static MediumAllocationMode fromString(String value) throws AnalysisException { + String trimmed = Strings.nullToEmpty(value).trim(); + if (trimmed.isEmpty()) { + throw new AnalysisException("medium_allocation_mode cannot be null or empty"); + } + for (MediumAllocationMode mode : values()) { + if (mode.value.equalsIgnoreCase(trimmed)) { + return mode; + } + } + throw new AnalysisException(String.format( + "Invalid medium_allocation_mode value: '%s'. Valid options are: 'strict', 'adaptive'", + value)); + } +} diff --git a/fe/fe-core/src/main/java/org/apache/doris/catalog/OlapTable.java b/fe/fe-core/src/main/java/org/apache/doris/catalog/OlapTable.java index e93e521f1516f7..6dca34474f758a 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/catalog/OlapTable.java +++ b/fe/fe-core/src/main/java/org/apache/doris/catalog/OlapTable.java @@ -970,7 +970,7 @@ && getTableProperty().getDynamicPartitionProperty().getBuckets() Pair>, TStorageMedium> tag2beIdsAndMedium = Env.getCurrentSystemInfo().selectBackendIdsForReplicaCreation( replicaAlloc, nextIndexes, null, - false, false); + MediumAllocationMode.ADAPTIVE, false); tag2beIds = tag2beIdsAndMedium.first; } for (Map.Entry> entry3 : tag2beIds.entrySet()) { diff --git a/fe/fe-core/src/main/java/org/apache/doris/catalog/TableProperty.java b/fe/fe-core/src/main/java/org/apache/doris/catalog/TableProperty.java index 2221a8f8fb7340..8eb70a8db0ba0d 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/catalog/TableProperty.java +++ b/fe/fe-core/src/main/java/org/apache/doris/catalog/TableProperty.java @@ -73,6 +73,17 @@ public class TableProperty implements GsonPostProcessable { private TStorageMedium storageMedium = null; + // Table-level cache of the medium allocation mode derived from the table's + // raw PROPERTIES map. Like {@link #storageMedium} this is rebuilt from + // {@code properties} on load rather than persisted as its own field, so + // {@link DataProperty#mediumAllocationMode} (the partition-level source of + // truth) remains the only persisted copy. + // + // Hard-binding at CREATE TABLE time: if the user specifies + // {@code PROPERTIES("storage_medium"=...)} the mode is STRICT, otherwise + // ADAPTIVE. See {@link MediumAllocationMode}. + private MediumAllocationMode mediumAllocationMode = MediumAllocationMode.ADAPTIVE; + // which columns stored in RowStore column private List rowStoreColumns; @@ -163,6 +174,7 @@ public TableProperty buildProperty(short opCode) { buildInMemory(); buildMinLoadReplicaNum(); buildStorageMedium(); + buildMediumAllocationMode(); buildStoragePolicy(); buildIsBeingSynced(); buildCompactionPolicy(); @@ -535,6 +547,24 @@ public TStorageMedium getStorageMedium() { return storageMedium; } + /** + * Derive the table-level {@link MediumAllocationMode} from the raw properties + * map. Mirrors the hard-binding used by {@link DataProperty}: the user + * explicitly asked for a medium iff {@link PropertyAnalyzer#PROPERTIES_STORAGE_MEDIUM} + * is present and non-empty. + */ + public TableProperty buildMediumAllocationMode() { + String storageMediumStr = properties.get(PropertyAnalyzer.PROPERTIES_STORAGE_MEDIUM); + mediumAllocationMode = Strings.isNullOrEmpty(storageMediumStr) + ? MediumAllocationMode.ADAPTIVE + : MediumAllocationMode.STRICT; + return this; + } + + public MediumAllocationMode getMediumAllocationMode() { + return mediumAllocationMode; + } + public TableProperty buildStoragePolicy() { storagePolicy = properties.getOrDefault(PropertyAnalyzer.PROPERTIES_STORAGE_POLICY, ""); return this; @@ -910,6 +940,7 @@ public void gsonPostProcess() throws IOException { buildInMemory(); buildMinLoadReplicaNum(); buildStorageMedium(); + buildMediumAllocationMode(); buildStorageFormat(); buildInvertedIndexFileStorageFormat(); buildDataSortInfo(); diff --git a/fe/fe-core/src/main/java/org/apache/doris/cloud/datasource/CloudInternalCatalog.java b/fe/fe-core/src/main/java/org/apache/doris/cloud/datasource/CloudInternalCatalog.java index 5bf096bc60e0cd..bc10b38f5f600c 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/cloud/datasource/CloudInternalCatalog.java +++ b/fe/fe-core/src/main/java/org/apache/doris/cloud/datasource/CloudInternalCatalog.java @@ -33,6 +33,7 @@ import org.apache.doris.catalog.MaterializedIndex.IndexExtState; import org.apache.doris.catalog.MaterializedIndex.IndexState; import org.apache.doris.catalog.MaterializedIndexMeta; +import org.apache.doris.catalog.MediumAllocationMode; import org.apache.doris.catalog.MetaIdGenerator.IdGeneratorBuffer; import org.apache.doris.catalog.OlapTable; import org.apache.doris.catalog.Partition; @@ -108,7 +109,7 @@ protected Partition createPartitionWithIndices(long dbId, OlapTable tbl, long pa String storagePolicy, IdGeneratorBuffer idGeneratorBuffer, BinlogConfig binlogConfig, - boolean isStorageMediumSpecified) + MediumAllocationMode mediumAllocationMode) throws DdlException { // create base index first. Preconditions.checkArgument(tbl.getBaseIndexId() != -1); diff --git a/fe/fe-core/src/main/java/org/apache/doris/cloud/system/CloudSystemInfoService.java b/fe/fe-core/src/main/java/org/apache/doris/cloud/system/CloudSystemInfoService.java index 34865f06e61432..06d0d8f24d1522 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/cloud/system/CloudSystemInfoService.java +++ b/fe/fe-core/src/main/java/org/apache/doris/cloud/system/CloudSystemInfoService.java @@ -18,6 +18,7 @@ package org.apache.doris.cloud.system; import org.apache.doris.catalog.Env; +import org.apache.doris.catalog.MediumAllocationMode; import org.apache.doris.catalog.ReplicaAllocation; import org.apache.doris.cloud.catalog.CloudEnv; import org.apache.doris.cloud.catalog.ComputeGroup; @@ -220,7 +221,7 @@ public void renameVirtualComputeGroup(String computeGroupId, String oldComputeGr @Override public Pair>, TStorageMedium> selectBackendIdsForReplicaCreation( ReplicaAllocation replicaAlloc, Map nextIndexs, - TStorageMedium storageMedium, boolean isStorageMediumSpecified, + TStorageMedium storageMedium, MediumAllocationMode mediumAllocationMode, boolean isOnlyForCheck) throws DdlException { return Pair.of(Maps.newHashMap(), storageMedium); diff --git a/fe/fe-core/src/main/java/org/apache/doris/common/util/DynamicPartitionUtil.java b/fe/fe-core/src/main/java/org/apache/doris/common/util/DynamicPartitionUtil.java index 09b4a9f18e815e..e0b2f76e242f31 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/common/util/DynamicPartitionUtil.java +++ b/fe/fe-core/src/main/java/org/apache/doris/common/util/DynamicPartitionUtil.java @@ -26,6 +26,7 @@ import org.apache.doris.catalog.DistributionInfo; import org.apache.doris.catalog.DynamicPartitionProperty; import org.apache.doris.catalog.Env; +import org.apache.doris.catalog.MediumAllocationMode; import org.apache.doris.catalog.OlapTable; import org.apache.doris.catalog.PartitionInfo; import org.apache.doris.catalog.PartitionType; @@ -237,7 +238,7 @@ private static void checkReplicationNum(String val, Database db) throws DdlExcep } ReplicaAllocation replicaAlloc = new ReplicaAllocation(Short.valueOf(val)); Env.getCurrentSystemInfo().selectBackendIdsForReplicaCreation(replicaAlloc, Maps.newHashMap(), - null, false, true); + null, MediumAllocationMode.ADAPTIVE, true); } private static void checkReplicaAllocation(ReplicaAllocation replicaAlloc, int hotPartitionNum) @@ -248,14 +249,14 @@ private static void checkReplicaAllocation(ReplicaAllocation replicaAlloc, int h Map nextIndexs = Maps.newHashMap(); Env.getCurrentSystemInfo().selectBackendIdsForReplicaCreation(replicaAlloc, nextIndexs, null, - false, true); + MediumAllocationMode.ADAPTIVE, true); if (hotPartitionNum <= 0) { return; } try { Env.getCurrentSystemInfo().selectBackendIdsForReplicaCreation(replicaAlloc, nextIndexs, - TStorageMedium.SSD, false, true); + TStorageMedium.SSD, MediumAllocationMode.ADAPTIVE, true); } catch (DdlException e) { throw new DdlException("Failed to find enough backend for ssd storage medium. When setting " + DynamicPartitionProperty.HOT_PARTITION_NUM + " > 0, the hot partitions will store " diff --git a/fe/fe-core/src/main/java/org/apache/doris/common/util/PropertyAnalyzer.java b/fe/fe-core/src/main/java/org/apache/doris/common/util/PropertyAnalyzer.java index 2800ea1157f886..d03a7020807dd1 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/common/util/PropertyAnalyzer.java +++ b/fe/fe-core/src/main/java/org/apache/doris/common/util/PropertyAnalyzer.java @@ -27,6 +27,7 @@ import org.apache.doris.catalog.Env; import org.apache.doris.catalog.EnvFactory; import org.apache.doris.catalog.KeysType; +import org.apache.doris.catalog.MediumAllocationMode; import org.apache.doris.catalog.Partition; import org.apache.doris.catalog.PrimitiveType; import org.apache.doris.catalog.ReplicaAllocation; @@ -393,6 +394,7 @@ public static DataProperty analyzeDataProperty(Map properties, f String newStoragePolicy = oldStoragePolicy; boolean hasStoragePolicy = false; boolean storageMediumSpecified = false; + MediumAllocationMode mediumAllocationMode = oldDataProperty.getMediumAllocationMode(); boolean isBeingSynced = false; for (Map.Entry entry : properties.entrySet()) { @@ -505,9 +507,12 @@ public static DataProperty analyzeDataProperty(Map properties, f properties.remove(PROPERTIES_MUTABLE); DataProperty dataProperty = new DataProperty(storageMedium, cooldownTimestamp, newStoragePolicy, mutable); - // check the state of data property + dataProperty.setMediumAllocationMode(mediumAllocationMode); + // Hard-binding semantics: PROPERTIES("storage_medium"=...) -> STRICT. + // If storage_medium is absent, preserve the old mode across unrelated + // data property changes such as cooldown time or storage policy. if (storageMediumSpecified) { - dataProperty.setStorageMediumSpecified(true); + dataProperty.setMediumAllocationMode(MediumAllocationMode.STRICT); } return dataProperty; } @@ -1642,7 +1647,7 @@ private static ReplicaAllocation analyzeReplicaAllocationImpl(Map tabletIdSet, IdGeneratorBuffer idGeneratorBuffer, boolean isStorageMediumSpecified) + Set tabletIdSet, IdGeneratorBuffer idGeneratorBuffer, + MediumAllocationMode mediumAllocationMode) throws DdlException { ColocateTableIndex colocateIndex = Env.getCurrentColocateIndex(); SystemInfoService systemInfoService = Env.getCurrentSystemInfo(); @@ -3363,7 +3365,7 @@ public TStorageMedium createTablets(MaterializedIndex index, ReplicaState replic startPos = 0; } else { startPos = systemInfoService.getStartPosOfRoundRobin(tag, storageMedium, - isStorageMediumSpecified); + mediumAllocationMode); } nextIndexs.put(tag, startPos); } @@ -3385,7 +3387,7 @@ public TStorageMedium createTablets(MaterializedIndex index, ReplicaState replic Pair>, TStorageMedium> chosenBackendIdsAndMedium = systemInfoService.selectBackendIdsForReplicaCreation( replicaAlloc, nextIndexs, - storageMedium, isStorageMediumSpecified, false); + storageMedium, mediumAllocationMode, false); chosenBackendIds = chosenBackendIdsAndMedium.first; storageMedium = chosenBackendIdsAndMedium.second; for (Map.Entry> entry : chosenBackendIds.entrySet()) { @@ -3598,7 +3600,7 @@ public void truncateTable(String dbName, String tableName, PartitionNamesInfo pa copiedTbl.getPartitionInfo().getTabletType(oldPartitionId), olapTable.getPartitionInfo().getDataProperty(oldPartitionId).getStoragePolicy(), idGeneratorBuffer, binlogConfig, - copiedTbl.getPartitionInfo().getDataProperty(oldPartitionId).isStorageMediumSpecified()); + copiedTbl.getPartitionInfo().getDataProperty(oldPartitionId).getMediumAllocationMode()); newPartitions.add(newPartition); } diff --git a/fe/fe-core/src/main/java/org/apache/doris/system/SystemInfoService.java b/fe/fe-core/src/main/java/org/apache/doris/system/SystemInfoService.java index 2ef193cb158cfc..ba2dc50b2aa266 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/system/SystemInfoService.java +++ b/fe/fe-core/src/main/java/org/apache/doris/system/SystemInfoService.java @@ -19,6 +19,7 @@ import org.apache.doris.catalog.DiskInfo; import org.apache.doris.catalog.Env; +import org.apache.doris.catalog.MediumAllocationMode; import org.apache.doris.catalog.ReplicaAllocation; import org.apache.doris.cloud.qe.ComputeGroupException; import org.apache.doris.cluster.ClusterGuard; @@ -490,7 +491,8 @@ public int compare(Backend a, Backend b) { // Select the smallest number of tablets as the starting position of // round robin in the BE that match the policy - public int getStartPosOfRoundRobin(Tag tag, TStorageMedium storageMedium, boolean isStorageMediumSpecified) { + public int getStartPosOfRoundRobin(Tag tag, TStorageMedium storageMedium, + MediumAllocationMode mediumAllocationMode) { BeSelectionPolicy.Builder builder = new BeSelectionPolicy.Builder() .needScheduleAvailable() .needCheckDiskUsage() @@ -502,7 +504,9 @@ public int getStartPosOfRoundRobin(Tag tag, TStorageMedium storageMedium, boolea BeSelectionPolicy policy = builder.build(); List beIds = selectBackendIdsByPolicy(policy, -1); - if (beIds.isEmpty() && storageMedium != null && !isStorageMediumSpecified) { + // ADAPTIVE: medium is a hint; fall back to the other medium if none of the BEs can host it. + // STRICT : medium is required; do not fall back (caller will surface an error). + if (beIds.isEmpty() && storageMedium != null && mediumAllocationMode != MediumAllocationMode.STRICT) { storageMedium = (storageMedium == TStorageMedium.HDD) ? TStorageMedium.SSD : TStorageMedium.HDD; policy = builder.setStorageMedium(storageMedium).build(); beIds = selectBackendIdsByPolicy(policy, -1); @@ -527,14 +531,15 @@ public int getStartPosOfRoundRobin(Tag tag, TStorageMedium storageMedium, boolea * @param replicaAlloc * @param nextIndexs create tablet round robin next be index, when enable_round_robin_create_tablet * @param storageMedium - * @param isStorageMediumSpecified + * @param mediumAllocationMode controls whether the storage medium may fall back when no BE + * can host the requested medium (ADAPTIVE falls back, STRICT does not) * @param isOnlyForCheck set true if only used for check available backend * @return return the selected backend ids group by tag. * @throws DdlException */ public Pair>, TStorageMedium> selectBackendIdsForReplicaCreation( ReplicaAllocation replicaAlloc, Map nextIndexs, - TStorageMedium storageMedium, boolean isStorageMediumSpecified, + TStorageMedium storageMedium, MediumAllocationMode mediumAllocationMode, boolean isOnlyForCheck) throws DdlException { Map copiedBackends = Maps.newHashMap(getAllClusterBackendsNoException()); @@ -568,7 +573,11 @@ public Pair>, TStorageMedium> selectBackendIdsForReplicaCrea // first time empty, retry with different storage medium // if only for check, no need to retry different storage medium to get backend TStorageMedium originalStorageMedium = storageMedium; - if (beIds.isEmpty() && storageMedium != null && !isStorageMediumSpecified && !isOnlyForCheck) { + // ADAPTIVE: retry with the opposite medium if no BE can host the requested one. + // STRICT: caller explicitly asked for this medium, do not fall back. + if (beIds.isEmpty() && storageMedium != null + && mediumAllocationMode != MediumAllocationMode.STRICT + && !isOnlyForCheck) { storageMedium = (storageMedium == TStorageMedium.HDD) ? TStorageMedium.SSD : TStorageMedium.HDD; builder.setStorageMedium(storageMedium); if (Config.enable_round_robin_create_tablet) { diff --git a/fe/fe-core/src/test/java/org/apache/doris/backup/RestoreJobTest.java b/fe/fe-core/src/test/java/org/apache/doris/backup/RestoreJobTest.java index d03d1c2e5f0300..03dadf59c4ed72 100644 --- a/fe/fe-core/src/test/java/org/apache/doris/backup/RestoreJobTest.java +++ b/fe/fe-core/src/test/java/org/apache/doris/backup/RestoreJobTest.java @@ -26,6 +26,7 @@ import org.apache.doris.catalog.HashDistributionInfo; import org.apache.doris.catalog.MaterializedIndex; import org.apache.doris.catalog.MaterializedIndex.IndexExtState; +import org.apache.doris.catalog.MediumAllocationMode; import org.apache.doris.catalog.OlapTable; import org.apache.doris.catalog.Partition; import org.apache.doris.catalog.PartitionInfo; @@ -151,7 +152,7 @@ public void setUp() throws Exception { Mockito.any(ReplicaAllocation.class), Mockito.anyMap(), Mockito.any(TStorageMedium.class), - Mockito.eq(false), + Mockito.eq(MediumAllocationMode.ADAPTIVE), Mockito.eq(true)); Mockito.doAnswer(inv -> { diff --git a/fe/fe-core/src/test/java/org/apache/doris/catalog/DataPropertyTest.java b/fe/fe-core/src/test/java/org/apache/doris/catalog/DataPropertyTest.java index a53c18680af97a..4d4f0b6b2e8a41 100644 --- a/fe/fe-core/src/test/java/org/apache/doris/catalog/DataPropertyTest.java +++ b/fe/fe-core/src/test/java/org/apache/doris/catalog/DataPropertyTest.java @@ -18,11 +18,16 @@ package org.apache.doris.catalog; import org.apache.doris.common.Config; +import org.apache.doris.common.util.PropertyAnalyzer; +import org.apache.doris.persist.gson.GsonUtils; import org.apache.doris.thrift.TStorageMedium; import org.junit.Assert; import org.junit.Test; +import java.util.HashMap; +import java.util.Map; + public class DataPropertyTest { @Test @@ -41,4 +46,76 @@ public void testCooldownTimeMs() throws Exception { dataProperty = new DataProperty(TStorageMedium.HDD); Assert.assertEquals(DataProperty.MAX_COOLDOWN_TIME_MS, dataProperty.getCooldownTimeMs()); } + + @Test + public void testDefaultMediumAllocationMode() { + DataProperty dataProperty = new DataProperty(TStorageMedium.HDD); + Assert.assertEquals(MediumAllocationMode.ADAPTIVE, dataProperty.getMediumAllocationMode()); + + DataProperty dataProperty2 = new DataProperty(TStorageMedium.SSD, + DataProperty.MAX_COOLDOWN_TIME_MS, ""); + Assert.assertEquals(MediumAllocationMode.ADAPTIVE, dataProperty2.getMediumAllocationMode()); + } + + @Test + public void testStrictFlagRoundTrip() { + DataProperty dataProperty = new DataProperty(TStorageMedium.SSD); + dataProperty.setMediumAllocationMode(MediumAllocationMode.STRICT); + + String json = GsonUtils.GSON.toJson(dataProperty); + Assert.assertTrue("new field must be persisted", json.contains("mediumAllocationMode")); + Assert.assertTrue(json.contains("STRICT") || json.contains("strict")); + + DataProperty restored = GsonUtils.GSON.fromJson(json, DataProperty.class); + Assert.assertEquals(MediumAllocationMode.STRICT, restored.getMediumAllocationMode()); + Assert.assertTrue("legacy shim must agree with enum", restored.isStorageMediumSpecified()); + } + + /** + * On master the old {@code storageMediumSpecified} boolean had no + * {@code @SerializedName} annotation, so it was never written to the + * image at all. Simulate replaying such an old image (no mediumAllocationMode + * key) and verify we fall back to ADAPTIVE rather than NPE / STRICT. + */ + @Test + public void testOldImageDeserialisesToAdaptive() throws Exception { + String legacyJson = "{" + + "\"storageMedium\":\"HDD\"," + + "\"cooldownTimeMs\":" + DataProperty.MAX_COOLDOWN_TIME_MS + "," + + "\"storagePolicy\":\"\"," + + "\"isMutable\":true" + + "}"; + + DataProperty restored = GsonUtils.GSON.fromJson(legacyJson, DataProperty.class); + restored.gsonPostProcess(); + + Assert.assertEquals(TStorageMedium.HDD, restored.getStorageMedium()); + Assert.assertEquals(MediumAllocationMode.ADAPTIVE, restored.getMediumAllocationMode()); + Assert.assertFalse(restored.isStorageMediumSpecified()); + } + + @Test + public void testLegacySetterMapsToEnum() { + DataProperty dataProperty = new DataProperty(TStorageMedium.SSD); + Assert.assertEquals(MediumAllocationMode.ADAPTIVE, dataProperty.getMediumAllocationMode()); + + dataProperty.setStorageMediumSpecified(true); + Assert.assertEquals(MediumAllocationMode.STRICT, dataProperty.getMediumAllocationMode()); + + dataProperty.setStorageMediumSpecified(false); + Assert.assertEquals(MediumAllocationMode.ADAPTIVE, dataProperty.getMediumAllocationMode()); + } + + @Test + public void testAnalyzeDataPropertyPreservesMediumAllocationModeWhenMediumUnspecified() throws Exception { + DataProperty oldDataProperty = new DataProperty(TStorageMedium.SSD); + oldDataProperty.setMediumAllocationMode(MediumAllocationMode.STRICT); + + Map properties = new HashMap<>(); + properties.put(PropertyAnalyzer.PROPERTIES_MUTABLE, "false"); + + DataProperty updated = PropertyAnalyzer.analyzeDataProperty(properties, oldDataProperty); + Assert.assertEquals(MediumAllocationMode.STRICT, updated.getMediumAllocationMode()); + Assert.assertFalse(updated.isMutable()); + } } diff --git a/fe/fe-core/src/test/java/org/apache/doris/catalog/MediumAllocationModeTest.java b/fe/fe-core/src/test/java/org/apache/doris/catalog/MediumAllocationModeTest.java new file mode 100644 index 00000000000000..fa28d0301fe63a --- /dev/null +++ b/fe/fe-core/src/test/java/org/apache/doris/catalog/MediumAllocationModeTest.java @@ -0,0 +1,58 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +package org.apache.doris.catalog; + +import org.apache.doris.common.AnalysisException; + +import org.junit.Assert; +import org.junit.Test; + +public class MediumAllocationModeTest { + + @Test + public void testFromStringCaseInsensitive() throws AnalysisException { + Assert.assertEquals(MediumAllocationMode.STRICT, MediumAllocationMode.fromString("strict")); + Assert.assertEquals(MediumAllocationMode.STRICT, MediumAllocationMode.fromString("STRICT")); + Assert.assertEquals(MediumAllocationMode.STRICT, MediumAllocationMode.fromString(" Strict ")); + Assert.assertEquals(MediumAllocationMode.ADAPTIVE, MediumAllocationMode.fromString("adaptive")); + Assert.assertEquals(MediumAllocationMode.ADAPTIVE, MediumAllocationMode.fromString("ADAPTIVE")); + } + + @Test(expected = AnalysisException.class) + public void testFromStringNullThrows() throws AnalysisException { + MediumAllocationMode.fromString(null); + } + + @Test(expected = AnalysisException.class) + public void testFromStringBlankThrows() throws AnalysisException { + MediumAllocationMode.fromString(" "); + } + + @Test(expected = AnalysisException.class) + public void testFromStringUnknownThrows() throws AnalysisException { + MediumAllocationMode.fromString("lax"); + } + + @Test + public void testIsStrictIsAdaptive() { + Assert.assertTrue(MediumAllocationMode.STRICT.isStrict()); + Assert.assertFalse(MediumAllocationMode.STRICT.isAdaptive()); + Assert.assertTrue(MediumAllocationMode.ADAPTIVE.isAdaptive()); + Assert.assertFalse(MediumAllocationMode.ADAPTIVE.isStrict()); + } +} diff --git a/fe/fe-core/src/test/java/org/apache/doris/catalog/ReplicaAllocationTest.java b/fe/fe-core/src/test/java/org/apache/doris/catalog/ReplicaAllocationTest.java index 550b2c7d6630a8..85d0ea860698a6 100644 --- a/fe/fe-core/src/test/java/org/apache/doris/catalog/ReplicaAllocationTest.java +++ b/fe/fe-core/src/test/java/org/apache/doris/catalog/ReplicaAllocationTest.java @@ -58,7 +58,7 @@ public void setUp() throws DdlException { Mockito.any(ReplicaAllocation.class), Mockito.anyMap(), Mockito.nullable(TStorageMedium.class), - Mockito.eq(false), + Mockito.eq(MediumAllocationMode.ADAPTIVE), Mockito.eq(true)); } diff --git a/fe/fe-core/src/test/java/org/apache/doris/catalog/TablePropertyTest.java b/fe/fe-core/src/test/java/org/apache/doris/catalog/TablePropertyTest.java new file mode 100644 index 00000000000000..8db534b5334045 --- /dev/null +++ b/fe/fe-core/src/test/java/org/apache/doris/catalog/TablePropertyTest.java @@ -0,0 +1,64 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +package org.apache.doris.catalog; + +import org.apache.doris.common.util.PropertyAnalyzer; + +import com.google.common.collect.Maps; +import org.junit.Assert; +import org.junit.Test; + +import java.util.Map; + +public class TablePropertyTest { + + @Test + public void testMediumAllocationModeAdaptiveWhenUnspecified() { + Map properties = Maps.newHashMap(); + TableProperty tableProperty = new TableProperty(properties); + tableProperty.buildMediumAllocationMode(); + Assert.assertEquals(MediumAllocationMode.ADAPTIVE, tableProperty.getMediumAllocationMode()); + } + + @Test + public void testMediumAllocationModeStrictWhenSsdSpecified() { + Map properties = Maps.newHashMap(); + properties.put(PropertyAnalyzer.PROPERTIES_STORAGE_MEDIUM, "SSD"); + TableProperty tableProperty = new TableProperty(properties); + tableProperty.buildMediumAllocationMode(); + Assert.assertEquals(MediumAllocationMode.STRICT, tableProperty.getMediumAllocationMode()); + } + + @Test + public void testMediumAllocationModeStrictWhenHddSpecified() { + Map properties = Maps.newHashMap(); + properties.put(PropertyAnalyzer.PROPERTIES_STORAGE_MEDIUM, "HDD"); + TableProperty tableProperty = new TableProperty(properties); + tableProperty.buildMediumAllocationMode(); + Assert.assertEquals(MediumAllocationMode.STRICT, tableProperty.getMediumAllocationMode()); + } + + @Test + public void testMediumAllocationModeAdaptiveWhenEmptyString() { + Map properties = Maps.newHashMap(); + properties.put(PropertyAnalyzer.PROPERTIES_STORAGE_MEDIUM, ""); + TableProperty tableProperty = new TableProperty(properties); + tableProperty.buildMediumAllocationMode(); + Assert.assertEquals(MediumAllocationMode.ADAPTIVE, tableProperty.getMediumAllocationMode()); + } +} diff --git a/fe/fe-core/src/test/java/org/apache/doris/datasource/RoundRobinCreateTabletTest.java b/fe/fe-core/src/test/java/org/apache/doris/datasource/RoundRobinCreateTabletTest.java index b76c49081d89d1..fbe7a6541c90d2 100644 --- a/fe/fe-core/src/test/java/org/apache/doris/datasource/RoundRobinCreateTabletTest.java +++ b/fe/fe-core/src/test/java/org/apache/doris/datasource/RoundRobinCreateTabletTest.java @@ -21,6 +21,7 @@ import org.apache.doris.catalog.Env; import org.apache.doris.catalog.HashDistributionInfo; import org.apache.doris.catalog.MaterializedIndex; +import org.apache.doris.catalog.MediumAllocationMode; import org.apache.doris.catalog.MetaIdGenerator.IdGeneratorBuffer; import org.apache.doris.catalog.Replica; import org.apache.doris.catalog.Replica.ReplicaState; @@ -112,7 +113,7 @@ public void testCreateTablets() { try { Env.getCurrentEnv().getInternalCatalog().createTablets(index, ReplicaState.NORMAL, distributionInfo, 0, replicaAlloc, tabletMeta, - tabletIdSet, idGeneratorBuffer, false); + tabletIdSet, idGeneratorBuffer, MediumAllocationMode.ADAPTIVE); } catch (Exception e) { System.out.println("failed to create tablets " + e.getMessage()); } diff --git a/fe/fe-core/src/test/java/org/apache/doris/system/SystemInfoServiceTest.java b/fe/fe-core/src/test/java/org/apache/doris/system/SystemInfoServiceTest.java index 033568017d90f3..99e2d2e1e0cdd4 100644 --- a/fe/fe-core/src/test/java/org/apache/doris/system/SystemInfoServiceTest.java +++ b/fe/fe-core/src/test/java/org/apache/doris/system/SystemInfoServiceTest.java @@ -19,9 +19,11 @@ import org.apache.doris.catalog.DiskInfo; import org.apache.doris.catalog.Env; +import org.apache.doris.catalog.MediumAllocationMode; import org.apache.doris.catalog.ReplicaAllocation; import org.apache.doris.common.AnalysisException; import org.apache.doris.common.Config; +import org.apache.doris.common.DdlException; import org.apache.doris.common.FeMetaVersion; import org.apache.doris.common.Pair; import org.apache.doris.meta.MetaContext; @@ -409,7 +411,7 @@ public void testSelectBackendIdsForReplicaCreation() throws Exception { Map beCounterMap = Maps.newHashMap(); for (int i = 0; i < 30000; ++i) { Pair>, TStorageMedium> ret = infoService.selectBackendIdsForReplicaCreation(replicaAlloc, - Maps.newHashMap(), TStorageMedium.HDD, false, false); + Maps.newHashMap(), TStorageMedium.HDD, MediumAllocationMode.ADAPTIVE, false); Map> res = ret.first; Assert.assertEquals(3, res.get(Tag.DEFAULT_BACKEND_TAG).size()); for (Long beId : res.get(Tag.DEFAULT_BACKEND_TAG)) { @@ -428,6 +430,55 @@ public void testSelectBackendIdsForReplicaCreation() throws Exception { Assert.assertTrue((diff * 1.0 / max) < 0.3); } + /** + * STRICT mode must refuse to fall back to another medium when the requested + * one is missing. Pair with {@link #testAdaptiveFallsBackOnSingleMediumCluster()} + * for the ADAPTIVE behaviour on the same fixture. + */ + @Test(expected = DdlException.class) + public void testStrictThrowsOnSingleMediumCluster() throws Exception { + addBackend(20001, "10.0.0.1", 9050); + Backend be1 = infoService.getBackend(20001); + addDisk(be1, "path1", TStorageMedium.HDD, 200 * 1024 * 1024L, 150 * 1024 * 1024L); + be1.setAlive(true); + addBackend(20002, "10.0.0.2", 9050); + Backend be2 = infoService.getBackend(20002); + addDisk(be2, "path1", TStorageMedium.HDD, 200 * 1024 * 1024L, 150 * 1024 * 1024L); + be2.setAlive(true); + addBackend(20003, "10.0.0.3", 9050); + Backend be3 = infoService.getBackend(20003); + addDisk(be3, "path1", TStorageMedium.HDD, 200 * 1024 * 1024L, 150 * 1024 * 1024L); + be3.setAlive(true); + + ReplicaAllocation replicaAlloc = ReplicaAllocation.DEFAULT_ALLOCATION; + infoService.selectBackendIdsForReplicaCreation(replicaAlloc, Maps.newHashMap(), + TStorageMedium.SSD, MediumAllocationMode.STRICT, false); + } + + @Test + public void testAdaptiveFallsBackOnSingleMediumCluster() throws Exception { + addBackend(30001, "10.0.1.1", 9050); + Backend be1 = infoService.getBackend(30001); + addDisk(be1, "path1", TStorageMedium.HDD, 200 * 1024 * 1024L, 150 * 1024 * 1024L); + be1.setAlive(true); + addBackend(30002, "10.0.1.2", 9050); + Backend be2 = infoService.getBackend(30002); + addDisk(be2, "path1", TStorageMedium.HDD, 200 * 1024 * 1024L, 150 * 1024 * 1024L); + be2.setAlive(true); + addBackend(30003, "10.0.1.3", 9050); + Backend be3 = infoService.getBackend(30003); + addDisk(be3, "path1", TStorageMedium.HDD, 200 * 1024 * 1024L, 150 * 1024 * 1024L); + be3.setAlive(true); + + ReplicaAllocation replicaAlloc = ReplicaAllocation.DEFAULT_ALLOCATION; + Pair>, TStorageMedium> ret = infoService.selectBackendIdsForReplicaCreation( + replicaAlloc, Maps.newHashMap(), + TStorageMedium.SSD, MediumAllocationMode.ADAPTIVE, false); + Assert.assertEquals("ADAPTIVE must fall back to HDD on a HDD-only cluster", + TStorageMedium.HDD, ret.second); + Assert.assertEquals(3, ret.first.get(Tag.DEFAULT_BACKEND_TAG).size()); + } + private void addDisk(Backend be, String path, TStorageMedium medium, long totalB, long availB) { DiskInfo diskInfo1 = new DiskInfo(path); diskInfo1.setTotalCapacityB(totalB); diff --git a/regression-test/suites/storage_medium_p0/test_medium_allocation_mode_compat.groovy b/regression-test/suites/storage_medium_p0/test_medium_allocation_mode_compat.groovy new file mode 100644 index 00000000000000..50522f5973ca6d --- /dev/null +++ b/regression-test/suites/storage_medium_p0/test_medium_allocation_mode_compat.groovy @@ -0,0 +1,203 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. +import org.apache.doris.regression.suite.ClusterOptions + +// PR-1 compatibility check: the MediumAllocationMode enum replaces the old +// non-persisted storageMediumSpecified boolean on DataProperty. Verify that +// it does not regress user-visible behaviour across a FE restart: +// - explicit storage_medium=SSD -> stays on SSD after restart (STRICT persists) +// - explicit storage_medium=HDD -> stays on HDD after restart (STRICT persists) +// - no storage_medium specified -> default behaviour (ADAPTIVE) preserved +suite("test_medium_allocation_mode_compat", 'docker') { + def options = new ClusterOptions() + options.feConfigs += [ + 'default_storage_medium=HDD', + ] + // mixed cluster: both SSD and HDD are available so STRICT is actually honoured. + options.beDisks = ['SSD=2', 'HDD=2'] + + def collectPartitionMediums = { tbl -> + def partitions = sql_return_maparray "SHOW PARTITIONS FROM ${tbl};" + def media = [] + partitions.each { media << it.StorageMedium } + return media + } + + def assertAllEqual = { mediums, expected, tbl -> + log.info("${tbl} storage mediums: ${mediums}") + assertTrue(!mediums.isEmpty(), "${tbl} must have at least one partition") + mediums.each { + assertEquals(expected, it, "${tbl}: expected ${expected}, got ${it}") + } + } + + docker(options) { + def tblExplicitSsd = "medium_mode_explicit_ssd" + def tblExplicitHdd = "medium_mode_explicit_hdd" + def tblImplicit = "medium_mode_implicit" + + [tblExplicitSsd, tblExplicitHdd, tblImplicit].each { + sql "drop table if exists ${it}" + } + + // (1) explicit storage_medium=SSD -> STRICT + sql """ + CREATE TABLE ${tblExplicitSsd} ( + k1 BIGINT, + v1 VARCHAR(64) + ) + DUPLICATE KEY(k1) + DISTRIBUTED BY HASH(k1) BUCKETS 2 + PROPERTIES ( + "storage_medium" = "SSD", + "replication_num" = "1" + ); + """ + + // (2) explicit storage_medium=HDD -> STRICT + sql """ + CREATE TABLE ${tblExplicitHdd} ( + k1 BIGINT, + v1 VARCHAR(64) + ) + DUPLICATE KEY(k1) + DISTRIBUTED BY HASH(k1) BUCKETS 2 + PROPERTIES ( + "storage_medium" = "HDD", + "replication_num" = "1" + ); + """ + + // (3) no storage_medium specified -> ADAPTIVE (falls back to default HDD) + sql """ + CREATE TABLE ${tblImplicit} ( + k1 BIGINT, + v1 VARCHAR(64) + ) + DUPLICATE KEY(k1) + DISTRIBUTED BY HASH(k1) BUCKETS 2 + PROPERTIES ( + "replication_num" = "1" + ); + """ + + sleep 1000 + + assertAllEqual(collectPartitionMediums(tblExplicitSsd), "SSD", tblExplicitSsd) + assertAllEqual(collectPartitionMediums(tblExplicitHdd), "HDD", tblExplicitHdd) + assertAllEqual(collectPartitionMediums(tblImplicit), "HDD", tblImplicit) + + // Restart the FE to exercise the new serialized mediumAllocationMode + // field. The legacy storageMediumSpecified boolean was never persisted, + // so this step also covers the bug-fix aspect of PR-1: after a restart + // the STRICT intent must still be remembered. + cluster.restartFrontends() + sleep 5000 + context.reconnectFe() + + assertAllEqual(collectPartitionMediums(tblExplicitSsd), "SSD", tblExplicitSsd) + assertAllEqual(collectPartitionMediums(tblExplicitHdd), "HDD", tblExplicitHdd) + assertAllEqual(collectPartitionMediums(tblImplicit), "HDD", tblImplicit) + } +} + +// Non-docker coverage for the same PR-1 user-visible semantics. The docker +// suite above owns mixed HDD/SSD and FE restart coverage; this suite is meant +// to run against an already-started regression cluster and only asserts the +// behaviours that can be verified from SQL metadata in that environment. +suite("test_medium_allocation_mode_compat_non_docker", 'p0') { + if (isCloudMode()) { + return + } + + def collectAvailableMediums = { -> + def mediums = [] as Set + def backends = sql_return_maparray "SHOW PROC '/backends'" + backends.each { be -> + def disks = sql_return_maparray "SHOW PROC '/backends/${be.BackendId}'" + disks.each { disk -> + if (disk.State == 'ONLINE' && (disk.StorageMedium == 'SSD' || disk.StorageMedium == 'HDD')) { + mediums << disk.StorageMedium + } + } + } + return mediums + } + + def collectPartitionMediums = { tbl -> + def partitions = sql_return_maparray "SHOW PARTITIONS FROM ${tbl};" + def media = [] + partitions.each { media << it.StorageMedium } + return media + } + + def assertAllEqual = { mediums, expected, tbl -> + log.info("${tbl} storage mediums: ${mediums}") + assertTrue(!mediums.isEmpty(), "${tbl} must have at least one partition") + mediums.each { + assertEquals(expected, it, "${tbl}: expected ${expected}, got ${it}") + } + } + + def availableMediums = collectAvailableMediums() + log.info("available storage mediums in current non-docker cluster: ${availableMediums}") + assertTrue(!availableMediums.isEmpty(), "current cluster must have at least one HDD or SSD disk") + + // Explicit storage_medium is the user-visible trigger for STRICT. Only test + // mediums that are physically available in the current non-docker cluster so + // the case is stable across developers' local environments. + availableMediums.each { medium -> + def tbl = "medium_mode_non_docker_${medium.toLowerCase()}" + sql "drop table if exists ${tbl}" + sql """ + CREATE TABLE ${tbl} ( + k1 BIGINT, + v1 VARCHAR(64) + ) + DUPLICATE KEY(k1) + DISTRIBUTED BY HASH(k1) BUCKETS 2 + PROPERTIES ( + "storage_medium" = "${medium}", + "replication_num" = "1" + ); + """ + assertAllEqual(collectPartitionMediums(tbl), medium, tbl) + } + + // No storage_medium specified keeps the existing CREATE TABLE syntax and + // maps internally to ADAPTIVE. The realized medium depends on the current + // cluster, so only assert that it is a real medium available on this cluster. + def tblImplicit = "medium_mode_non_docker_implicit" + sql "drop table if exists ${tblImplicit}" + sql """ + CREATE TABLE ${tblImplicit} ( + k1 BIGINT, + v1 VARCHAR(64) + ) + DUPLICATE KEY(k1) + DISTRIBUTED BY HASH(k1) BUCKETS 2 + PROPERTIES ( + "replication_num" = "1" + ); + """ + def implicitMediums = collectPartitionMediums(tblImplicit) + log.info("${tblImplicit} storage mediums: ${implicitMediums}") + assertTrue(!implicitMediums.isEmpty(), "${tblImplicit} must have at least one partition") + implicitMediums.each { + assertTrue(availableMediums.contains(it), "${tblImplicit}: unexpected medium ${it}") + } +}