acceptable-diamond-84047
10/17/2023, 7:51 PMazure-native:containerservice:ManagedCluster (ml-main-prod):
error: Code="BadRequest" Message="A new agent pool was introduced. Adding agent pools to an existing cluster is not allowed through managed cluster operations. For agent pool specific change, please use per agent pool operations: <https://aka.ms/agent-pool-rest-api>" Target="agentPoolProfiles"
billowy-army-68599
acceptable-diamond-84047
10/17/2023, 7:53 PMbillowy-army-68599
acceptable-diamond-84047
10/17/2023, 8:19 PMbillowy-army-68599
acceptable-diamond-84047
10/17/2023, 8:27 PM# Create a Kubernetes cluster
k8s_cluster = containerservice.ManagedCluster(
    f"ml-main-{stack_name}",
    # Placement: the cluster lives in (and takes its location from) the
    # resource group.
    resource_group_name=resource_group.name,
    location=resource_group.location,
    dns_prefix=f"ml-main-{stack_name}",
    enable_rbac=True,
    # In this version both node pools are declared inline on the cluster.
    agent_pool_profiles=[
        # System node pool (count=1).
        containerservice.ManagedClusterAgentPoolProfileArgs(
            name="systempool",
            mode="System",
            type="VirtualMachineScaleSets",
            os_type="Linux",
            vm_size="standard_b2pls_v2",
            os_disk_size_gb=30,
            count=1,
            vnet_subnet_id=subnet1.id,
        ),
        # GPU user node pool. It is labelled and tainted with gpu=true, and
        # its remaining keyword arguments are unpacked from the per-stack
        # entry of stack_gpu_autoscaler_settings.
        containerservice.ManagedClusterAgentPoolProfileArgs(
            name="gpunodepool",
            mode="User",
            type="VirtualMachineScaleSets",
            os_type="Ubuntu",
            scale_set_priority="Regular",
            vm_size="standard_nc6s_v3",  # GPU enabled VM
            vnet_subnet_id=subnet1.id,
            node_labels={"gpu": "true"},
            node_taints=["gpu=true:NoSchedule"],
            **stack_gpu_autoscaler_settings[stack_name],
        ),
    ],
    # SSH access for the node admin user.
    linux_profile={
        "admin_username": "someAdmin",
        "ssh": {
            "publicKeys": [
                {"keyData": AKS_SSH_PUBKEY},
            ],
        },
    },
    # Credentials the cluster is given: the application's id and the
    # service principal password.
    service_principal_profile=containerservice.ManagedClusterServicePrincipalProfileArgs(
        client_id=app.application_id,
        secret=sp_password.value,
    ),
    network_profile=containerservice.ContainerServiceNetworkProfileArgs(
        network_plugin="azure",
        network_policy="azure",
        service_cidr="10.96.0.0/16",
        dns_service_ip="10.96.0.10",  # inside service_cidr
    ),
)
And after:
# Create a Kubernetes cluster.
# Only the system node pool is declared inline in this version; the GPU pool
# is no longer part of agent_pool_profiles.
k8s_cluster = containerservice.ManagedCluster(
    f"ml-main-{stack_name}",
    # Placement: the cluster lives in (and takes its location from) the
    # resource group.
    resource_group_name=resource_group.name,
    location=resource_group.location,
    dns_prefix=f"ml-main-{stack_name}",
    enable_rbac=True,
    agent_pool_profiles=[
        # System node pool (count=1).
        containerservice.ManagedClusterAgentPoolProfileArgs(
            name="systempool",
            mode="System",
            type="VirtualMachineScaleSets",
            os_type="Linux",
            vm_size="standard_b2pls_v2",
            os_disk_size_gb=30,
            count=1,
            vnet_subnet_id=subnet1.id,
        ),
    ],
    # SSH access for the node admin user.
    linux_profile={
        "admin_username": "someAdmin",
        "ssh": {
            "publicKeys": [
                {"keyData": AKS_SSH_PUBKEY},
            ],
        },
    },
    # Credentials the cluster is given: the application's id and the
    # service principal password.
    service_principal_profile=containerservice.ManagedClusterServicePrincipalProfileArgs(
        client_id=app.application_id,
        secret=sp_password.value,
    ),
    network_profile=containerservice.ContainerServiceNetworkProfileArgs(
        network_plugin="azure",
        network_policy="azure",
        service_cidr="10.96.0.0/16",
        dns_service_ip="10.96.0.10",  # inside service_cidr
    ),
)
# GPU node pool declared as a standalone AgentPool resource attached to
# k8s_cluster, instead of as an inline agent_pool_profiles entry.
gpu_nodepool = containerservice.AgentPool(
    "gpu_nodepool",
    # Parent cluster and the pool's name inside it. The provider exposes the
    # cluster-name argument as `resource_name_` (trailing underscore) and the
    # pool name as `agent_pool_name`.
    resource_group_name=resource_group.name,
    resource_name_=k8s_cluster.name,
    agent_pool_name="gpunodepool",
    mode="User",
    # NOTE(review): "Ubuntu" is carried over unchanged from the inline
    # profile; the AKS API documents osType as Linux/Windows, with Ubuntu
    # being an osSKU value - confirm before changing.
    os_type="Ubuntu",
    scale_set_priority="Regular",
    vm_size="standard_nc6s_v3",  # GPU enabled VM
    type="VirtualMachineScaleSets",
    vnet_subnet_id=subnet1.id,
    # Label and taint match the inline "gpunodepool" profile this resource
    # replaces (gpu=true), so the pool is moved rather than redefined.
    node_labels={"gpu": "true"},
    node_taints=["gpu=true:NoSchedule"],
    # Remaining keyword arguments come from the per-stack settings entry.
    **stack_gpu_autoscaler_settings[stack_name],
)
just moved the GPU nodepool out, and it's replacing the whole clusterbillowy-army-68599
acceptable-diamond-84047
10/17/2023, 8:33 PMbillowy-army-68599
acceptable-diamond-84047
10/17/2023, 8:37 PMpulumi refresh still prompts a recreate.
• Add opts=pulumi.ResourceOptions(ignore_changes=["agent_pool_profiles"]), to the cluster resource, and remove inline agent pools
• Create separate Agent pool resources
• try pulumi up
- this will inevitably fail because of "existing resources"
• Use pulumi import azure-native:containerservice:AgentPool <nodepoolResourceName> /subscriptions/<sid>/resourceGroups/<rg_name>/providers/Microsoft.ContainerService/managedClusters/<clusterName>/agentPools/<existingNodepoolName>
for each existing nodepool
• Refresh, and you are good to gobillowy-army-68599
acceptable-diamond-84047
10/17/2023, 9:17 PM