cmd/k8s-operator: correctly determine cluster domain (#11512)

Kubernetes cluster domain defaults to 'cluster.local', but can also be customized.
We need to determine cluster domain to set up in-cluster forwarding to our egress proxies.
This was previously hardcoded to 'cluster.local', so the egress proxies were not usable in clusters with custom domains.
This PR ensures that we attempt to determine the cluster domain by parsing /etc/resolv.conf.
In case the cluster domain cannot be determined from /etc/resolv.conf, we fall back to 'cluster.local'.

Updates tailscale/tailscale#10399,tailscale/tailscale#11445

Signed-off-by: Irbe Krumina <irbe@tailscale.com>
This commit is contained in:
Irbe Krumina 2024-04-19 16:49:46 +01:00 committed by GitHub
parent d16c1293e9
commit bbe194c80d
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
3 changed files with 139 additions and 4 deletions

View File

@ -264,6 +264,7 @@ func runReconcilers(zlog *zap.SugaredLogger, s *tsnet.Server, tsNamespace string
logger: zlog.Named("service-reconciler"),
isDefaultLoadBalancer: isDefaultLoadBalancer,
recorder: eventRecorder,
tsNamespace: tsNamespace,
})
if err != nil {
startlog.Fatalf("could not create service reconciler: %v", err)

View File

@ -20,7 +20,9 @@ import (
"sigs.k8s.io/controller-runtime/pkg/client/fake"
"sigs.k8s.io/controller-runtime/pkg/reconcile"
tsapi "tailscale.com/k8s-operator/apis/v1alpha1"
"tailscale.com/net/dns/resolvconffile"
"tailscale.com/types/ptr"
"tailscale.com/util/dnsname"
"tailscale.com/util/mak"
)
@ -1352,3 +1354,81 @@ func Test_serviceHandlerForIngress(t *testing.T) {
t.Errorf("unexpected reconcile request for a Service that does not belong to any Ingress: %#+v\n", gotReqs)
}
}
// Test_clusterDomainFromResolverConf verifies that clusterDomainFromResolverConf
// returns the cluster domain when the first three search domains of the
// resolver config have the expected structure, and falls back to
// 'cluster.local' in every other case.
func Test_clusterDomainFromResolverConf(t *testing.T) {
	devLogger, err := zap.NewDevelopment()
	if err != nil {
		t.Fatal(err)
	}
	sugar := devLogger.Sugar()

	// withSearchDomains builds a resolver config whose search domains are
	// the given names, in the given order.
	withSearchDomains := func(names ...string) *resolvconffile.Config {
		t.Helper()
		domains := make([]dnsname.FQDN, 0, len(names))
		for _, name := range names {
			domains = append(domains, toFQDN(t, name))
		}
		return &resolvconffile.Config{SearchDomains: domains}
	}

	type testCase struct {
		name      string
		conf      *resolvconffile.Config
		namespace string
		want      string
	}
	cases := []testCase{
		{
			name:      "success- custom domain",
			conf:      withSearchDomains("foo.svc.department.org.io", "svc.department.org.io", "department.org.io"),
			namespace: "foo",
			want:      "department.org.io",
		},
		{
			name:      "success- default domain",
			conf:      withSearchDomains("foo.svc.cluster.local.", "svc.cluster.local.", "cluster.local."),
			namespace: "foo",
			want:      "cluster.local",
		},
		{
			name:      "only two search domains found",
			conf:      withSearchDomains("svc.department.org.io", "department.org.io"),
			namespace: "foo",
			want:      "cluster.local",
		},
		{
			name:      "first search domain does not match the expected structure",
			conf:      withSearchDomains("foo.bar.department.org.io", "svc.department.org.io", "some.other.fqdn"),
			namespace: "foo",
			want:      "cluster.local",
		},
		{
			name:      "second search domain does not match the expected structure",
			conf:      withSearchDomains("foo.svc.department.org.io", "foo.department.org.io", "some.other.fqdn"),
			namespace: "foo",
			want:      "cluster.local",
		},
		{
			name:      "third search domain does not match the expected structure",
			conf:      withSearchDomains("foo.svc.department.org.io", "svc.department.org.io", "some.other.fqdn"),
			namespace: "foo",
			want:      "cluster.local",
		},
	}

	for _, tc := range cases {
		t.Run(tc.name, func(t *testing.T) {
			got := clusterDomainFromResolverConf(tc.conf, tc.namespace, sugar)
			if got != tc.want {
				t.Errorf("clusterDomainFromResolverConf() = %v, want %v", got, tc.want)
			}
		})
	}
}
// toFQDN converts s to a dnsname.FQDN and fails the calling test immediately
// if the conversion returns an error.
func toFQDN(t *testing.T, s string) dnsname.FQDN {
	t.Helper()
	fqdn, err := dnsname.ToFQDN(s)
	if err != nil {
		t.Fatalf("error converting %q to dnsname.FQDN: %v", s, err)
	}
	return fqdn
}

View File

@ -22,10 +22,16 @@ import (
"sigs.k8s.io/controller-runtime/pkg/reconcile"
tsoperator "tailscale.com/k8s-operator"
tsapi "tailscale.com/k8s-operator/apis/v1alpha1"
"tailscale.com/net/dns/resolvconffile"
"tailscale.com/util/clientmetric"
"tailscale.com/util/set"
)
// resolvConfPath is the resolver configuration file that is parsed to
// determine the cluster domain.
const resolvConfPath = "/etc/resolv.conf"

// defaultClusterDomain is the cluster domain that is used whenever the actual
// cluster domain cannot be determined from the resolver configuration.
const defaultClusterDomain = "cluster.local"
type ServiceReconciler struct {
client.Client
ssr *tailscaleSTSReconciler
@ -42,6 +48,8 @@ type ServiceReconciler struct {
managedEgressProxies set.Slice[types.UID]
recorder record.EventRecorder
tsNamespace string
}
var (
@ -225,10 +233,8 @@ func (a *ServiceReconciler) maybeProvision(ctx context.Context, logger *zap.Suga
}
if sts.TailnetTargetIP != "" || sts.TailnetTargetFQDN != "" {
// TODO (irbekrm): cluster.local is the default DNS name, but
// can be changed by users. Make this configurable or figure out
// how to discover the DNS name from within operator
headlessSvcName := hsvc.Name + "." + hsvc.Namespace + ".svc.cluster.local"
clusterDomain := retrieveClusterDomain(a.tsNamespace, logger)
headlessSvcName := hsvc.Name + "." + hsvc.Namespace + ".svc." + clusterDomain
if svc.Spec.ExternalName != headlessSvcName || svc.Spec.Type != corev1.ServiceTypeExternalName {
svc.Spec.ExternalName = headlessSvcName
svc.Spec.Selector = nil
@ -344,3 +350,51 @@ func proxyClassIsReady(ctx context.Context, name string, cl client.Client) (bool
}
return tsoperator.ProxyClassIsReady(proxyClass), nil
}
// retrieveClusterDomain determines the cluster domain (for example
// 'cluster.local') of the cluster in which this Pod is running by parsing the
// search domains in /etc/resolv.conf. namespace is the namespace that the
// first search domain is expected to start with. If the file cannot be parsed
// or its search domains do not have the expected structure, the cluster domain
// defaults to 'cluster.local'.
func retrieveClusterDomain(namespace string, logger *zap.SugaredLogger) string {
	logger.Infof("attempting to retrieve cluster domain..")
	conf, err := resolvconffile.ParseFile(resolvConfPath)
	if err != nil {
		// Vast majority of clusters use the cluster.local domain, so it
		// is probably better to fall back to that than error out. The
		// parse error is still logged so that the fallback can be
		// diagnosed.
		logger.Infof("[unexpected] error parsing /etc/resolv.conf to determine cluster domain, defaulting to 'cluster.local': %v", err)
		return defaultClusterDomain
	}
	return clusterDomainFromResolverConf(conf, namespace, logger)
}
// clusterDomainFromResolverConf attempts to retrieve cluster domain from the provided resolver config.
// It expects the first three search domains in the resolver config to be
// ['<namespace>.svc.<cluster-domain>', 'svc.<cluster-domain>', '<cluster-domain>', ...].
// The first domain is only checked for the '<namespace>.svc.' prefix; the
// cluster domain is derived from the second domain and must equal the third
// (compared case-insensitively). If the domains match the expected structure,
// the cluster domain is returned without a trailing dot.
// If the domains don't match the expected structure, it defaults to 'cluster.local' domain.
func clusterDomainFromResolverConf(conf *resolvconffile.Config, namespace string, logger *zap.SugaredLogger) string {
	if len(conf.SearchDomains) < 3 {
		logger.Infof("[unexpected] resolver config contains only %d search domains, at least three expected.\nDefaulting cluster domain to 'cluster.local'.", len(conf.SearchDomains))
		return defaultClusterDomain
	}

	// Prefixes end with a dot so that they only match whole DNS labels:
	// 'foo.svcs.example.' must not be accepted as 'foo.svc.<cluster-domain>'.
	first := conf.SearchDomains[0]
	if !strings.HasPrefix(string(first), namespace+".svc.") {
		logger.Infof("[unexpected] first search domain in resolver config is %s; expected %s.\nDefaulting cluster domain to 'cluster.local'.", first, namespace+".svc.<cluster-domain>")
		return defaultClusterDomain
	}

	second := conf.SearchDomains[1]
	if !strings.HasPrefix(string(second), "svc.") {
		logger.Infof("[unexpected] second search domain in resolver config is %s; expected 'svc.<cluster-domain>'.\nDefaulting cluster domain to 'cluster.local'.", second)
		return defaultClusterDomain
	}

	// Trim the trailing dot for backwards compatibility purposes as the
	// cluster domain was previously hardcoded to 'cluster.local' without a
	// trailing dot.
	probablyClusterDomain := strings.TrimPrefix(second.WithoutTrailingDot(), "svc.")

	third := conf.SearchDomains[2]
	if !strings.EqualFold(third.WithoutTrailingDot(), probablyClusterDomain) {
		logger.Infof("[unexpected] expected resolver config to contain search domains <namespace>.svc.<cluster-domain>, svc.<cluster-domain>, <cluster-domain>; got %s %s %s\n. Defaulting cluster domain to 'cluster.local'.", first, second, third)
		return defaultClusterDomain
	}

	logger.Infof("Cluster domain %q extracted from resolver config", probablyClusterDomain)
	return probablyClusterDomain
}