From d857fd00b3131fdc2e613a49f3cd1c5677a2a04c Mon Sep 17 00:00:00 2001 From: David Anderson Date: Tue, 13 Dec 2022 15:37:35 -0800 Subject: [PATCH] cmd/k8s-operator: sprinkle debug logging throughout. As is convention in the k8s world, use zap for structured logging. For development, OPERATOR_LOGGING=dev switches to a more human-readable output than JSON. Updates #502 Signed-off-by: David Anderson --- cmd/k8s-operator/operator.go | 104 ++++++++++++++++++++---------- cmd/k8s-operator/operator_test.go | 21 ++++++ go.mod | 4 +- 3 files changed, 92 insertions(+), 37 deletions(-) diff --git a/cmd/k8s-operator/operator.go b/cmd/k8s-operator/operator.go index 3e7f56dec..9c0f7562c 100644 --- a/cmd/k8s-operator/operator.go +++ b/cmd/k8s-operator/operator.go @@ -10,11 +10,13 @@ import ( "context" _ "embed" "fmt" - "log" "os" "strings" "time" + "github.com/go-logr/zapr" + "go.uber.org/zap" + "go.uber.org/zap/zapcore" "golang.org/x/exp/slices" appsv1 "k8s.io/api/apps/v1" corev1 "k8s.io/api/core/v1" @@ -29,7 +31,7 @@ import ( "sigs.k8s.io/controller-runtime/pkg/client/config" "sigs.k8s.io/controller-runtime/pkg/handler" logf "sigs.k8s.io/controller-runtime/pkg/log" - "sigs.k8s.io/controller-runtime/pkg/log/zap" + kzap "sigs.k8s.io/controller-runtime/pkg/log/zap" "sigs.k8s.io/controller-runtime/pkg/manager" "sigs.k8s.io/controller-runtime/pkg/manager/signals" "sigs.k8s.io/controller-runtime/pkg/reconcile" @@ -46,31 +48,42 @@ func main() { hostname = defaultEnv("OPERATOR_HOSTNAME", "tailscale-operator") kubeSecret = defaultEnv("OPERATOR_SECRET", "") tsNamespace = defaultEnv("OPERATOR_NAMESPACE", "default") + tslogging = defaultEnv("OPERATOR_LOGGING", "info") image = defaultEnv("PROXY_IMAGE", "tailscale/tailscale:latest") tags = defaultEnv("PROXY_TAGS", "tag:k8s") ) - // TODO: use logpolicy tailscale.I_Acknowledge_This_API_Is_Unstable = true - logf.SetLogger(zap.New()) + var opts []kzap.Opts + switch tslogging { + case "info": + opts = append(opts, kzap.Level(zapcore.InfoLevel)) + case "debug": + opts = append(opts, kzap.Level(zapcore.DebugLevel)) + case "dev": + opts = append(opts, kzap.UseDevMode(true), kzap.Level(zapcore.DebugLevel)) + } + zlog := kzap.NewRaw(opts...).Sugar() + logf.SetLogger(zapr.NewLogger(zlog.Desugar())) + startlog := zlog.Named("startup") s := &tsnet.Server{ Hostname: hostname, - Logf: logger.Discard, + Logf: zlog.Named("tailscaled").Debugf, } if kubeSecret != "" { st, err := kubestore.New(logger.Discard, kubeSecret) if err != nil { - log.Fatalf("creating kube store: %v", err) + startlog.Fatalf("creating kube store: %v", err) } s.Store = st } if err := s.Start(); err != nil { - log.Fatalf("starting tailscale server: %v", err) + startlog.Fatalf("starting tailscale server: %v", err) } defer s.Close() lc, err := s.LocalClient() if err != nil { - log.Fatalf("getting local client: %v", err) + startlog.Fatalf("getting local client: %v", err) } ctx := context.Background() @@ -80,23 +93,23 @@ waitOnline: for { st, err := lc.StatusWithoutPeers(ctx) if err != nil { - log.Fatalf("getting status: %v", err) + startlog.Fatalf("getting status: %v", err) } switch st.BackendState { case "Running": break waitOnline case "NeedsLogin": if !loginShown && st.AuthURL != "" { - log.Printf("tailscale needs login, please visit: %s", st.AuthURL) + startlog.Infof("tailscale needs login, please visit: %s", st.AuthURL) loginShown = true } case "NeedsMachineAuth": if !machineAuthShown { - log.Printf("Machine authorization required, please visit the admin panel to authorize") + startlog.Infof("Machine authorization required, please visit the admin panel to authorize") machineAuthShown = true } default: - log.Printf("waiting for tailscale to start: %v", st.BackendState) + startlog.Debugf("waiting for tailscale to start: %v", st.BackendState) } time.Sleep(time.Second) } @@ -119,17 +132,18 @@ waitOnline: }), }) if err != nil { - log.Fatalf("could not create manager: %v", err) + startlog.Fatalf("could not create manager: %v", err) } tsClient, err := s.APIClient() if err != nil { - log.Fatalf("getting tailscale client: %v", err) + startlog.Fatalf("getting tailscale client: %v", err) } sr := &ServiceReconciler{ tsClient: tsClient, defaultTags: strings.Split(tags, ","), operatorNamespace: tsNamespace, proxyImage: image, + logger: zlog.Named("service-reconciler"), } reconcileFilter := handler.EnqueueRequestsFromMapFunc(func(o client.Object) []reconcile.Request { ls := o.GetLabels() @@ -155,12 +169,12 @@ waitOnline: Watches(&source.Kind{Type: &corev1.Secret{}}, reconcileFilter). Complete(sr) if err != nil { - log.Fatalf("could not create controller: %v", err) + startlog.Fatalf("could not create controller: %v", err) } - log.Printf("Startup complete, operator running") + startlog.Infof("Startup complete, operator running") if err := mgr.Start(signals.SetupSignalHandler()); err != nil { - log.Fatalf("could not start manager: %v", err) + startlog.Fatalf("could not start manager: %v", err) } } @@ -183,6 +197,7 @@ type ServiceReconciler struct { defaultTags []string operatorNamespace string proxyImage string + logger *zap.SugaredLogger } type tsClient interface { @@ -209,9 +224,10 @@ func childResourceLabels(parent *corev1.Service) map[string]string { // // This function is responsible for removing the finalizer from the service, // once all associated resources are gone. -func (a *ServiceReconciler) cleanupIfRequired(ctx context.Context, svc *corev1.Service) (reconcile.Result, error) { +func (a *ServiceReconciler) cleanupIfRequired(ctx context.Context, logger *zap.SugaredLogger, svc *corev1.Service) (reconcile.Result, error) { ix := slices.Index(svc.Finalizers, FinalizerName) if ix < 0 { + logger.Debugf("no finalizer, nothing to do") return reconcile.Result{}, nil } @@ -231,12 +247,14 @@ func (a *ServiceReconciler) cleanupIfRequired(ctx context.Context, svc *corev1.S if !sts.GetDeletionTimestamp().IsZero() { // Deletion in progress, check again later. We'll get another // notification when the deletion is complete. + logger.Debugf("waiting for statefulset %s/%s deletion", sts.GetNamespace(), sts.GetName()) return reconcile.Result{}, nil } err := a.DeleteAllOf(ctx, &appsv1.StatefulSet{}, client.InNamespace(a.operatorNamespace), client.MatchingLabels(ml), client.PropagationPolicy(metav1.DeletePropagationForeground)) if err != nil { return reconcile.Result{}, fmt.Errorf("deleting statefulset: %w", err) } + logger.Debugf("started deletion of statefulset %s/%s", sts.GetNamespace(), sts.GetName()) return reconcile.Result{}, nil } @@ -267,6 +285,11 @@ func (a *ServiceReconciler) cleanupIfRequired(ctx context.Context, svc *corev1.S return reconcile.Result{}, fmt.Errorf("failed to remove finalizer: %w", err) } + // Unlike most log entries in the reconcile loop, this will get printed + // exactly once at the very end of cleanup, because the final step of + // cleanup removes the tailscale finalizer, which will make all future + // reconciles exit early. + logger.Infof("unexposed service from tailnet") return reconcile.Result{}, nil } @@ -287,32 +310,35 @@ func (a *ServiceReconciler) shouldExpose(svc *corev1.Service) bool { } func (a *ServiceReconciler) Reconcile(ctx context.Context, req reconcile.Request) (_ reconcile.Result, err error) { - defer func() { - if err != nil { - log.Printf("error reconciling %s/%s: %v", req.Namespace, req.Name, err) - } - }() + logger := a.logger.With("service-ns", req.Namespace, "service-name", req.Name) + logger.Debugf("starting reconcile") + defer logger.Debugf("reconcile finished") svc := new(corev1.Service) err = a.Get(ctx, req.NamespacedName, svc) if err != nil { if apierrors.IsNotFound(err) { // Request object not found, could have been deleted after reconcile request. + logger.Debugf("service not found, assuming it was deleted") return reconcile.Result{}, nil } return reconcile.Result{}, fmt.Errorf("failed to get svc: %w", err) } - if !svc.DeletionTimestamp.IsZero() || !a.shouldExpose(svc) { - return a.cleanupIfRequired(ctx, svc) - } - if svc.Spec.ClusterIP == "" || svc.Spec.ClusterIP == "None" { - log.Printf("%s has ClusterIP=%q; nothing to do", svc.Name, svc.Spec.ClusterIP) + logger.Debugf("service is headless, nothing to do") return reconcile.Result{}, nil } - log.Printf("exposing %s", svc.Name) + if !svc.DeletionTimestamp.IsZero() || !a.shouldExpose(svc) { + logger.Debugf("service is being deleted or should not be exposed, cleaning up") + return a.cleanupIfRequired(ctx, logger, svc) + } if !slices.Contains(svc.Finalizers, FinalizerName) { + // This log line is printed exactly once during initial provisioning, + // because once the finalizer is in place this block gets skipped. So, + // this is a nice place to tell the operator that the high level, + // multi-reconcile operation is underway. + logger.Infof("exposing service over tailscale") svc.Finalizers = append(svc.Finalizers, FinalizerName) if err := a.Update(ctx, svc); err != nil { return reconcile.Result{}, fmt.Errorf("failed to add finalizer: %w", err) @@ -320,7 +346,7 @@ func (a *ServiceReconciler) Reconcile(ctx context.Context, req reconcile.Request } // Do full reconcile. - hsvc, err := a.reconcileHeadlessService(ctx, svc) + hsvc, err := a.reconcileHeadlessService(ctx, logger, svc) if err != nil { return reconcile.Result{}, fmt.Errorf("failed to reconcile headless service: %w", err) } @@ -329,16 +355,17 @@ func (a *ServiceReconciler) Reconcile(ctx context.Context, req reconcile.Request if tstr, ok := svc.Annotations[AnnotationTags]; ok { tags = strings.Split(tstr, ",") } - secretName, err := a.createOrGetSecret(ctx, svc, hsvc, tags) + secretName, err := a.createOrGetSecret(ctx, logger, svc, hsvc, tags) if err != nil { return reconcile.Result{}, fmt.Errorf("failed to create or get API key secret: %w", err) } - _, err = a.reconcileSTS(ctx, svc, hsvc, secretName) + _, err = a.reconcileSTS(ctx, logger, svc, hsvc, secretName) if err != nil { return reconcile.Result{}, fmt.Errorf("failed to reconcile statefulset: %w", err) } if !a.hasLoadBalancerClass(svc) { + logger.Debugf("service is not a LoadBalancer, so not updating ingress") return reconcile.Result{}, nil } @@ -347,6 +374,7 @@ func (a *ServiceReconciler) Reconcile(ctx context.Context, req reconcile.Request return reconcile.Result{}, fmt.Errorf("failed to get device ID: %w", err) } if tsHost == "" { + logger.Debugf("no Tailscale hostname known yet, waiting for proxy pod to finish auth") // No hostname yet. Wait for the proxy pod to auth. svc.Status.LoadBalancer.Ingress = nil if err := a.Status().Update(ctx, svc); err != nil { @@ -355,6 +383,7 @@ func (a *ServiceReconciler) Reconcile(ctx context.Context, req reconcile.Request return reconcile.Result{RequeueAfter: 10 * time.Second}, nil } + logger.Debugf("setting ingress hostname to %q", tsHost) svc.Status.LoadBalancer.Ingress = []corev1.LoadBalancerIngress{ { Hostname: tsHost, @@ -366,7 +395,7 @@ func (a *ServiceReconciler) Reconcile(ctx context.Context, req reconcile.Request return reconcile.Result{}, nil } -func (a *ServiceReconciler) reconcileHeadlessService(ctx context.Context, svc *corev1.Service) (*corev1.Service, error) { +func (a *ServiceReconciler) reconcileHeadlessService(ctx context.Context, logger *zap.SugaredLogger, svc *corev1.Service) (*corev1.Service, error) { hsvc := &corev1.Service{ ObjectMeta: metav1.ObjectMeta{ GenerateName: "ts-" + svc.Name + "-", @@ -380,10 +409,11 @@ func (a *ServiceReconciler) reconcileHeadlessService(ctx context.Context, svc *c }, }, } + logger.Debugf("reconciling headless service for StatefulSet") return createOrUpdate(ctx, a.Client, a.operatorNamespace, hsvc, func(svc *corev1.Service) { svc.Spec = hsvc.Spec }) } -func (a *ServiceReconciler) createOrGetSecret(ctx context.Context, svc, hsvc *corev1.Service, tags []string) (string, error) { +func (a *ServiceReconciler) createOrGetSecret(ctx context.Context, logger *zap.SugaredLogger, svc, hsvc *corev1.Service, tags []string) (string, error) { secret := &corev1.Secret{ ObjectMeta: metav1.ObjectMeta{ // Hardcode a -0 suffix so that in future, if we support @@ -395,6 +425,7 @@ func (a *ServiceReconciler) createOrGetSecret(ctx context.Context, svc, hsvc *co }, } if err := a.Get(ctx, client.ObjectKeyFromObject(secret), secret); err == nil { + logger.Debugf("secret %s/%s already exists", secret.GetNamespace(), secret.GetName()) return secret.Name, nil } else if !apierrors.IsNotFound(err) { return "", err @@ -410,10 +441,12 @@ func (a *ServiceReconciler) createOrGetSecret(ctx context.Context, svc, hsvc *co if sts != nil { // StatefulSet exists, so we have already created the secret. // If the secret is missing, they should delete the StatefulSet. + logger.Errorf("Tailscale proxy secret doesn't exist, but the corresponding StatefulSet %s/%s already does. Something is wrong, please delete the StatefulSet.", sts.GetNamespace(), sts.GetName()) return "", nil } // Create API Key secret which is going to be used by the statefulset // to authenticate with Tailscale. + logger.Debugf("creating authkey for new tailscale proxy") authKey, err := a.newAuthKey(ctx, tags) if err != nil { return "", err @@ -466,7 +499,7 @@ func (a *ServiceReconciler) newAuthKey(ctx context.Context, tags []string) (stri //go:embed manifests/proxy.yaml var proxyYaml []byte -func (a *ServiceReconciler) reconcileSTS(ctx context.Context, parentSvc, headlessSvc *corev1.Service, authKeySecret string) (*appsv1.StatefulSet, error) { +func (a *ServiceReconciler) reconcileSTS(ctx context.Context, logger *zap.SugaredLogger, parentSvc, headlessSvc *corev1.Service, authKeySecret string) (*appsv1.StatefulSet, error) { var ss appsv1.StatefulSet if err := yaml.Unmarshal(proxyYaml, &ss); err != nil { return nil, fmt.Errorf("failed to unmarshal proxy spec: %w", err) @@ -496,6 +529,7 @@ func (a *ServiceReconciler) reconcileSTS(ctx context.Context, parentSvc, headles ss.Spec.Template.ObjectMeta.Labels = map[string]string{ "app": string(parentSvc.UID), } + logger.Debugf("reconciling statefulset %s/%s", ss.GetNamespace(), ss.GetName()) return createOrUpdate(ctx, a.Client, a.operatorNamespace, &ss, func(s *appsv1.StatefulSet) { s.Spec = ss.Spec }) } diff --git a/cmd/k8s-operator/operator_test.go b/cmd/k8s-operator/operator_test.go index 8b559fddb..4c0461354 100644 --- a/cmd/k8s-operator/operator_test.go +++ b/cmd/k8s-operator/operator_test.go @@ -12,6 +12,7 @@ import ( "time" "github.com/google/go-cmp/cmp" + "go.uber.org/zap" appsv1 "k8s.io/api/apps/v1" corev1 "k8s.io/api/core/v1" v1 "k8s.io/api/core/v1" @@ -28,12 +29,17 @@ import ( func TestLoadBalancerClass(t *testing.T) { fc := fake.NewFakeClient() ft := &fakeTSClient{} + zl, err := zap.NewDevelopment() + if err != nil { + t.Fatal(err) + } sr := &ServiceReconciler{ Client: fc, tsClient: ft, defaultTags: []string{"tag:k8s"}, operatorNamespace: "operator-ns", proxyImage: "tailscale/tailscale", + logger: zl.Sugar(), } // Create a service that we should manage, and check that the initial round @@ -142,12 +148,17 @@ func TestLoadBalancerClass(t *testing.T) { func TestAnnotations(t *testing.T) { fc := fake.NewFakeClient() ft := &fakeTSClient{} + zl, err := zap.NewDevelopment() + if err != nil { + t.Fatal(err) + } sr := &ServiceReconciler{ Client: fc, tsClient: ft, defaultTags: []string{"tag:k8s"}, operatorNamespace: "operator-ns", proxyImage: "tailscale/tailscale", + logger: zl.Sugar(), } // Create a service that we should manage, and check that the initial round @@ -234,12 +245,17 @@ func TestAnnotations(t *testing.T) { func TestAnnotationIntoLB(t *testing.T) { fc := fake.NewFakeClient() ft := &fakeTSClient{} + zl, err := zap.NewDevelopment() + if err != nil { + t.Fatal(err) + } sr := &ServiceReconciler{ Client: fc, tsClient: ft, defaultTags: []string{"tag:k8s"}, operatorNamespace: "operator-ns", proxyImage: "tailscale/tailscale", + logger: zl.Sugar(), } // Create a service that we should manage, and check that the initial round @@ -347,12 +363,17 @@ func TestAnnotationIntoLB(t *testing.T) { func TestLBIntoAnnotation(t *testing.T) { fc := fake.NewFakeClient() ft := &fakeTSClient{} + zl, err := zap.NewDevelopment() + if err != nil { + t.Fatal(err) + } sr := &ServiceReconciler{ Client: fc, tsClient: ft, defaultTags: []string{"tag:k8s"}, operatorNamespace: "operator-ns", proxyImage: "tailscale/tailscale", + logger: zl.Sugar(), } // Create a service that we should manage, and check that the initial round diff --git a/go.mod b/go.mod index e69258390..33f409ae3 100644 --- a/go.mod +++ b/go.mod @@ -24,6 +24,7 @@ require ( github.com/frankban/quicktest v1.14.0 github.com/fxamacker/cbor/v2 v2.4.0 github.com/go-json-experiment/json v0.0.0-20221017203807-c5ed296b8c92 + github.com/go-logr/zapr v1.2.3 github.com/go-ole/go-ole v1.2.6 github.com/godbus/dbus/v5 v5.0.6 github.com/golang/groupcache v0.0.0-20210331224755-41bb18bfe9da @@ -64,6 +65,7 @@ require ( github.com/toqueteos/webbrowser v1.2.0 github.com/u-root/u-root v0.9.1-0.20221111022710-6e9699743f5d github.com/vishvananda/netlink v1.1.1-0.20211118161826-650dca95af54 + go.uber.org/zap v1.21.0 go4.org/mem v0.0.0-20210711025021-927187094b94 go4.org/netipx v0.0.0-20220725152314-7e7bdc8411bf golang.org/x/crypto v0.3.0 @@ -154,7 +156,6 @@ require ( github.com/go-git/go-billy/v5 v5.3.1 // indirect github.com/go-git/go-git/v5 v5.4.2 // indirect github.com/go-logr/logr v1.2.3 // indirect - github.com/go-logr/zapr v1.2.3 // indirect github.com/go-openapi/jsonpointer v0.19.5 // indirect github.com/go-openapi/jsonreference v0.19.5 // indirect github.com/go-openapi/swag v0.19.14 // indirect @@ -297,7 +298,6 @@ require ( github.com/yeya24/promlinter v0.1.0 // indirect go.uber.org/atomic v1.7.0 // indirect go.uber.org/multierr v1.6.0 // indirect - go.uber.org/zap v1.21.0 // indirect golang.org/x/exp/typeparams v0.0.0-20220328175248-053ad81199eb // indirect golang.org/x/image v0.0.0-20201208152932-35266b937fa6 // indirect golang.org/x/mod v0.6.0 // indirect