/var/log/rtf-init.log /var/log/gravity-install.log
Do not enter gravity yet; begin by running the following command:
# df -h
# sudo gravity enter
All of the following commands must be run while still inside gravity. Please remember to save each output for further study:
# gravity status
# etcdctl cluster-health
# planet status --pretty
# docker info | head
# journalctl -p err -r
# kubectl get nodes
# kubectl get pods -o wide --all-namespaces
# kubectl get events --all-namespaces
# kubectl describe node <NODE_NAME>
# kubectl describe pod <POD_NAME> -n <NAMESPACE_NAME>
# exit
[root@ip-111-11-18-252 ~]# df -h
Filesystem Size Used Avail Use% Mounted on
devtmpfs 7.4G 0 7.4G 0% /dev
tmpfs 7.4G 0 7.4G 0% /dev/shm
tmpfs 7.4G 25M 7.4G 1% /run
tmpfs 7.4G 0 7.4G 0% /sys/fs/cgroup
/dev/xvda2 80G 5.1G 75G 7% /
/dev/xvdc 100G 16G 85G 16% /var/lib/gravity
/dev/xvdb 60G 473M 60G 1% /var/lib/gravity/planet/etcd
tmpfs 1.5G 0 1.5G 0% /run/user/1000
[root@ip-111-11-27-235 ~]# df -h
Filesystem Size Used Avail Use% Mounted on
devtmpfs 7.4G 0 7.4G 0% /dev
tmpfs 7.4G 0 7.4G 0% /dev/shm
tmpfs 7.4G 25M 7.4G 1% /run
tmpfs 7.4G 0 7.4G 0% /sys/fs/cgroup
/dev/xvda2 80G 2.1G 78G 3% /
/dev/xvdb 100G 6.1G 94G 7% /var/lib/gravity
[root@ip-111-11-27-81 ~]# df -h
Filesystem Size Used Avail Use% Mounted on
devtmpfs 7.4G 0 7.4G 0% /dev
tmpfs 7.4G 0 7.4G 0% /dev/shm
tmpfs 7.4G 25M 7.4G 1% /run
tmpfs 7.4G 0 7.4G 0% /sys/fs/cgroup
/dev/xvda2 80G 2.2G 78G 3% /
/dev/xvdb 100G 4.8G 96G 5% /var/lib/gravity
ip-111-11-18-252:/$ gravity status
Cluster status: active
Application: runtime-fabric, version 1.1.1568899909-253f482
Join token: my-cluster-token
Periodic updates: Not Configured
Remote support: Not Configured
Last completed operation:
* operation_expand (24694ccc-607b-42de-a98c-935309d628b7)
started: Mon Oct 7 21:00 UTC (2 days ago)
completed: Mon Oct 7 21:04 UTC (2 days ago)
Cluster endpoints:
* Authentication gateway:
- 111.11.18.252:32009
* Cluster management URL:
- https://111.11.18.252:32009
Cluster nodes: runtime-fabric
Masters:
* ip-111-11-18-252.us-west-2.compute.internal (111.11.18.252, controller_node)
Status: healthy
Nodes:
* ip-111-11-27-81.us-west-2.compute.internal (111.11.27.81, worker_node)
Status: healthy
* ip-111-11-27-235.us-west-2.compute.internal (111.11.27.235, worker_node)
Status: healthy
ip-111-11-18-252:/$ etcdctl cluster-health
member 6230b80fbf041027 is healthy: got healthy result from https://111.11.18.252:2379
cluster is healthy
ip-111-11-18-252:/$ planet status --pretty
{
"status": "running",
"nodes": [
{
"name": "111_11_27_81.runtime-fabric",
"member_status": {
"name": "111_11_27_81.runtime-fabric",
"addr": "111.11.27.81:7496",
"status": "alive",
"tags": {
"publicip": "111.11.27.81",
"role": "node"
}
},
"status": "running",
"probes": [
{
"checker": "ip-forward",
"status": "running"
},
{
"checker": "br-netfilter",
"status": "running"
},
{
"checker": "disk-space",
"detail": "disk utilization on /var/lib/gravity is below 80 percent (102 GB is available out of 107 GB)",
"status": "running",
"checker_data": "eyJoaWdoX3dhdGVybWFyayI6ODAsInBhdGgiOiIvdmFyL2xpYi9ncmF2aXR5IiwidG90YWxfYnl0ZXMiOjEwNzMyMTc1MzYwMCwiYXZhaWxhYmxlX2J5dGVzIjoxMDIyNjc0MDAxOTJ9"
},
{
"checker": "may-detach-mounts",
"status": "running"
},
{
"checker": "dns",
"status": "running"
},
{
"checker": "etcd-healthz",
"status": "running"
},
{
"checker": "nodestatus",
"status": "running"
},
{
"checker": "docker",
"status": "running"
},
{
"checker": "kubelet",
"status": "running"
},
{
"checker": "system-version",
"detail": "Linux ip-111-11-27-81.us-west-2.compute.internal 3.10.0-1062.1.2.el7.x86_64 #1 SMP Mon Sep 16 14:19:51 EDT 2019 x86_64 GNU/Linux\n",
"status": "running"
},
{
"checker": "systemd-version",
"detail": "systemd 241 (241)\n+PAM +AUDIT +SELINUX +IMA +APPARMOR +SMACK +SYSVINIT +UTMP +LIBCRYPTSETUP +GCRYPT +GNUTLS +ACL +XZ +LZ4 +SECCOMP +BLKID +ELFUTILS +KMOD -IDN2 +IDN -PCRE2 default-hierarchy=hybrid\n",
"status": "running"
},
{
"checker": "docker-version",
"detail": "Containers: 10\n Running: 9\n Paused: 0\n Stopped: 1\nImages: 8\nServer Version: 18.06.2-ce\nStorage Driver: overlay2\n Backing Filesystem: xfs\n Supports d_type: true\n Native Overlay Diff: true\nLogging Driver: json-file\nCgroup Driver: cgroupfs\nPlugins:\n Volume: local\n Network: bridge host macvlan null overlay\n Log: awslogs fluentd gcplogs gelf journald json-file logentries splunk syslog\nSwarm: inactive\nRuntimes: runc\nDefault Runtime: runc\nInit Binary: docker-init\ncontainerd version: 468a545b9edcd5932818eb9de8e72413e616e86e\nrunc version: 69663f0bd4b60df09991c08812a60108003fa340\ninit version: fec3683\nSecurity Options:\n seccomp\n Profile: default\nKernel Version: 3.10.0-1062.1.2.el7.x86_64\nOperating System: Debian GNU/Linux 9 (stretch) (containerized)\nOSType: linux\nArchitecture: x86_64\nCPUs: 2\nTotal Memory: 14.77GiB\nName: ip-111-11-27-81.us-west-2.compute.internal\nID: I6BN:HGY2:4EMR:JBTX:4MJK:7B73:CYNP:YUYR:J5I2:NT32:GZOK:5BRU\nDocker Root Dir: /ext/docker\nDebug Mode (client): false\nDebug Mode (server): false\nNo Proxy: 0.0.0.0/0,.local\nRegistry: https://index.docker.io/v1/\nLabels:\nExperimental: false\nInsecure Registries:\n 127.0.0.0/8\nLive Restore Enabled: false\n\n",
"status": "running"
},
{
"checker": "etcd-version",
"detail": "etcd Version: 3.3.11\nGit SHA: 2cf9e51d2\nGo Version: go1.10.7\nGo OS/Arch: linux/amd64\n",
"status": "running"
},
{
"checker": "kubelet-version",
"detail": "Kubernetes v1.13.11\n",
"status": "running"
},
{
"checker": "coredns-version",
"detail": "CoreDNS-1.3.1\nlinux/amd64, go1.11.4, 6b56a9c\n",
"status": "running"
},
{
"checker": "dbus-version",
"detail": "D-Bus Message Bus Daemon 1.10.26\nCopyright (C) 2002, 2003 Red Hat, Inc., CodeFactory AB, and others\nThis is free software; see the source for copying conditions.\nThere is NO warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.\n",
"status": "running"
},
{
"checker": "serf-version",
"detail": "Serf v0.8.0\nAgent Protocol: 4 (Understands back to: 2)\n",
"status": "running"
},
{
"checker": "flanneld-version",
"detail": "0.5.3+git\n",
"status": "running"
},
{
"checker": "registry-version",
"detail": "/usr/bin/registry planet/docker/distribution v2.7.1-gravitational\n",
"status": "running"
}
]
},
{
"name": "111_11_27_235.runtime-fabric",
"member_status": {
"name": "111_11_27_235.runtime-fabric",
"addr": "111.11.27.235:7496",
"status": "alive",
"tags": {
"publicip": "111.11.27.235",
"role": "node"
}
},
"status": "running",
"probes": [
{
"checker": "dns",
"status": "running"
},
{
"checker": "ip-forward",
"status": "running"
},
{
"checker": "disk-space",
"detail": "disk utilization on /var/lib/gravity is below 80 percent (101 GB is available out of 107 GB)",
"status": "running",
"checker_data": "eyJoaWdoX3dhdGVybWFyayI6ODAsInBhdGgiOiIvdmFyL2xpYi9ncmF2aXR5IiwidG90YWxfYnl0ZXMiOjEwNzMyMTc1MzYwMCwiYXZhaWxhYmxlX2J5dGVzIjoxMDA4NDU2MTcxNTJ9"
},
{
"checker": "br-netfilter",
"status": "running"
},
{
"checker": "may-detach-mounts",
"status": "running"
},
{
"checker": "nodestatus",
"status": "running"
},
{
"checker": "docker",
"status": "running"
...
ip-111-11-18-252:/$ docker info | head
Containers: 63
Running: 39
Paused: 0
Stopped: 24
Images: 25
Server Version: 18.06.2-ce
Storage Driver: overlay2
Backing Filesystem: xfs
Supports d_type: true
Native Overlay Diff: true
ip-111-11-18-252:/$ journalctl -p err -r
-- Logs begin at Mon 2019-10-07 20:55:24 UTC, end at Thu 2019-10-10 19:16:12 UTC. --
Oct 07 20:56:01 ip-111-11-18-252.us-west-2.compute.internal systemd[1]: Failed to start Kubernetes Kubelet.
Oct 07 20:56:01 ip-111-11-18-252.us-west-2.compute.internal systemd[1]: kube-kubelet.service: Failed to load environment files: No such file or directory
Oct 07 20:55:56 ip-111-11-18-252.us-west-2.compute.internal systemd[1]: Failed to start Kubernetes Kubelet.
Oct 07 20:55:56 ip-111-11-18-252.us-west-2.compute.internal systemd[1]: kube-kubelet.service: Failed to load environment files: No such file or directory
Oct 07 20:55:50 ip-111-11-18-252.us-west-2.compute.internal systemd[1]: Failed to start Kubernetes Kubelet.
Oct 07 20:55:50 ip-111-11-18-252.us-west-2.compute.internal systemd[1]: kube-kubelet.service: Failed to load environment files: No such file or directory
Oct 07 20:55:45 ip-111-11-18-252.us-west-2.compute.internal systemd[1]: Failed to start Kubernetes Kubelet.
Oct 07 20:55:45 ip-111-11-18-252.us-west-2.compute.internal systemd[1]: kube-kubelet.service: Failed to load environment files: No such file or directory
Oct 07 20:55:40 ip-111-11-18-252.us-west-2.compute.internal systemd[1]: Failed to start Kubernetes Kubelet.
Oct 07 20:55:40 ip-111-11-18-252.us-west-2.compute.internal systemd[1]: kube-kubelet.service: Failed to load environment files: No such file or directory
Oct 07 20:55:35 ip-111-11-18-252.us-west-2.compute.internal systemd[1]: Failed to start Prepackaged Kubernetes container images.
Oct 07 20:55:35 ip-111-11-18-252.us-west-2.compute.internal systemd[1]: Failed to start Kubernetes Kubelet.
Oct 07 20:55:35 ip-111-11-18-252.us-west-2.compute.internal systemd[1]: kube-kubelet.service: Failed to load environment files: No such file or directory
Oct 07 20:55:34 ip-111-11-18-252.us-west-2.compute.internal systemd[1]: Failed to start Kubernetes Scheduler.
Oct 07 20:55:34 ip-111-11-18-252.us-west-2.compute.internal systemd[1]: Failed to start Kubernetes Controller Manager.
Oct 07 20:55:29 ip-111-11-18-252.us-west-2.compute.internal systemd[1]: Failed to start Prepackaged Kubernetes container images.
Oct 07 20:55:29 ip-111-11-18-252.us-west-2.compute.internal systemd[1]: Failed to start Kubernetes Kubelet.
Oct 07 20:55:29 ip-111-11-18-252.us-west-2.compute.internal systemd[1]: kube-kubelet.service: Failed to load environment files: No such file or directory
Oct 07 20:55:25 ip-111-11-18-252.us-west-2.compute.internal exim[473]: exim: could not open panic log - aborting: see message(s) above
Oct 07 20:55:25 ip-111-11-18-252.us-west-2.compute.internal exim[473]: 2019-10-07 20:55:25 Cannot open main log file "/var/log/exim4/mainlog": No such file or directory: euid=0 egid=106
Oct 07 20:55:25 ip-111-11-18-252.us-west-2.compute.internal exim[473]: 2019-10-07 20:55:25 socket bind() to port 25 for address 127.0.0.1 failed: Address already in use: waiting 30s before trying again (9 more tries)
Oct 07 20:55:24 ip-111-11-18-252.us-west-2.compute.internal systemd[1]: Failed to start Prepackaged Kubernetes container images.
Oct 07 20:55:24 ip-111-11-18-252.us-west-2.compute.internal systemd[1]: Failed to start Planet Agent service.
Oct 07 20:55:24 ip-111-11-18-252.us-west-2.compute.internal systemd[1]: Failed to start Kubernetes Kubelet.
Oct 07 20:55:24 ip-111-11-18-252.us-west-2.compute.internal systemd[1]: kube-kubelet.service: Failed to load environment files: No such file or directory
ip-111-11-18-252:/$ kubectl get nodes
NAME STATUS ROLES AGE VERSION
111.11.18.252 Ready master 2d22h v1.13.11
111.11.27.235 Ready node 2d22h v1.13.11
111.11.27.81 Ready node 2d22h v1.13.11
ip-111-11-18-252:/$ kubectl get pods -o wide --all-namespaces
NAMESPACE NAME READY STATUS RESTARTS AGE IP NODE NOMINATED NODE READINESS GATES
692903b6-32c2-4297-947e-4d4a8f41a150 gusti-proxy-7c499bbd9f-5cn9r 1/2 Error 9 31m 10.244.91.49 111.11.27.235 <none> <none>
692903b6-32c2-4297-947e-4d4a8f41a150 test-app-5bfb549747-7ptn6 2/2 Running 0 2d3h 10.244.86.7 111.11.27.81 <none> <none>
kube-system coredns-vs6fj 1/1 Running 0 2d22h 10.244.100.3 111.11.18.252 <none> <none>
kube-system coredns-worker-btjbv 1/1 Running 0 2d22h 10.244.91.2 111.11.27.235 <none> <none>
kube-system coredns-worker-czrlp 1/1 Running 0 2d22h 10.244.86.2 111.11.27.81 <none> <none>
kube-system dns-app-install-2d8c33-jdbzh 0/1 Completed 0 2d22h 10.244.100.2 111.11.18.252 <none> <none>
kube-system gravity-install-165a32-hvhdp 0/1 Completed 0 2d22h 111.11.18.252 111.11.18.252 <none> <none>
kube-system gravity-site-cbg58 1/1 Running 0 2d22h 111.11.18.252 111.11.18.252 <none> <none>
kube-system log-collector-6df688db4f-bhp4r 1/1 Running 0 2d22h 10.244.100.23 111.11.18.252 <none> <none>
kube-system log-forwarder-pv6dk 1/1 Running 0 2d22h 10.244.86.3 111.11.27.81 <none> <none>
kube-system log-forwarder-td9jd 1/1 Running 0 2d22h 10.244.100.6 111.11.18.252 <none> <none>
kube-system log-forwarder-thmjk 1/1 Running 0 2d22h 10.244.91.3 111.11.27.235 <none> <none>
kube-system logging-app-bootstrap-2b367b-9dfrv 0/1 Completed 0 2d22h 10.244.100.4 111.11.18.252 <none> <none>
kube-system monitoring-app-install-314896-bmvvl 0/1 Completed 0 2d22h 10.244.100.8 111.11.18.252 <none> <none>
kube-system rtf-install-hook-46ea3e-hv7vw 0/1 Completed 0 2d22h 111.11.18.252 111.11.18.252 <none> <none>
kube-system site-app-post-install-4cdac5-9f7j2 0/1 Completed 2 2d22h 10.244.100.18 111.11.18.252 <none> <none>
kube-system tiller-app-bootstrap-b5a987-q69tq 0/1 Completed 0 2d22h 10.244.100.15 111.11.18.252 <none> <none>
kube-system tiller-deploy-cf96c5bd9-bptwr 1/1 Running 0 2d22h 10.244.100.20 111.11.18.252 <none> <none>
monitoring grafana-85fbf44fc8-5td2v 2/2 Running 0 2d22h 10.244.100.22 111.11.18.252 <none> <none>
monitoring heapster-784767889f-jk68s 1/1 Running 0 2d22h 10.244.100.11 111.11.18.252 <none> <none>
monitoring influxdb-7c6849cccf-d522j 2/2 Running 0 2d22h 10.244.100.19 111.11.18.252 <none> <none>
monitoring kapacitor-8684bb5678-mjwkf 3/3 Running 0 2d22h 10.244.100.30 111.11.18.252 <none> <none>
monitoring telegraf-89dbf98bd-4nd2r 1/1 Running 0 2d22h 10.244.100.12 111.11.18.252 <none> <none>
monitoring telegraf-node-jx68w 1/1 Running 0 2d22h 10.244.91.4 111.11.27.235 <none> <none>
monitoring telegraf-node-kf9fr 1/1 Running 0 2d22h 10.244.100.14 111.11.18.252 <none> <none>
monitoring telegraf-node-mkj6t 1/1 Running 0 2d22h 10.244.86.4 111.11.27.81 <none> <none>
rtf am-connectivity-monitor-1570665600-k6dzg 0/1 Completed 0 19h 10.244.91.43 111.11.27.235 <none> <none>
rtf deployer-57f6d74859-5j24q 1/1 Running 0 2d22h 10.244.100.28 111.11.18.252 <none> <none>
rtf edge-443-6c6d8c666f-b9qdx 1/1 Running 0 2d21h 111.11.18.252 111.11.18.252 <none> <none>
rtf monitor-7d5cbd7bc6-6xwf6 1/1 Running 0 2d22h 10.244.100.26 111.11.18.252 <none> <none>
rtf mule-clusterip-service-74fbbc98c7-42cfn 1/1 Running 0 2d22h 10.244.100.25 111.11.18.252 <none> <none>
rtf registry-creds-1570730400-n9f4q 0/1 Completed 0 81m 10.244.100.39 111.11.18.252 <none> <none>
rtf resource-cache-7c955788-shhsk 2/2 Running 0 2d22h 10.244.100.27 111.11.18.252 <none> <none>
rtf telegraf-6b8b4874cc-gwbzq 1/1 Running 0 2d22h 10.244.100.29 111.11.18.252 <none> <none>
8. If you find that an RTF node is in a degraded state and there is a message mentioning that a specific module is not loaded, e.g. "br_netfilter module is not loaded on node", make sure all prerequisites listed in Required Network Settings are met.
To load a required module, work with your Linux administrator or refer to your distribution's documentation. Make sure that the modules are also loaded after the node is rebooted.
9. The most helpful troubleshooting for RTF issues is the diagnostic package, which contains Kubernetes logs, app-level logs, and cluster logs all in one bundle. Please upload the logs to the case. Here are instructions on getting the logs: What Logs to Collect for an RTF Case
001116966

We use three kinds of cookies on our websites: required, functional, and advertising. You can choose whether functional and advertising cookies apply. Click on the different cookie categories to find out more about each category and to change the default settings.
Privacy Statement
Required cookies are necessary for basic website functionality. Some examples include: session cookies needed to transmit the website, authentication cookies, and security cookies.
Functional cookies enhance functions, performance, and services on the website. Some examples include: cookies used to analyze site traffic, cookies used for market research, and cookies used to display advertising that is not directed to a particular individual.
Advertising cookies track activity across websites in order to understand a viewer’s interests and direct specific marketing to them. Some examples include: cookies used for remarketing, or interest-based advertising.