Deploying Rook Ceph Storage on Kubernetes

SuKai August 15, 2021

Rook is an Operator that provides Ceph cluster management. It uses CRDs and a controller to deploy and manage Ceph and related resources. Rook Ceph requires its storage devices to be block devices; both partitions and whole disks are supported (this setup uses LVM logical volumes).
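
Before handing a device to Rook or LVM, it is worth confirming that it is a raw block device and checking for leftover filesystem signatures. A minimal check, assuming /dev/sda is the target disk as in this setup:

lsblk -f /dev/sda
sudo wipefs -n /dev/sda    # -n (--no-act) only lists signatures, nothing is erased

Any old signature that shows up (such as the ext4 signature reported below) will otherwise trigger the interactive wipe prompt seen in the pvcreate output.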

| Disk partitioning

sukai@ceph-01:~$ sudo pvcreate /dev/sda
WARNING: ext4 signature detected on /dev/sda at offset 1080. Wipe it? [y/n]: y
  Wiping ext4 signature on /dev/sda.
  Physical volume "/dev/sda" successfully created.
sukai@ceph-01:~$ sudo vgcreate data /dev/sda
  Volume group "data" successfully created
sukai@ceph-01:~$ sudo pvs
  PV         VG   Fmt  Attr PSize  PFree
  /dev/sda   data lvm2 a--  <9.10t <9.10t
sukai@ceph-01:~$ sudo vgs
  VG   #PV #LV #SN Attr   VSize  VFree
  data   1   0   0 wz--n- <9.10t <9.10t
sukai@ceph-01:~$
sukai@ceph-01:~$ sudo lvcreate -L 1024G -n lv1 data
  Logical volume "lv1" created.
sukai@ceph-01:~$ sudo vgs
  VG   #PV #LV #SN Attr   VSize  VFree
  data   1   4   0 wz--n- <9.10t <5.10t
sukai@ceph-01:~$ sudo lvs
  LV   VG   Attr       LSize Pool Origin Data%  Meta%  Move Log Cpy%Sync Convert
  lv1  data -wi-a----- 1.00t
  lv2  data -wi-a----- 1.00t
  lv3  data -wi-a----- 1.00t
  lv4  data -wi-a----- 1.00t
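
Only the first lvcreate call is shown; judging from the vgs/lvs output, lv2 through lv4 were created the same way, for example:

for i in 2 3 4; do sudo lvcreate -L 1024G -n lv$i data; done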
  
sukai@ceph-01:~/sukai/ceph$ lsblk -f
NAME        FSTYPE      LABEL UUID                                   FSAVAIL FSUSE% MOUNTPOINT
loop0       squashfs                                                       0   100% /snap/core18/1988
loop1       squashfs                                                       0   100% /snap/bare/5
loop2       squashfs                                                       0   100% /snap/core18/2253
loop3       squashfs                                                       0   100% /snap/gnome-3-34-1804/66
loop4       squashfs                                                       0   100% /snap/gnome-3-34-1804/77
loop5       squashfs                                                       0   100% /snap/gtk-common-themes/1514
loop6       squashfs                                                       0   100% /snap/gtk-common-themes/1519
loop7       squashfs                                                       0   100% /snap/gnome-3-38-2004/87
loop8       squashfs                                                       0   100% /snap/snapd/11036
loop9       squashfs                                                       0   100% /snap/snapd/14066
loop10      squashfs                                                       0   100% /snap/snap-store/518
loop11      squashfs                                                       0   100% /snap/core20/1242
loop12      squashfs                                                       0   100% /snap/snap-store/558
sda         LVM2_member       1GlwRl-CMVl-PHdb-3RJe-p7r0-Wisd-x1EdxW
├─data-lv1
├─data-lv2
├─data-lv3
└─data-lv4
nvme0n1
├─nvme0n1p1 vfat              000B-2326                               505.8M     1% /boot/efi
└─nvme0n1p2 ext4              d63faaf1-b460-4d9b-9384-aa2d78690656     87.3G    20% /

| Create the StorageClass and PersistentVolumes

sukai@ceph-01:~/sukai/ceph$ cat storageclass.yaml
kind: StorageClass
apiVersion: storage.k8s.io/v1
metadata:
  name: manual
provisioner: kubernetes.io/no-provisioner
volumeBindingMode: WaitForFirstConsumer


sukai@ceph-01:~/sukai/ceph$ cat cluster-sukai-pvc.yaml
---
apiVersion: v1
kind: PersistentVolume
metadata:
  name: data-lv1
spec:
  storageClassName: manual
  capacity:
    storage: 1024Gi
  accessModes:
    - ReadWriteOnce
  persistentVolumeReclaimPolicy: Retain
  volumeMode: Block
  local:
    path: /dev/disk/by-id/dm-name-data-lv1
  nodeAffinity:
      required:
        nodeSelectorTerms:
          - matchExpressions:
              - key: kubernetes.io/hostname
                operator: In
                values:
                - ceph-01
---
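
With one PersistentVolume like the above per logical volume (data-lv1 through data-lv4), the StorageClass and PVs can be applied and checked; the file names match the ones shown in this setup:

kubectl apply -f storageclass.yaml -f cluster-sukai-pvc.yaml
kubectl get sc,pv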

| Deploy the Rook operator

kubectl apply -f common.yaml -f crds.yaml -f operator.yaml
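
These manifests come from the Rook release's example manifests (v1.7.x here, matching the toolbox image used later). Before continuing, the operator pod should be Running; a quick check, assuming the standard app=rook-ceph-operator label:

kubectl -n rook-ceph get pods -l app=rook-ceph-operator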

| Configure the images used by Rook

kubectl -n rook-ceph edit configmap rook-ceph-operator-config
apiVersion: v1
data:
  CSI_CEPHFS_FSGROUPPOLICY: None
  CSI_ENABLE_CEPHFS_SNAPSHOTTER: "true"
  CSI_ENABLE_RBD_SNAPSHOTTER: "true"
  CSI_ENABLE_VOLUME_REPLICATION: "false"
  CSI_FORCE_CEPHFS_KERNEL_CLIENT: "true"
  CSI_PROVISIONER_REPLICAS: "2"
  CSI_RBD_FSGROUPPOLICY: ReadWriteOnceWithFSType
  ROOK_CEPH_COMMANDS_TIMEOUT_SECONDS: "15"
  ROOK_CSI_ALLOW_UNSUPPORTED_VERSION: "false"
  ROOK_CSI_ATTACHER_IMAGE: longhornio/csi-attacher:v3.2.1
  ROOK_CSI_ENABLE_CEPHFS: "true"
  ROOK_CSI_ENABLE_GRPC_METRICS: "false"
  ROOK_CSI_ENABLE_RBD: "true"
  ROOK_CSI_PROVISIONER_IMAGE: longhornio/csi-provisioner:v2.1.2
  ROOK_CSI_REGISTRAR_IMAGE: longhornio/csi-node-driver-registrar:v2.3.0
  ROOK_CSI_RESIZER_IMAGE: longhornio/csi-resizer:v1.2.0
  ROOK_CSI_SNAPSHOTTER_IMAGE: longhornio/csi-snapshotter:v3.0.3
  ROOK_ENABLE_DISCOVERY_DAEMON: "false"
  ROOK_ENABLE_FLEX_DRIVER: "false"
  ROOK_LOG_LEVEL: INFO
  ROOK_OBC_WATCH_OPERATOR_NAMESPACE: "true"
kind: ConfigMap
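
The CSI sidecar images are replaced with longhornio mirrors of the upstream images here. After saving the edit, the values can be double-checked before the cluster is created:

kubectl -n rook-ceph get configmap rook-ceph-operator-config -o yaml | grep IMAGE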

| Create the Ceph cluster configuration override

Because this is a single-node cluster, the default replica settings would trigger health warnings, so the defaults are overridden.

sukai@ceph-01:~/sukai/ceph$ cat cluster-sukai-pvc-configmap.yaml
kind: ConfigMap
apiVersion: v1
metadata:
  name: rook-config-override
  namespace: rook-ceph # namespace:cluster
data:
  config: |
    [global]
    osd_pool_default_size = 1
    osd_pool_default_min_size = 1
    mon_warn_on_pool_no_redundancy = false
    bdev_flock_retry = 20
    bluefs_buffered_io = false
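
The rook-config-override ConfigMap should be applied before the CephCluster is created so the daemons pick up these settings on first start:

kubectl apply -f cluster-sukai-pvc-configmap.yaml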

| Create the Ceph cluster

apiVersion: ceph.rook.io/v1
kind: CephCluster
metadata:
  name: rook-ceph
  namespace: rook-ceph
spec:
  cephVersion:
    image: quay.io/ceph/ceph:v16.2.6
    allowUnsupported: false
  dataDirHostPath: /opt/rook
  skipUpgradeChecks: false
  continueUpgradeAfterChecksEvenIfNotHealthy: false
  waitTimeoutForHealthyOSDInMinutes: 10
  mon:
    count: 1
    allowMultiplePerNode: true
  mgr:
    count: 1
    modules:
      - name: pg_autoscaler
        enabled: true
  dashboard:
    enabled: true
    ssl: false
  monitoring:
    enabled: false
    rulesNamespace: rook-ceph
  network:
    provider: host
  crashCollector:
    disable: false
    #daysToRetain: 30
  cleanupPolicy:
    confirmation: ""
    sanitizeDisks:
      method: quick
      dataSource: zero
      iteration: 1
    allowUninstallWithVolumes: false
  annotations:
  labels:
  resources:
  removeOSDsIfOutAndSafeToRemove: false
  storage:
    storageClassDeviceSets:
      - name: set1
        count: 4
        portable: false
        tuneDeviceClass: true
        volumeClaimTemplates:
          - metadata:
              name: data
            spec:
              resources:
                requests:
                  storage: 900Gi
              storageClassName: manual
              volumeMode: Block
              accessModes:
                - ReadWriteOnce
    onlyApplyOSDPlacement: false
  placement:
    all:
      nodeAffinity:
        requiredDuringSchedulingIgnoredDuringExecution:
          nodeSelectorTerms:
          - matchExpressions:
            - key: kubernetes.io/hostname
              operator: In
              values:
              - ceph-01
  disruptionManagement:
    managePodBudgets: false
    osdMaintenanceTimeout: 30
    pgHealthCheckTimeout: 0
    manageMachineDisruptionBudgets: false
    machineDisruptionBudgetNamespace: openshift-machine-api
  healthCheck:
    daemonHealth:
      mon:
        disabled: false
        interval: 45s
      osd:
        disabled: false
        interval: 60s
      status:
        disabled: false
        interval: 60s
    livenessProbe:
      mon:
        disabled: false
      mgr:
        disabled: false
      osd:
        disabled: false
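
No file name is shown for the CephCluster manifest above; assuming it is saved as cluster.yaml, apply it and watch the rollout until mon, mgr and four OSD pods are running on ceph-01:

kubectl apply -f cluster.yaml
kubectl -n rook-ceph get cephcluster rook-ceph
kubectl -n rook-ceph get pods -o wide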

| Create the filesystem

sukai@ceph-01:~/sukai/ceph$ cat filesystem.yaml
apiVersion: ceph.rook.io/v1
kind: CephFilesystem
metadata:
  name: ai
  namespace: rook-ceph
spec:
  metadataPool:
    failureDomain: host
    replicated:
      size: 1
  dataPools:
    - failureDomain: host
      replicated:
        size: 1
  preserveFilesystemOnDelete: true
  metadataServer:
    activeCount: 1
    activeStandby: true
    # A key/value list of annotations
    annotations:
    #  key: value
    placement:
    #  nodeAffinity:
    #    requiredDuringSchedulingIgnoredDuringExecution:
    #      nodeSelectorTerms:
    #      - matchExpressions:
    #        - key: role
    #          operator: In
    #          values:
    #          - mds-node
    #  tolerations:
    #  - key: mds-node
    #    operator: Exists
    #  podAffinity:
    #  podAntiAffinity:
    #  topologySpreadConstraints:
    resources:
    #  limits:
    #    cpu: "500m"
    #    memory: "1024Mi"
    #  requests:
    #    cpu: "500m"
    #    memory: "1024Mi"
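
Rook derives the pool names from the filesystem name, so this manifest should produce a metadata pool ai-metadata and a data pool ai-data0; the latter is the pool referenced by the CephNFS resource below. Apply and verify:

kubectl apply -f filesystem.yaml
kubectl -n rook-ceph get cephfilesystem ai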

| Create the NFS service

sukai@ceph-01:~/sukai/ceph$ cat nfs.yaml
apiVersion: ceph.rook.io/v1
kind: CephNFS
metadata:
  name: ai-nfs
  namespace: rook-ceph
spec:
  # rados property is not used in versions of Ceph equal to or greater than
  # 16.2.7, see note in RADOS settings section below.
  rados:
    # RADOS pool where NFS client recovery data and per-daemon configs are
    # stored. In this example the data pool for the "myfs" filesystem is used.
    # If using the object store example, the data pool would be
    # "my-store.rgw.buckets.data". Note that this has nothing to do with where
    # exported CephFS' or objectstores live.
    pool: ai-data0
    # RADOS namespace where NFS client recovery data is stored in the pool.
    namespace: nfs-ns
  # Settings for the NFS server
  server:
    # the number of active NFS servers
    active: 1
    # A key/value list of annotations
    annotations:
    #  key: value
    # where to run the NFS server
    placement:
    #  nodeAffinity:
    #    requiredDuringSchedulingIgnoredDuringExecution:
    #      nodeSelectorTerms:
    #      - matchExpressions:
    #        - key: role
    #          operator: In
    #          values:
    #          - mds-node
    #  tolerations:
    #  - key: mds-node
    #    operator: Exists
    #  podAffinity:
    #  podAntiAffinity:
    #  topologySpreadConstraints:

    # The requests and limits set here allow the ganesha pod(s) to use half of one CPU core and 1 gigabyte of memory
    resources:
    #  limits:
    #    cpu: "500m"
    #    memory: "1024Mi"
    #  requests:
    #    cpu: "500m"
    #    memory: "1024Mi"
    # the priority class to set to influence the scheduler's pod preemption
    priorityClassName:
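
The rados pool ai-data0 referenced here is the data pool of the ai filesystem created above. After applying the manifest, one ganesha server pod should appear (assuming the standard app=rook-ceph-nfs label):

kubectl apply -f nfs.yaml
kubectl -n rook-ceph get pods -l app=rook-ceph-nfs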

| Create the toolbox

sukai@ceph-01:~/sukai/ceph$ cat toolbox.yaml
apiVersion: apps/v1
kind: Deployment
metadata:
  name: rook-ceph-tools
  namespace: rook-ceph
  labels:
    app: rook-ceph-tools
spec:
  replicas: 1
  selector:
    matchLabels:
      app: rook-ceph-tools
  template:
    metadata:
      labels:
        app: rook-ceph-tools
    spec:
      dnsPolicy: ClusterFirstWithHostNet
      containers:
      - name: rook-ceph-tools
        image: rook/ceph:v1.7.8
        command: ["/tini"]
        args: ["-g", "--", "/usr/local/bin/toolbox.sh"]
        imagePullPolicy: IfNotPresent
        env:
          - name: ROOK_CEPH_USERNAME
            valueFrom:
              secretKeyRef:
                name: rook-ceph-mon
                key: ceph-username
          - name: ROOK_CEPH_SECRET
            valueFrom:
              secretKeyRef:
                name: rook-ceph-mon
                key: ceph-secret
        volumeMounts:
          - mountPath: /etc/ceph
            name: ceph-config
          - name: mon-endpoint-volume
            mountPath: /etc/rook
      volumes:
        - name: mon-endpoint-volume
          configMap:
            name: rook-ceph-mon-endpoints
            items:
            - key: data
              path: mon-endpoints
        - name: ceph-config
          emptyDir: {}
      tolerations:
        - key: "node.kubernetes.io/unreachable"
          operator: "Exists"
          effect: "NoExecute"
          tolerationSeconds: 5
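
After applying the Deployment, a shell in the toolbox gives direct access to the Ceph CLI; this is also where the dashboard setting in the next step is run:

kubectl apply -f toolbox.yaml
kubectl -n rook-ceph exec -it deploy/rook-ceph-tools -- bash
ceph status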

| Enable NFS export management from the toolbox

ceph dashboard set-ganesha-clusters-rados-pool-namespace ai-nfs:ai-data0/nfs-ns

The arguments follow the format <cephnfs-name>:<pool>[/<namespace>] and here point at the pool and namespace defined in the CephNFS manifest above.

| Create the dashboard Ingress

sukai@ceph-01:~/sukai/ceph$ cat cluster-sukai-pvc-ingress.yaml
---
apiVersion: networking.k8s.io/v1
kind: Ingress
metadata:
  name: ceph-dashboard
  namespace: rook-ceph
  annotations:
    kubernetes.io/ingress.class: traefik
spec:
  rules:
  - host: ceph.platform.sukai.com
    http:
      paths:
      - path: /
        pathType: ImplementationSpecific
        backend:
          service:
            name: rook-ceph-mgr-dashboard
            port:
              name: http-dashboard
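
Apply the manifest and confirm the Ingress was created (the host ceph.platform.sukai.com must resolve to the Traefik entrypoint):

kubectl apply -f cluster-sukai-pvc-ingress.yaml
kubectl -n rook-ceph get ingress ceph-dashboard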

| Access the dashboard

kubectl -n rook-ceph get secret rook-ceph-dashboard-password -o jsonpath="{['data']['password']}" | base64 --decode && echo

The default login username is admin.

(screenshot: Ceph dashboard)

| Access the NFS service

hello@hello-Precision-7920-Rack:~$ sudo mount -t nfs 192.168.10.7:/ai-demo/ /tmp/test
hello@hello-Precision-7920-Rack:~$ cd /tmp/test/
hello@hello-Precision-7920-Rack:/tmp/test$ ls
a  def
hello@hello-Precision-7920-Rack:/tmp/test$
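
The /ai-demo export was presumably created through the dashboard's NFS management enabled above, and 192.168.10.7 is presumably the node running the ganesha server, since the cluster uses host networking. If the address is unknown, the server pod can be located first (nfs-common must be installed on the client):

kubectl -n rook-ceph get pods -l app=rook-ceph-nfs -o wide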

| View the filesystem directory

(screenshot: filesystem directory listing)