Difference between revisions of "Job-scripts-jiriaf"

From epsciwiki
Jump to navigation Jump to search
 
(16 intermediate revisions by the same user not shown)
Line 24: Line 24:
 
       image: shifter-stress
 
       image: shifter-stress
 
       command: ["bash"]
 
       command: ["bash"]
       args: ["300", "2"] # Time and cpu should go here
+
       args: ["300", "2"] # Time and cpu for stress
 
       volumeMounts:
 
       volumeMounts:
 
         - name: shifter-stress
 
         - name: shifter-stress
Line 54: Line 54:
 
             operator: In
 
             operator: In
 
             values:
 
             values:
             - "Local"
+
             - "jiriaf"
 
# Below should be commented out if the JIRIAF_WALLTIME is set to 0
 
# Below should be commented out if the JIRIAF_WALLTIME is set to 0
 
     ###
 
     ###
        - key: jiriaf.alivetime  
+
          - key: jiriaf.alivetime  
 
             operator: Gt
 
             operator: Gt
 
             values:
 
             values:
Line 70: Line 70:
  
 
== Compute Sites Utilizing Docker or Other Container Runtimes Operating in Root Space ==
 
== Compute Sites Utilizing Docker or Other Container Runtimes Operating in Root Space ==
Two containers are instantiated in this process. The first container is dedicated to the user's job, while the second container's role is to adjust the <code>PGID</code> of the first container. This adjustment ensures that metrics from the correct processes running in the first container are accurately collected.
+
Two containers are instantiated in this process. The first container is dedicated to the user's job, while the second container's role is to adjust the <code>PGID</code> of the first container. This adjustment ensures that metrics from the correct processes running in the first container are accurately collected. '''Please note that the PGID may not always be correct.''' It's essential to ensure that the PGID corresponds to the processes running within the container.
  
 
<source lang="yaml">
 
<source lang="yaml">
Line 106: Line 106:
 
       image: docker-stress
 
       image: docker-stress
 
       command: ["bash"]
 
       command: ["bash"]
       args: ["300", "2", "~/p/containers/c1/p"] # Time and cpu should go here
+
       args: ["300", "2", "~/default/some-name/containers/c1/p"] # "default" is the namespace of the pod. "some-name" is the pod name.
 
       volumeMounts:
 
       volumeMounts:
 
         - name: docker-stress
 
         - name: docker-stress
Line 120: Line 120:
 
       image: get-pgid
 
       image: get-pgid
 
       command: ["bash"]
 
       command: ["bash"]
       args: ["~/p/containers/c1/p", "~/p/containers/c1/pgid"] # Time and cpu should go here
+
       args: ["~/default/some-name/containers/c1/p", "~/default/some-name/containers/c1/pgid"] # "default" is the namespace of the pod. "some-name" is the pod name.
 
       volumeMounts:
 
       volumeMounts:
 
         - name: get-pgid
 
         - name: get-pgid
Line 153: Line 153:
 
             operator: In
 
             operator: In
 
             values:
 
             values:
             - "Local"
+
             - "jiriaf"
 
# Below should be commented out if the JIRIAF_WALLTIME is set to 0
 
# Below should be commented out if the JIRIAF_WALLTIME is set to 0
 
     ###
 
     ###
        - key: jiriaf.alivetime  
+
          - key: jiriaf.alivetime  
 
             operator: Gt
 
             operator: Gt
 
             values:
 
             values:
Line 168: Line 168:
 
</source>
 
</source>
  
== Compute Sites Utilizing Docker or Other Container Runtimes Operating in Root Space ==
+
= Reference =
Two containers are instantiated in this process. The first container is dedicated to the user's job, while the second container's role is to adjust the <code>PGID</code> of the first container. This adjustment ensures that metrics from the correct processes running in the first container are accurately collected.
+
[[Virtual-kubelet-cmd]]
 
 
<pre>
 
kind: ConfigMap
 
apiVersion: v1
 
metadata:
 
  name: docker-stress
 
data:
 
  stress.sh: |
 
    #!/bin/bash
 
    export PGID_FILE=$3
 
    docker run -d --rm -e NUMBER=$2 -e TIME=$1 jlabtsai/stress:latest > /dev/null
 
    ## find the last container id
 
    export CONTAINER_ID=$(docker ps -l -q)
 
    docker inspect -f '{{.State.Pid}}' $CONTAINER_ID > $3
 
    sleep $1
 
---
 
kind: ConfigMap
 
apiVersion: v1
 
metadata:
 
  name: get-pgid
 
data:
 
  stress.sh: |
 
    #!/bin/bash
 
    sleep 3
 
    cp $1 $2
 
---
 
apiVersion: v1
 
kind: Pod
 
metadata:
 
  name: some-name # Job Name Here
 
spec:
 
  containers:
 
    - name: c1
 
      image: docker-stress
 
      command: ["bash"]
 
      args: ["300", "2", "~/p/containers/c1/p"] # Time and cpu should go here
 
      volumeMounts:
 
        - name: docker-stress
 
          mountPath: docker-stress
 
      resources:
 
        limits:
 
          cpu: "2"
 
          memory: 1Gi
 
        requests:
 
          cpu: "1" # Number of CPUs Here as well
 
          memory: 1Gi # Memory Here
 
    - name: c2
 
      image: get-pgid
 
      command: ["bash"]
 
      args: ["~/p/containers/c1/p", "~/p/containers/c1/pgid"] # Time and cpu should go here
 
      volumeMounts:
 
        - name: get-pgid
 
          mountPath: get-pgid
 
      resources:
 
        limits:
 
          cpu: "2"
 
          memory: 1Gi
 
        requests:
 
          cpu: "1" # Number of CPUs Here as well
 
          memory: 1Gi # Memory Here
 
  volumes:
 
    - name: docker-stress
 
      configMap:
 
        name: docker-stress
 
    - name: get-pgid
 
      configMap:
 
        name: get-pgid
 
  nodeSelector:
 
    kubernetes.io/role: agent
 
  affinity:
 
    nodeAffinity:
 
      requiredDuringSchedulingIgnoredDuringExecution:
 
        nodeSelectorTerms:
 
        - matchExpressions:
 
# Below are the labels for the node, corresponding to the jiriaf labels
 
          - key: jiriaf.nodetype
 
            operator: In
 
            values:
 
            - "cpu"
 
          - key: jiriaf.site
 
            operator: In
 
            values:
 
            - "Local"
 
# Below should be commented out if the JIRIAF_WALLTIME is set to 0
 
    ###
 
        - key: jiriaf.alivetime
 
            operator: Gt
 
            values:
 
            - "30"
 
    ###
 
  tolerations:
 
    - key: "virtual-kubelet.io/provider"
 
      value: "mock"
 
      effect: "NoSchedule"
 
  restartPolicy: Never
 
</pre>
 

Latest revision as of 20:51, 10 April 2024

Job Scripts

Job scripts include the definitions for both the configMap and the pod associated with a particular job.

Computing Sites Running Containers in User Space (Common in HPC Environments Using Singularity or Shifter)

apiVersion: v1
kind: ConfigMap
metadata:
  name: shifter-stress
data:
  # Launcher script for the job. Positional arguments come from the pod's
  # `args`: $1 is exported as TIME and $2 as NUMBER before shifter starts
  # the jlabtsai/stress image.
  stress.sh: |
    #!/bin/bash
    export NUMBER=$2
    export TIME=$1
    shifter --image="jlabtsai/stress:latest" --entrypoint
---
apiVersion: v1
kind: Pod
metadata:
  # Replace with the job name.
  name: some-name
spec:
  restartPolicy: Never
  nodeSelector:
    kubernetes.io/role: agent
  tolerations:
    - key: 'virtual-kubelet.io/provider'
      value: 'mock'
      effect: 'NoSchedule'
  volumes:
    # Exposes the shifter-stress ConfigMap (the job script) to container c1.
    - name: shifter-stress
      configMap:
        name: shifter-stress
  containers:
    - name: c1
      image: shifter-stress
      command:
        - 'bash'
      # Time and cpu for stress.
      args:
        - '300'
        - '2'
      volumeMounts:
        - name: shifter-stress
          mountPath: shifter-stress
      resources:
        requests:
          cpu: '1'  # Number of CPUs here as well
          memory: 1Gi  # Memory here
        limits:
          cpu: '2'
          memory: 1Gi
  affinity:
    nodeAffinity:
      requiredDuringSchedulingIgnoredDuringExecution:
        nodeSelectorTerms:
          - matchExpressions:
              # Below are the labels for the node, corresponding to the
              # jiriaf labels.
              - key: jiriaf.nodetype
                operator: In
                values:
                  - 'cpu'
              - key: jiriaf.site
                operator: In
                values:
                  - 'jiriaf'
              # The entry between the ### markers should be commented out
              # if the JIRIAF_WALLTIME is set to 0.
              ###
              - key: jiriaf.alivetime
                operator: Gt
                values:
                  - '30'
              ###

Compute Sites Utilizing Docker or Other Container Runtimes Operating in Root Space

Two containers are instantiated in this process. The first container is dedicated to the user's job: it launches the Docker container and writes that container's process ID to a file. The second container's role is to set the PGID for the first container by copying that recorded value into the first container's pgid file. This ensures that metrics are collected from the correct processes running in the first container. Please note that the PGID may not always be correct, so it is essential to verify that it corresponds to the processes running within the container.

kind: ConfigMap
apiVersion: v1
metadata:
  name: docker-stress
data:
  # Launcher script for the job. Positional arguments come from the pod's
  # `args`:
  #   $1 - time; passed to the container as TIME and used for the final sleep
  #   $2 - passed to the container as NUMBER (the cpu count for stress)
  #   $3 - file that receives the PID of the started container
  stress.sh: |
    #!/bin/bash
    export PGID_FILE=$3
    # `docker run -d` prints the ID of the container it just started on
    # stdout. Capture it directly instead of discarding it and asking
    # `docker ps -l -q` afterwards, which may return a different container
    # if another one is started on the same host in the meantime.
    export CONTAINER_ID=$(docker run -d --rm -e NUMBER="$2" -e TIME="$1" jlabtsai/stress:latest)
    # Record the PID of the container's main process in the requested file.
    docker inspect -f '{{.State.Pid}}' "$CONTAINER_ID" > "$PGID_FILE"
    # The container runs detached, so keep this script alive for the same
    # amount of time.
    sleep "$1"
---
kind: ConfigMap
apiVersion: v1
metadata:
  name: get-pgid
data:
  # Copies the file given as $1 to the path given as $2. In the pod spec,
  # $1 is the file the first container (c1) writes and $2 is its `pgid` file.
  stress.sh: |
    #!/bin/bash
    # Give the first container a head start (unchanged initial delay).
    sleep 3
    # The source file can take longer than 3 seconds to appear, e.g. when
    # the image has to be pulled before the container starts. Wait up to
    # about 60 more seconds for it to exist and be non-empty.
    for _ in $(seq 1 60); do
      [ -s "$1" ] && break
      sleep 1
    done
    # Still fails with cp's own error (and exit status) if the file never
    # showed up. Paths are quoted so that spaces do not split them.
    cp "$1" "$2"
---
apiVersion: v1
kind: Pod
metadata:
  # Replace with the job name.
  name: some-name
spec:
  restartPolicy: Never
  nodeSelector:
    kubernetes.io/role: agent
  tolerations:
    - key: 'virtual-kubelet.io/provider'
      value: 'mock'
      effect: 'NoSchedule'
  volumes:
    # One ConfigMap-backed volume per container script.
    - name: docker-stress
      configMap:
        name: docker-stress
    - name: get-pgid
      configMap:
        name: get-pgid
  containers:
    # c1 runs the user's job.
    - name: c1
      image: docker-stress
      command:
        - 'bash'
      # In the path, "default" is the namespace of the pod and "some-name"
      # is the pod name.
      args:
        - '300'
        - '2'
        - '~/default/some-name/containers/c1/p'
      volumeMounts:
        - name: docker-stress
          mountPath: docker-stress
      resources:
        requests:
          cpu: '1'  # Number of CPUs here as well
          memory: 1Gi  # Memory here
        limits:
          cpu: '2'
          memory: 1Gi
    # c2 copies the file written by c1 to c1's pgid file.
    - name: c2
      image: get-pgid
      command:
        - 'bash'
      # In the paths, "default" is the namespace of the pod and "some-name"
      # is the pod name.
      args:
        - '~/default/some-name/containers/c1/p'
        - '~/default/some-name/containers/c1/pgid'
      volumeMounts:
        - name: get-pgid
          mountPath: get-pgid
      resources:
        requests:
          cpu: '1'  # Number of CPUs here as well
          memory: 1Gi  # Memory here
        limits:
          cpu: '2'
          memory: 1Gi
  affinity:
    nodeAffinity:
      requiredDuringSchedulingIgnoredDuringExecution:
        nodeSelectorTerms:
          - matchExpressions:
              # Below are the labels for the node, corresponding to the
              # jiriaf labels.
              - key: jiriaf.nodetype
                operator: In
                values:
                  - 'cpu'
              - key: jiriaf.site
                operator: In
                values:
                  - 'jiriaf'
              # The entry between the ### markers should be commented out
              # if the JIRIAF_WALLTIME is set to 0.
              ###
              - key: jiriaf.alivetime
                operator: Gt
                values:
                  - '30'
              ###

Reference

Virtual-kubelet-cmd