I am using Torque 2.0.0p8-3 and Maui.
I have two client nodes running. One has 4 dual-core AMD CPUs (8 logical CPUs
in total); the other has 2 dual-core CPUs (4 logical CPUs in total). I use
qsub to submit serial jobs, but at most 6 jobs run simultaneously, whereas I
expect the cluster to run 12. I tried changing some of the parameters with
"qmgr", but without success. This problem probably has something to do with
the dual-core CPUs.
Does anyone know how to solve this problem? Many thanks!
Here is what I get when I run "print server" in qmgr:
#
# Create queues and set their attributes.
#
#
# Create and define queue workq
#
create queue workq
set queue workq queue_type = Execution
set queue workq max_running = 12
set queue workq resources_max.cput = 10000:00:00
set queue workq resources_max.ncpus = 12
set queue workq resources_max.nodect = 2
set queue workq resources_max.walltime = 10000:00:00
set queue workq resources_min.cput = 00:00:01
set queue workq resources_min.ncpus = 1
set queue workq resources_min.nodect = 1
set queue workq resources_min.walltime = 00:00:01
set queue workq resources_default.cput = 10000:00:00
set queue workq resources_default.ncpus = 1
set queue workq resources_default.nodect = 1
set queue workq resources_default.walltime = 10000:00:00
set queue workq resources_available.nodect = 2
set queue workq max_user_run = 12
set queue workq enabled = True
set queue workq started = True
#
# Set server attributes.
#
set server scheduling = True
set server default_queue = workq
set server log_events = 64
set server mail_from = adm
set server query_other_jobs = True
set server resources_available.ncpus = 12
set server resources_available.nodect = 2
set server resources_available.nodes = 2
set server resources_max.ncpus = 12
set server resources_max.nodes = 2
set server scheduler_iteration = 60
set server node_check_rate = 150
set server tcp_timeout = 6
set server pbs_version = 2.0.0p8
===================================================
Here is the Maui configuration file (maui.cfg):
# maui.cfg 3.2.6p14
SERVERHOST photon.bwh.harvard.edu
# primary admin must be first in list
ADMIN1 root
# Resource Manager Definition
RMCFG[DUAL.EFOCHT.DE] TYPE=PBS
# Allocation Manager Definition
AMCFG[bank] TYPE=NONE
# full parameter docs at http://clusterresources.com/mauidocs/a.fparameters.html
# use the 'schedctl -l' command to display current configuration
RMPOLLINTERVAL 00:00:10
SERVERPORT 42559
SERVERMODE NORMAL
# Admin: http://clusterresources.com/mauidocs/a.esecurity.html
LOGFILE maui.log
LOGFILEMAXSIZE 10000000
LOGLEVEL 3
# Job Priority: http://clusterresources.com/mauidocs/5.1jobprioritization.html
QUEUETIMEWEIGHT 1
# FairShare: http://clusterresources.com/mauidocs/6.3fairshare.html
#FSPOLICY PSDEDICATED
#FSDEPTH 7
#FSINTERVAL 86400
#FSDECAY 0.80
# Throttling Policies: http://clusterresources.com/mauidocs/6.2throttlingpolicies.html
# NONE SPECIFIED
# Backfill: http://clusterresources.com/mauidocs/8.2backfill.html
BACKFILLPOLICY ON
RESERVATIONPOLICY CURRENTHIGHEST
# Node Allocation: http://clusterresources.com/mauidocs/5.2nodeallocation.html
NODEALLOCATIONPOLICY MINRESOURCE
# QOS: http://clusterresources.com/mauidocs/7.3qos.html
# QOSCFG[hi] PRIORITY=100 XFTARGET=100 FLAGS=PREEMPTOR:IGNMAXJOB
# QOSCFG[low] PRIORITY=-1000 FLAGS=PREEMPTEE
# Standing Reservations: http://clusterresources.com/mauidocs/7.1.3standingreservations.html
# SRSTARTTIME[test] 8:00:00
# SRENDTIME[test] 17:00:00
# SRDAYS[test] MON TUE WED THU FRI
# SRTASKCOUNT[test] 20
# SRMAXTIME[test] 0:30:00
# Creds: http://clusterresources.com/mauidocs/6.1fairnessoverview.html
# USERCFG[DEFAULT] FSTARGET=25.0
# USERCFG[john] PRIORITY=100 FSTARGET=10.0-
# GROUPCFG[staff] PRIORITY=1000 QLIST=hi:low QDEF=hi
# CLASSCFG[batch] FLAGS=PREEMPTEE
# CLASSCFG[interactive] FLAGS=PREEMPTOR
NODEACCESSPOLICY DEDICATED