logo       
Google Custom Search
    AddThis Social Bookmark Button
-->

Re: defining queues by user defined node features: msg#00077

Subject: Re: defining queues by user defined node features
I tried shutting down Maui and running the default pbs_sched instead. No change in behavior. I've set the resource_available.nodes to x86 or x86-64 in the execution queues, thinking that the routing queue would then route the 32-bit requests to short or long and the 64-bit jobs to short-64 or long-64 depending on the wall time requested, but that has no effect. At this point I have no idea what I am doing wrong. Any ideas?
                  Thanks,
                     Spencer



P Spencer Davis wrote:
Hello,
I'm running v 2.1.6 of PBS as a resource manager with v 3.2.6p19 of the Maui scheduler. All the compute nodes are running RHEL 4 with the 2.6.9-55 kernel. The cluster is heterogeneous: 32 of the nodes are 32-bit dual-processor, and the other 32 are 64-bit dual-processor. The nodes file in server_priv is configured as follows (edited for brevity):
...
n31 np=2 x86
n32 np=2 x86-64
...

with the idea being that submitting a job with nodes=x86-64 will select a 64-bit node. This worked fine until I created a routing queue with a short and a long execution queue; now the jobs are routed in a haphazard way. I tried creating short and long queues with the following properties:
Queue short-64
        queue_type = Execution
        total_jobs = 0
state_count = Transit:0 Queued:0 Held:0 Waiting:0 Running:0 Exiting:0
        resources_max.walltime = 24:00:00
        resources_default.neednodes = x86-64
        resources_default.nodes = x86-64
        mtime = Fri Sep 14 14:25:56 2007
        enabled = True
        started = True
and they work fine as long as I submit jobs directly to them, but if the job is submitted to the default routing queue, it will only be routed by cpu or walltime.
                   Any insight is appreciated,
                                   Spencer
Here are my queue definitions:
Queue short
        queue_type = Execution
        Priority = 20
        max_queuable = 62
        total_jobs = 4
state_count = Transit:0 Queued:0 Held:0 Waiting:0 Running:4 Exiting:0
        from_route_only = True
        resources_max.cput = 24:00:00
        resources_max.walltime = 24:00:00
        resources_min.cput = 00:00:00
        resources_default.neednodes = x86
        resources_default.nodes = x86
        mtime = Fri Sep 14 14:27:28 2007
        resources_assigned.mem = 16777216b
        resources_assigned.nodect = 4
        enabled = True
        started = True

Queue routing
        queue_type = Route
        total_jobs = 0
state_count = Transit:0 Queued:0 Held:0 Waiting:0 Running:0 Exiting:0
        resources_default.walltime = 00:10:00
        mtime = Fri Sep 14 14:06:20 2007
        route_destinations = short,long,long-64,short-64
        route_held_jobs = True
        route_waiting_jobs = True
        route_retry_time = 120
        route_lifetime = 604800
        enabled = True
        started = True

Queue long-64
        queue_type = Execution
        total_jobs = 0
state_count = Transit:0 Queued:0 Held:0 Waiting:0 Running:0 Exiting:0
        resources_min.walltime = 24:00:00
        resources_default.neednodes = x86-64
        mtime = Fri Sep 14 14:42:06 2007
        enabled = True
        started = True

Queue bsu-research
        queue_type = Execution
        Priority = 80
        total_jobs = 0
state_count = Transit:0 Queued:0 Held:0 Waiting:0 Running:0 Exiting:0
        from_route_only = False
        acl_group_enable = True
        acl_groups = ccnstaff
        mtime = Tue Aug 21 12:34:26 2007
        enabled = True
        started = True

Queue long
        queue_type = Execution
        Priority = 20
        max_queuable = 62
        total_jobs = 0
state_count = Transit:0 Queued:0 Held:0 Waiting:0 Running:0 Exiting:0
        acl_host_enable = False
        from_route_only = True
        resources_min.cput = 24:00:01
        resources_min.walltime = 24:00:01
        resources_default.neednodes = x86
        mtime = Fri Sep 14 14:01:39 2007
        resources_assigned.mem = 0b
        resources_assigned.nodect = 0
        enabled = True
        started = True

Queue short-64
        queue_type = Execution
        total_jobs = 0
state_count = Transit:0 Queued:0 Held:0 Waiting:0 Running:0 Exiting:0
        resources_max.walltime = 24:00:00
        resources_default.neednodes = x86-64
        resources_default.nodes = x86-64
        mtime = Fri Sep 14 14:25:56 2007
        enabled = True
        started = True

My server configuration:
Server ccncluster.bsu.edu
        server_state = Active
        scheduling = True
        total_jobs = 4
state_count = Transit:0 Queued:0 Held:0 Waiting:0 Running:4 Exiting:0
        managers =
        operators =
        default_queue = routing
        log_events = 511
        mail_from = adm
        resources_default.mem = 4mb
        resources_assigned.mem = 16777216b
        resources_assigned.nodect = 4
        scheduler_iteration = 600
        node_check_rate = 150
        tcp_timeout = 6
        node_pack = False
        pbs_version = 2.1.6


and the maui configuration:

# maui.cfg 3.2.6p19

SERVERHOST            somehost.nowhere.net
# primary admin must be first in list
ADMIN1               00notreal

# Resource Manager Definition

#RMCFG[SOMEHOST] TYPE=PBS@RMNMHOST@
RMCFG[base]   TYPE=PBS

# Allocation Manager Definition

AMCFG[bank]  TYPE=NONE

# full parameter docs at http://supercluster.org/mauidocs/a.fparameters.html
# use the 'schedctl -l' command to display current configuration

RMPOLLINTERVAL        00:00:30

SERVERPORT            42559
SERVERMODE            NORMAL

# Admin: http://supercluster.org/mauidocs/a.esecurity.html


LOGFILE               maui.log
LOGFILEMAXSIZE        10000000
LOGLEVEL              3

# Job Priority: http://supercluster.org/mauidocs/5.1jobprioritization.html

QUEUETIMEWEIGHT       1

# FairShare: http://supercluster.org/mauidocs/6.3fairshare.html

#FSPOLICY              PSDEDICATED
#FSDEPTH               7
#FSINTERVAL            86400
#FSDECAY               0.80

# Throttling Policies: http://supercluster.org/mauidocs/6.2throttlingpolicies.html

# NONE SPECIFIED

# Backfill: http://supercluster.org/mauidocs/8.2backfill.html

BACKFILLPOLICY        NONE
RESERVATIONPOLICY     CURRENTHIGHEST

# Maui Feature policies

ENABLEMULTIREQJOBS TRUE

# Node Allocation: http://supercluster.org/mauidocs/5.2nodeallocation.html

NODEALLOCATIONPOLICY MINRESOURCE

# QOS: http://supercluster.org/mauidocs/7.3qos.html

# QOSCFG[hi]  PRIORITY=100 XFTARGET=100 FLAGS=PREEMPTOR:IGNMAXJOB
# QOSCFG[low] PRIORITY=-1000 FLAGS=PREEMPTEE

# Standing Reservations: http://supercluster.org/mauidocs/7.1.3standingreservations.html

# SRSTARTTIME[test] 8:00:00
# SRENDTIME[test]   17:00:00
# SRDAYS[test]      MON TUE WED THU FRI
# SRTASKCOUNT[test] 20
# SRMAXTIME[test]   0:30:00

# Creds: http://supercluster.org/mauidocs/6.1fairnessoverview.html

# USERCFG[DEFAULT]      FSTARGET=25.0
# USERCFG[john]         PRIORITY=100  FSTARGET=10.0-
# GROUPCFG[staff]       PRIORITY=1000 QLIST=hi:low QDEF=hi
# CLASSCFG[batch]       FLAGS=PREEMPTEE
# CLASSCFG[interactive] FLAGS=PREEMPTOR


_______________________________________________
torqueusers mailing list
torqueusers@xxxxxxxxxxxxxxxx
http://www.supercluster.org/mailman/listinfo/torqueusers



<Prev in Thread] Current Thread [Next in Thread>