Linux compatible setaffinity.
- From: Jeff Roberson <jroberson@xxxxxxxxxxxxxx>
- Date: Wed, 19 Dec 2007 21:19:32 -1000 (HST)
I have implemented a linux compatible sched_setaffinity() call which is somewhat crippled. This allows a userspace process to supply a bitmask of processors which it will run on. I have copied the linux interface such that it should be api compatible because I believe it is a sensible interface and they beat us to it by 3 years.
My implementation is crippled in that it supports binding by curthread only and to a single cpu only. Neither of the schedulers presently supports binding to multiple cpus or binding a non-curthread thread. This property is not inherited by forked threads and does not affect other threads in the same process. These two limitations can gradually be weakened without affecting the syscall api.
The linux api is:
int sched_setaffinity(pid_t pid, unsigned int cpusetsize, cpu_set_t *mask);
The cpu_set_t is the same as an fd_set for select. The cpusetsize argument is used to determine the size of the array in mask.
I'm mostly interested in feedback on how best to reduce the namespace pollution and avoid pulling the sched.h file into the generated syscall files (sysproto.h, etc). Anyone who feels this is a terrible interface for such a thing should speak up now.
I also feel that in the medium term we will have to deal with machines with more cores than bits in their native word. Using these CPU_SET, CPU_CLR macros is a fine way to deal with this issue.
I also have a primitive 'taskset' (although I don't like the name); it allows you to run arbitrary programs bound to a single cpu.
Thanks,
Jeff
Index: kern/kern_resource.c
===================================================================
RCS file: /CVS/CVS_IPSO/src/sys/kern/kern_resource.c,v
retrieving revision 1.2.10.2
diff -u -r1.2.10.2 kern_resource.c
--- kern/kern_resource.c 17 Nov 2007 01:01:39 -0000 1.2.10.2
+++ kern/kern_resource.c 20 Dec 2007 07:09:11 -0000
@@ -52,6 +52,7 @@
#include <sys/refcount.h>
#include <sys/resourcevar.h>
#include <sys/sched.h>
+#include <sys/smp.h>
#include <sys/sx.h>
#include <sys/syscallsubr.h>
#include <sys/sysent.h>
@@ -731,6 +732,45 @@
return (error);
}
+#ifndef _SYS_SYSPROTO_H_ /* declared here only when _SYS_SYSPROTO_H_ is not defined */
+struct sched_setaffinity_args { /* argument block handed to sched_setaffinity() */
+ pid_t pid; /* target; sched_setaffinity() rejects any value other than 0 with EPERM */
+ unsigned int cpusetsize; /* must equal CPU_SETSIZE or the call fails with EINVAL */
+ cpu_set_t *mask; /* user-space pointer; contents are fetched with copyin() */
+};
+#endif
+
+/*
+ * Bind the calling thread to the single CPU named in a user-supplied
+ * affinity mask.
+ *
+ * Only the current thread (pid == 0) may be bound, and the mask must
+ * contain exactly one set bit that names a present CPU.
+ *
+ * Returns 0 on success, EPERM if uap->pid is non-zero, EINVAL if
+ * uap->cpusetsize is not CPU_SETSIZE or the mask does not select exactly
+ * one present CPU, or the error from copyin() if the mask cannot be read.
+ *
+ * NOTE(review): Linux passes cpusetsize as a byte count (normally
+ * sizeof(cpu_set_t)); this code compares it against CPU_SETSIZE, which
+ * sizes the set in bits.  Left unchanged here -- confirm the intended ABI.
+ */
+int
+sched_setaffinity(struct thread *td, struct sched_setaffinity_args *uap)
+{
+	cpu_set_t mask;
+	int error;
+	int cpu;
+	int i;
+
+	if (uap->pid != 0)
+		return (EPERM);
+	if (uap->cpusetsize != CPU_SETSIZE)
+		return (EINVAL);
+	error = copyin(uap->mask, &mask, sizeof(mask));
+	if (error)
+		return (error);
+	/* Find the one requested CPU; -1 means no bit has been seen yet. */
+	cpu = -1;
+	for (i = 0; i < CPU_SETSIZE; i++) {
+		if (!CPU_ISSET(i, &mask))
+			continue;
+		/* A second set bit: multi-cpu binding is not supported. */
+		if (cpu != -1)
+			return (EINVAL);
+		cpu = i;
+	}
+	/*
+	 * Reject an empty mask and any id above the highest CPU id before
+	 * calling CPU_ABSENT(), so that it is never handed a negative or
+	 * out-of-range id.
+	 */
+	if (cpu < 0 || (u_int)cpu > mp_maxid || CPU_ABSENT(cpu))
+		return (EINVAL);
+	thread_lock(curthread);
+	sched_bind(curthread, cpu);
+	thread_unlock(curthread);
+	return (0);
+}
+
/*
* Transform the running time and tick information for children of proc p
* into user and system time usage.
Index: kern/makesyscalls.sh
===================================================================
RCS file: /CVS/CVS_IPSO/src/sys/kern/makesyscalls.sh,v
retrieving revision 1.1
diff -u -r1.1 makesyscalls.sh
--- kern/makesyscalls.sh 10 Feb 2006 03:54:18 -0000 1.1
+++ kern/makesyscalls.sh 20 Dec 2007 07:09:11 -0000
@@ -117,6 +117,8 @@
printf "#define\t%s\n\n", sysproto_h > sysarg
printf "#include <sys/signal.h>\n" > sysarg
printf "#include <sys/acl.h>\n" > sysarg
+ printf "#include <sys/proc.h>\n" > sysarg
+ printf "#include <sys/sched.h>\n" > sysarg
printf "#include <sys/thr.h>\n" > sysarg
printf "#include <sys/umtx.h>\n" > sysarg
printf "#include <posix4/_semaphore.h>\n\n" > sysarg
Index: kern/sched_4bsd.c
===================================================================
RCS file: /CVS/CVS_IPSO/src/sys/kern/sched_4bsd.c,v
retrieving revision 1.7.6.2
diff -u -r1.7.6.2 sched_4bsd.c
--- kern/sched_4bsd.c 29 Nov 2007 01:53:51 -0000 1.7.6.2
+++ kern/sched_4bsd.c 20 Dec 2007 07:09:11 -0000
@@ -1442,6 +1442,7 @@
cpu_idle();
}
mtx_lock_spin(&sched_lock);
+ SCHED_STAT_INC(switch_idle);
mi_switch(SW_VOL, NULL);
mtx_unlock_spin(&sched_lock);
}
Index: kern/syscalls.master
===================================================================
RCS file: /CVS/CVS_IPSO/src/sys/kern/syscalls.master,v
retrieving revision 1.2
diff -u -r1.2 syscalls.master
--- kern/syscalls.master 21 Feb 2007 06:34:30 -0000 1.2
+++ kern/syscalls.master 20 Dec 2007 07:09:12 -0000
@@ -793,6 +793,8 @@
long id, void *uaddr, void *uaddr2); }
455 AUE_NULL MSTD { int thr_new(struct thr_param *param, \
int param_size); }
+456 AUE_NULL MSTD { int sched_setaffinity(pid_t pid, \
+ unsigned int cpusetsize, cpu_set_t *mask); }
; Please copy any additions and changes to the following compatability tables:
; sys/compat/freebsd32/syscalls.master
Index: sys/sched.h
===================================================================
RCS file: /CVS/CVS_IPSO/src/sys/sys/sched.h,v
retrieving revision 1.2.10.2
diff -u -r1.2.10.2 sched.h
--- sys/sched.h 3 Dec 2007 21:47:09 -0000 1.2.10.2
+++ sys/sched.h 20 Dec 2007 07:09:18 -0000
@@ -198,6 +198,37 @@
int sched_priority;
};
+typedef unsigned long __cpu_mask; /* one word of the CPU bitmap */
+
+#ifndef CPU_SETSIZE
+#define CPU_SETSIZE 1024U /* number of CPU bits a cpu_set_t can hold */
+#endif
+
+#define _NCPUBITS (sizeof(__cpu_mask) * 8) /* bits per __cpu_mask word (8 bits per byte) */
+
+#ifndef _howmany
+#define _howmany(x, y) (((x) + ((y) - 1)) / (y)) /* x / y rounded up */
+#endif
+
+typedef struct cpu_set { /* bitmap with one bit per CPU id, CPU_SETSIZE bits in total */
+ __cpu_mask __cpus_bits[_howmany(CPU_SETSIZE, _NCPUBITS)];
+} cpu_set_t;
+
+#define __cpuset_mask(n) ((__cpu_mask)1 << ((n) % _NCPUBITS)) /* bit for CPU n within its word */
+#define CPU_CLR(n, p) ((p)->__cpus_bits[(n)/_NCPUBITS] &= ~__cpuset_mask(n)) /* clear bit n in *p */
+#define CPU_COPY(f, t) (void)(*(t) = *(f)) /* copy set *f into *t */
+#define CPU_ISSET(n, p) (((p)->__cpus_bits[(n)/_NCPUBITS] & __cpuset_mask(n)) != 0) /* non-zero iff bit n is set in *p */
+#define CPU_SET(n, p) ((p)->__cpus_bits[(n)/_NCPUBITS] |= __cpuset_mask(n)) /* set bit n in *p */
+#define CPU_ZERO(p) do { /* clear every word of *p */ \
+ cpu_set_t *_p; \
+ __size_t _n; \
+ \
+ _p = (p); \
+ _n = _howmany(CPU_SETSIZE, _NCPUBITS); \
+ while (_n > 0) \
+ _p->__cpus_bits[--_n] = 0; \
+} while (0)
+
/*
* POSIX scheduling declarations for userland.
*/
@@ -213,6 +244,8 @@
struct timespec;
__BEGIN_DECLS
+int sched_setaffinity(pid_t pid, unsigned int cpusetsize, cpu_set_t *mask);
+int sched_getaffinity(pid_t pid, unsigned int cpusetsize, cpu_set_t *mask);
int sched_get_priority_max(int);
int sched_get_priority_min(int);
int sched_getparam(pid_t, struct sched_param *);
_______________________________________________
freebsd-arch@xxxxxxxxxxx mailing list
http://lists.freebsd.org/mailman/listinfo/freebsd-arch
To unsubscribe, send any mail to "freebsd-arch-unsubscribe@xxxxxxxxxxx"
- Follow-Ups:
- Re: Linux compatible setaffinity.
- From: Robert Watson
- Re: Linux compatible setaffinity.
- From: David Xu
- Re: Linux compatible setaffinity.
- From: Julian Elischer
- Re: Linux compatible setaffinity.
- From: Andre Oppermann
- Re: Linux compatible setaffinity.
- Prev by Date: Re: DDB scripting, output capture, and textdumps
- Next by Date: Re: Coordinating TCP projects
- Previous by thread: TOE support issues
- Next by thread: Re: Linux compatible setaffinity.
- Index(es):
Relevant Pages
|