Merge pull request #6262 from sudo-sturbia:msgqueue/syscalls3

PiperOrigin-RevId: 391416650
This commit is contained in:
gVisor bot
2021-08-17 17:44:26 -07:00
9 changed files with 386 additions and 47 deletions
+1 -1
View File
@@ -47,7 +47,7 @@ const (
MSGSSZ = 16
// MSGSEG is simplified due to the nonexistence of a ternary operator.
MSGSEG = (MSGPOOL * 1024) / MSGSSZ
MSGSEG = 0xffff
)
// MsqidDS is equivalent to struct msqid64_ds. Source:
+35
View File
@@ -19,6 +19,8 @@ package ipc
import (
"gvisor.dev/gvisor/pkg/abi/linux"
"gvisor.dev/gvisor/pkg/context"
"gvisor.dev/gvisor/pkg/errors/linuxerr"
"gvisor.dev/gvisor/pkg/sentry/fs"
"gvisor.dev/gvisor/pkg/sentry/kernel/auth"
)
@@ -113,3 +115,36 @@ func (o *Object) CheckPermissions(creds *auth.Credentials, req fs.PermMask) bool
}
return creds.HasCapabilityIn(linux.CAP_IPC_OWNER, o.UserNS)
}
// Set modifies attributes for an IPC object. See *ctl(IPC_SET).
//
// The caller's ownership of the object is verified before the requested
// uid/gid are validated, so a caller that is not allowed to modify the object
// always gets EPERM regardless of the contents of perm.
//
// Set returns EPERM if o.CheckOwnership rejects the caller's credentials, and
// EINVAL if perm.UID or perm.GID cannot be mapped in the caller's user
// namespace. On success the permission bits and the owner of o are replaced;
// nothing is modified on failure.
//
// Precondition: Mechanism.mu must be held.
func (o *Object) Set(ctx context.Context, perm *linux.IPCPerm) error {
	creds := auth.CredentialsFromContext(ctx)
	if !o.CheckOwnership(creds) {
		// "The argument cmd has the value IPC_SET or IPC_RMID, but the
		//  effective user ID of the calling process is not the creator (as
		//  found in msg_perm.cuid) or the owner (as found in msg_perm.uid)
		//  of the message queue, and the caller is not privileged (Linux:
		//  does not have the CAP_SYS_ADMIN capability)."
		return linuxerr.EPERM
	}

	uid := creds.UserNamespace.MapToKUID(auth.UID(perm.UID))
	gid := creds.UserNamespace.MapToKGID(auth.GID(perm.GID))
	if !uid.Ok() || !gid.Ok() {
		// The man pages don't specify an errno for invalid uid/gid, but EINVAL
		// is generally used for invalid arguments.
		return linuxerr.EINVAL
	}

	// User may only modify the lower 9 bits of the mode. All the other bits are
	// always 0 for the underlying inode.
	mode := linux.FileMode(perm.Mode & 0x1ff)
	o.Perms = fs.FilePermsFromMode(mode)

	o.Owner.UID = uid
	o.Owner.GID = gid

	return nil
}
+109
View File
@@ -206,6 +206,48 @@ func (r *Registry) FindByID(id ipc.ID) (*Queue, error) {
return mech.(*Queue), nil
}
// IPCInfo returns the static, system-wide limits that apply to message
// queues. Every field is filled from the corresponding linux.MSG* constant; no
// registry state is consulted. See msgctl(IPC_INFO).
func (r *Registry) IPCInfo(ctx context.Context) *linux.MsgInfo {
	var limits linux.MsgInfo
	limits.MsgPool = linux.MSGPOOL
	limits.MsgMap = linux.MSGMAP
	limits.MsgMax = linux.MSGMAX
	limits.MsgMnb = linux.MSGMNB
	limits.MsgMni = linux.MSGMNI
	limits.MsgSsz = linux.MSGSSZ
	limits.MsgTql = linux.MSGTQL
	limits.MsgSeg = linux.MSGSEG
	return &limits
}
// MsgInfo reports current usage of message queues alongside the static
// limits. See msgctl(MSG_INFO).
//
// Three fields of the result carry usage figures instead of limits: MsgPool
// holds the number of queues in the registry, MsgMap the total number of
// messages in all queues, and MsgTql the total number of bytes in all
// messages. The totals are accumulated as uint64 and saturate at the largest
// int32 value instead of wrapping around to a negative number.
//
// The registry lock is held for the whole call, and each queue's lock is held
// while its counters are read.
func (r *Registry) MsgInfo(ctx context.Context) *linux.MsgInfo {
	r.mu.Lock()
	defer r.mu.Unlock()

	var messages, bytes uint64
	r.reg.ForAllObjects(
		func(o ipc.Mechanism) {
			q := o.(*Queue)
			q.mu.Lock()
			messages += q.messageCount
			bytes += q.byteCount
			q.mu.Unlock()
		},
	)

	// saturate narrows a total to int32 without letting it overflow.
	const maxInt32 = 1<<31 - 1
	saturate := func(v uint64) int32 {
		if v > maxInt32 {
			return maxInt32
		}
		return int32(v)
	}

	return &linux.MsgInfo{
		MsgPool: int32(r.reg.ObjectCount()),
		MsgMap:  saturate(messages),
		MsgTql:  saturate(bytes),
		MsgMax:  linux.MSGMAX,
		MsgMnb:  linux.MSGMNB,
		MsgMni:  linux.MSGMNI,
		MsgSsz:  linux.MSGSSZ,
		MsgSeg:  linux.MSGSEG,
	}
}
// Send appends a message to the message queue, and returns an error if sending
// fails. See msgsnd(2).
func (q *Queue) Send(ctx context.Context, m Message, b Blocker, wait bool, pid int32) error {
@@ -465,6 +507,73 @@ func (q *Queue) msgAtIndex(mType int64) *Message {
return msg
}
// Set updates the queue from ds: the owner and permission bits are taken from
// ds.MsgPerm (via the underlying ipc object) and the queue's byte limit from
// ds.MsgQbytes. The change time is refreshed on success. See msgctl(IPC_SET).
//
// EPERM is returned when ds.MsgQbytes exceeds maxQueueBytes and the caller
// lacks CAP_SYS_RESOURCE in the queue's user namespace; any error from
// updating the underlying object is returned unchanged. The queue is left
// untouched on failure.
func (q *Queue) Set(ctx context.Context, ds *linux.MsqidDS) error {
	q.mu.Lock()
	defer q.mu.Unlock()

	callerCreds := auth.CredentialsFromContext(ctx)
	if ds.MsgQbytes > maxQueueBytes {
		if !callerCreds.HasCapabilityIn(linux.CAP_SYS_RESOURCE, q.obj.UserNS) {
			// "An attempt (IPC_SET) was made to increase msg_qbytes beyond the
			// system parameter MSGMNB, but the caller is not privileged (Linux:
			// does not have the CAP_SYS_RESOURCE capability)."
			return linuxerr.EPERM
		}
	}

	// Ownership and uid/gid validation happen inside the object.
	err := q.obj.Set(ctx, &ds.MsgPerm)
	if err != nil {
		return err
	}

	q.changeTime = ktime.NowFromContext(ctx)
	q.maxBytes = ds.MsgQbytes
	return nil
}
// Stat produces a MsqidDS snapshot of the queue, requesting read permission
// on the queue from the caller. See msgctl(IPC_STAT) and msgctl(MSG_STAT).
func (q *Queue) Stat(ctx context.Context) (*linux.MsqidDS, error) {
	var required fs.PermMask
	required.Read = true
	return q.stat(ctx, required)
}
// StatAny behaves like Queue.Stat but passes an empty permission mask, i.e.
// it does not ask for read permission. See msgctl(MSG_STAT_ANY).
func (q *Queue) StatAny(ctx context.Context) (*linux.MsqidDS, error) {
	var none fs.PermMask
	return q.stat(ctx, none)
}
// stat builds a MsqidDS describing the queue's current state. The caller's
// credentials are checked against mask first; EACCES is returned if that check
// fails. Owner and creator ids are translated into the caller's user
// namespace. The queue lock is held for the duration of the call.
func (q *Queue) stat(ctx context.Context, mask fs.PermMask) (*linux.MsqidDS, error) {
	q.mu.Lock()
	defer q.mu.Unlock()

	creds := auth.CredentialsFromContext(ctx)
	if allowed := q.obj.CheckPermissions(creds, mask); !allowed {
		// "The caller must have read permission on the message queue."
		return nil, linuxerr.EACCES
	}

	userNS := creds.UserNamespace
	perm := linux.IPCPerm{
		Key:  uint32(q.obj.Key),
		UID:  uint32(userNS.MapFromKUID(q.obj.Owner.UID)),
		GID:  uint32(userNS.MapFromKGID(q.obj.Owner.GID)),
		CUID: uint32(userNS.MapFromKUID(q.obj.Creator.UID)),
		CGID: uint32(userNS.MapFromKGID(q.obj.Creator.GID)),
		Mode: uint16(q.obj.Perms.LinuxMode()),
		Seq:  0, // IPC sequences not supported.
	}

	ds := &linux.MsqidDS{
		MsgPerm:   perm,
		MsgStime:  q.sendTime.TimeT(),
		MsgRtime:  q.receiveTime.TimeT(),
		MsgCtime:  q.changeTime.TimeT(),
		MsgCbytes: q.byteCount,
		MsgQnum:   q.messageCount,
		MsgQbytes: q.maxBytes,
		MsgLspid:  q.sendPID,
		MsgLrpid:  q.receivePID,
	}
	return ds, nil
}
// Lock implements ipc.Mechanism.Lock.
func (q *Queue) Lock() {
q.mu.Lock()
+4 -8
View File
@@ -336,19 +336,15 @@ func (s *Set) Size() int {
return len(s.sems)
}
// Change changes some fields from the set atomically.
func (s *Set) Change(ctx context.Context, creds *auth.Credentials, owner fs.FileOwner, perms fs.FilePermissions) error {
// Set modifies attributes for a semaphore set. See semctl(IPC_SET).
func (s *Set) Set(ctx context.Context, ds *linux.SemidDS) error {
s.mu.Lock()
defer s.mu.Unlock()
// "The effective UID of the calling process must match the owner or creator
// of the semaphore set, or the caller must be privileged."
if !s.obj.CheckOwnership(creds) {
return linuxerr.EACCES
if err := s.obj.Set(ctx, &ds.SemPerm); err != nil {
return err
}
s.obj.Owner = owner
s.obj.Perms = perms
s.changeTime = ktime.NowFromContext(ctx)
return nil
}
+2 -17
View File
@@ -618,25 +618,10 @@ func (s *Shm) Set(ctx context.Context, ds *linux.ShmidDS) error {
s.mu.Lock()
defer s.mu.Unlock()
creds := auth.CredentialsFromContext(ctx)
if !s.obj.CheckOwnership(creds) {
return linuxerr.EPERM
if err := s.obj.Set(ctx, &ds.ShmPerm); err != nil {
return err
}
uid := creds.UserNamespace.MapToKUID(auth.UID(ds.ShmPerm.UID))
gid := creds.UserNamespace.MapToKGID(auth.GID(ds.ShmPerm.GID))
if !uid.Ok() || !gid.Ok() {
return linuxerr.EINVAL
}
// User may only modify the lower 9 bits of the mode. All the other bits are
// always 0 for the underlying inode.
mode := linux.FileMode(ds.ShmPerm.Mode & 0x1ff)
s.obj.Perms = fs.FilePermsFromMode(mode)
s.obj.Owner.UID = uid
s.obj.Owner.GID = gid
s.changeTime = ktime.NowFromContext(ctx)
return nil
}
+2 -2
View File
@@ -123,7 +123,7 @@ var AMD64 = &kernel.SyscallTable{
68: syscalls.Supported("msgget", Msgget),
69: syscalls.Supported("msgsnd", Msgsnd),
70: syscalls.Supported("msgrcv", Msgrcv),
71: syscalls.PartiallySupported("msgctl", Msgctl, "Only supports IPC_RMID option.", []string{"gvisor.dev/issue/135"}),
71: syscalls.Supported("msgctl", Msgctl),
72: syscalls.PartiallySupported("fcntl", Fcntl, "Not all options are supported.", nil),
73: syscalls.PartiallySupported("flock", Flock, "Locks are held within the sandbox only.", nil),
74: syscalls.PartiallySupported("fsync", Fsync, "Full data flush is not guaranteed at this time.", nil),
@@ -616,7 +616,7 @@ var ARM64 = &kernel.SyscallTable{
184: syscalls.ErrorWithEvent("mq_notify", linuxerr.ENOSYS, "", []string{"gvisor.dev/issue/136"}), // TODO(b/29354921)
185: syscalls.ErrorWithEvent("mq_getsetattr", linuxerr.ENOSYS, "", []string{"gvisor.dev/issue/136"}), // TODO(b/29354921)
186: syscalls.Supported("msgget", Msgget),
187: syscalls.PartiallySupported("msgctl", Msgctl, "Only supports IPC_RMID option.", []string{"gvisor.dev/issue/135"}),
187: syscalls.Supported("msgctl", Msgctl),
188: syscalls.Supported("msgrcv", Msgrcv),
189: syscalls.Supported("msgsnd", Msgsnd),
190: syscalls.Supported("semget", Semget),
+52 -1
View File
@@ -130,12 +130,63 @@ func receive(t *kernel.Task, id ipc.ID, mType int64, maxSize int64, msgCopy, wai
func Msgctl(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) {
id := ipc.ID(args[0].Int())
cmd := args[1].Int()
buf := args[2].Pointer()
creds := auth.CredentialsFromContext(t)
r := t.IPCNamespace().MsgqueueRegistry()
switch cmd {
case linux.IPC_INFO:
info := r.IPCInfo(t)
_, err := info.CopyOut(t, buf)
return 0, nil, err
case linux.MSG_INFO:
msgInfo := r.MsgInfo(t)
_, err := msgInfo.CopyOut(t, buf)
return 0, nil, err
case linux.IPC_RMID:
return 0, nil, t.IPCNamespace().MsgqueueRegistry().Remove(id, creds)
return 0, nil, r.Remove(id, creds)
}
// Remaining commands use a queue.
queue, err := r.FindByID(id)
if err != nil {
return 0, nil, err
}
switch cmd {
case linux.MSG_STAT:
// Technically, we should be treating id as "an index into the kernel's
// internal array that maintains information about all message queues on
// the system". Since we don't track queues in an array, we'll just
// pretend the msqid is the index and do the same thing as IPC_STAT. Linux
// also uses the index as the msqid.
fallthrough
case linux.IPC_STAT:
stat, err := queue.Stat(t)
if err != nil {
return 0, nil, err
}
_, err = stat.CopyOut(t, buf)
return 0, nil, err
case linux.MSG_STAT_ANY:
stat, err := queue.StatAny(t)
if err != nil {
return 0, nil, err
}
_, err = stat.CopyOut(t, buf)
return 0, nil, err
case linux.IPC_SET:
var ds linux.MsqidDS
if _, err := ds.CopyIn(t, buf); err != nil {
return 0, nil, linuxerr.EINVAL
}
err := queue.Set(t, &ds)
return 0, nil, err
default:
return 0, nil, linuxerr.EINVAL
}
+3 -16
View File
@@ -23,7 +23,6 @@ import (
"gvisor.dev/gvisor/pkg/hostarch"
"gvisor.dev/gvisor/pkg/marshal/primitive"
"gvisor.dev/gvisor/pkg/sentry/arch"
"gvisor.dev/gvisor/pkg/sentry/fs"
"gvisor.dev/gvisor/pkg/sentry/kernel"
"gvisor.dev/gvisor/pkg/sentry/kernel/auth"
"gvisor.dev/gvisor/pkg/sentry/kernel/ipc"
@@ -166,8 +165,7 @@ func Semctl(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscal
return 0, nil, err
}
perms := fs.FilePermsFromMode(linux.FileMode(s.SemPerm.Mode & 0777))
return 0, nil, ipcSet(t, id, auth.UID(s.SemPerm.UID), auth.GID(s.SemPerm.GID), perms)
return 0, nil, ipcSet(t, id, &s)
case linux.GETPID:
v, err := getPID(t, id, num)
@@ -243,24 +241,13 @@ func remove(t *kernel.Task, id ipc.ID) error {
return r.Remove(id, creds)
}
func ipcSet(t *kernel.Task, id ipc.ID, uid auth.UID, gid auth.GID, perms fs.FilePermissions) error {
func ipcSet(t *kernel.Task, id ipc.ID, ds *linux.SemidDS) error {
r := t.IPCNamespace().SemaphoreRegistry()
set := r.FindByID(id)
if set == nil {
return linuxerr.EINVAL
}
creds := auth.CredentialsFromContext(t)
kuid := creds.UserNamespace.MapToKUID(uid)
if !kuid.Ok() {
return linuxerr.EINVAL
}
kgid := creds.UserNamespace.MapToKGID(gid)
if !kgid.Ok() {
return linuxerr.EINVAL
}
owner := fs.FileOwner{UID: kuid, GID: kgid}
return set.Change(t, creds, owner, perms)
return set.Set(t, ds)
}
func ipcStat(t *kernel.Task, id ipc.ID) (*linux.SemidDS, error) {
+178 -2
View File
@@ -30,9 +30,15 @@ namespace gvisor {
namespace testing {
namespace {
constexpr int msgMax = 8192; // Max size for message in bytes.
// Source: include/uapi/linux/msg.h
constexpr int msgMnb = 16384; // Maximum number of bytes in a queue.
constexpr int msgMni = 32000; // Max number of identifiers.
constexpr int msgMnb = 16384; // Default max size of message queue in bytes.
constexpr int msgPool =
(msgMni * msgMnb / 1024); // Size of buffer pool used to hold message data.
constexpr int msgMap = msgMnb; // Maximum number of entries in message map.
constexpr int msgMax = 8192; // Maximum number of bytes in a single message.
constexpr int msgSsz = 16; // Message segment size.
constexpr int msgTql = msgMnb; // Maximum number of messages on all queues.
constexpr int kInterruptSignal = SIGALRM;
@@ -689,6 +695,176 @@ TEST(MsgqueueTest, InterruptSend) {
t.Join();
}
// Test msgctl with IPC_STAT option.
//
// Checks the reported state of a freshly created queue, then again after one
// message is sent and after that message is received, verifying that the
// timestamps, byte/message counters and last-sender/last-receiver pids follow.
TEST(MsgqueueTest, MsgCtlIpcStat) {
  auto start = absl::Now();

  Queue queue(msgget(IPC_PRIVATE, 0600));
  ASSERT_THAT(queue.get(), SyscallSucceeds());

  const uid_t uid = getuid();
  const gid_t gid = getgid();
  const pid_t pid = getpid();

  struct msqid_ds ds;
  ASSERT_THAT(msgctl(queue.get(), IPC_STAT, &ds), SyscallSucceeds());

  EXPECT_EQ(ds.msg_perm.__key, IPC_PRIVATE);
  EXPECT_EQ(ds.msg_perm.uid, uid);
  EXPECT_EQ(ds.msg_perm.gid, gid);
  EXPECT_EQ(ds.msg_perm.cuid, uid);
  EXPECT_EQ(ds.msg_perm.cgid, gid);
  EXPECT_EQ(ds.msg_perm.mode, 0600);

  EXPECT_EQ(ds.msg_stime, 0);
  EXPECT_EQ(ds.msg_rtime, 0);
  EXPECT_GE(ds.msg_ctime, absl::ToTimeT(start));

  EXPECT_EQ(ds.msg_cbytes, 0);
  EXPECT_EQ(ds.msg_qnum, 0);
  EXPECT_EQ(ds.msg_qbytes, msgMnb);

  EXPECT_EQ(ds.msg_lspid, 0);
  EXPECT_EQ(ds.msg_lrpid, 0);

  // The timestamps only have a resolution of seconds; slow down so we actually
  // see the timestamps change.
  absl::SleepFor(absl::Seconds(1));
  auto pre_send = absl::Now();

  // msgsnd(2) rejects a non-positive message type with EINVAL, so the buffer
  // must not be left uninitialized. NOTE(review): msgbuf is declared elsewhere
  // in this file; this assumes it exposes the conventional `mtype` member.
  msgbuf buf{};
  buf.mtype = 1;
  ASSERT_THAT(msgsnd(queue.get(), &buf, msgSize, 0), SyscallSucceeds());

  ASSERT_THAT(msgctl(queue.get(), IPC_STAT, &ds), SyscallSucceeds());

  EXPECT_GE(ds.msg_stime, absl::ToTimeT(pre_send));
  EXPECT_EQ(ds.msg_rtime, 0);
  EXPECT_GE(ds.msg_ctime, absl::ToTimeT(start));

  EXPECT_EQ(ds.msg_cbytes, msgSize);
  EXPECT_EQ(ds.msg_qnum, 1);
  EXPECT_EQ(ds.msg_qbytes, msgMnb);

  EXPECT_EQ(ds.msg_lspid, pid);
  EXPECT_EQ(ds.msg_lrpid, 0);

  absl::SleepFor(absl::Seconds(1));
  auto pre_receive = absl::Now();

  ASSERT_THAT(msgrcv(queue.get(), &buf, msgSize, 0, 0),
              SyscallSucceedsWithValue(msgSize));

  ASSERT_THAT(msgctl(queue.get(), IPC_STAT, &ds), SyscallSucceeds());

  EXPECT_GE(ds.msg_stime, absl::ToTimeT(pre_send));
  EXPECT_GE(ds.msg_rtime, absl::ToTimeT(pre_receive));
  EXPECT_GE(ds.msg_ctime, absl::ToTimeT(start));

  EXPECT_EQ(ds.msg_cbytes, 0);
  EXPECT_EQ(ds.msg_qnum, 0);
  EXPECT_EQ(ds.msg_qbytes, msgMnb);

  EXPECT_EQ(ds.msg_lspid, pid);
  EXPECT_EQ(ds.msg_lrpid, pid);
}
// Verify that IPC_STAT is refused with EACCES on a queue whose mode grants
// write access only.
TEST(MsgqueueTest, MsgCtlIpcStatWriteOnly) {
  // CAP_IPC_OWNER would let us bypass the permission check, so drop it for
  // the duration of the test.
  AutoCapability drop_ipc_owner(CAP_IPC_OWNER, false);

  constexpr int kWriteOnlyMode = 0200;
  Queue write_only(msgget(IPC_PRIVATE, kWriteOnlyMode));
  ASSERT_THAT(write_only.get(), SyscallSucceeds());

  struct msqid_ds stat;
  ASSERT_THAT(msgctl(write_only.get(), IPC_STAT, &stat),
              SyscallFailsWithErrno(EACCES));
}
// Verify that IPC_SET changes the permission bits of a queue and that the new
// mode is visible through a subsequent IPC_STAT.
TEST(MsgqueueTest, MsgCtlIpcSet) {
  constexpr int kInitialMode = 0600;
  constexpr int kUpdatedMode = 0777;

  Queue queue(msgget(IPC_PRIVATE, kInitialMode));
  ASSERT_THAT(queue.get(), SyscallSucceeds());
  const int id = queue.get();

  struct msqid_ds attrs;
  ASSERT_THAT(msgctl(id, IPC_STAT, &attrs), SyscallSucceeds());
  EXPECT_EQ(attrs.msg_perm.mode, kInitialMode);

  // Write back the same structure with only the mode changed.
  attrs.msg_perm.mode = kUpdatedMode;
  ASSERT_THAT(msgctl(id, IPC_SET, &attrs), SyscallSucceeds());

  ASSERT_THAT(msgctl(id, IPC_STAT, &attrs), SyscallSucceeds());
  EXPECT_EQ(attrs.msg_perm.mode, kUpdatedMode);
}
// Verify that, without CAP_SYS_RESOURCE, IPC_SET may lower msg_qbytes but
// fails with EPERM when asked to raise it above MSGMNB.
TEST(MsgqueueTest, MsgCtlIpcSetMaxBytes) {
  // CAP_SYS_RESOURCE permits raising msg_qbytes past the system parameter
  // MSGMNB; drop it so the limit is enforced.
  AutoCapability drop_sys_resource(CAP_SYS_RESOURCE, false);

  Queue queue(msgget(IPC_PRIVATE, 0600));
  ASSERT_THAT(queue.get(), SyscallSucceeds());
  const int id = queue.get();

  struct msqid_ds attrs;
  ASSERT_THAT(msgctl(id, IPC_STAT, &attrs), SyscallSucceeds());
  EXPECT_EQ(attrs.msg_qbytes, msgMnb);

  // Shrinking the queue is always allowed.
  constexpr int kBelowLimit = msgMnb - 10;
  attrs.msg_qbytes = kBelowLimit;
  ASSERT_THAT(msgctl(id, IPC_SET, &attrs), SyscallSucceeds());

  ASSERT_THAT(msgctl(id, IPC_STAT, &attrs), SyscallSucceeds());
  EXPECT_EQ(attrs.msg_qbytes, kBelowLimit);

  // Growing past the limit requires the capability dropped above.
  constexpr int kAboveLimit = msgMnb + 10;
  attrs.msg_qbytes = kAboveLimit;
  EXPECT_THAT(msgctl(id, IPC_SET, &attrs), SyscallFailsWithErrno(EPERM));
}
// Verify the system-wide limits reported by msgctl(IPC_INFO).
TEST(MsgqueueTest, MsgCtlIpcInfo) {
  struct msginfo limits;
  // IPC_INFO writes a msginfo structure through the msqid_ds pointer, hence
  // the cast.
  auto* out = reinterpret_cast<struct msqid_ds*>(&limits);
  ASSERT_THAT(msgctl(0, IPC_INFO, out), SyscallSucceeds());

  // These limits only need to be positive.
  EXPECT_GT(limits.msgmax, 0);
  EXPECT_GT(limits.msgmni, 0);
  EXPECT_GT(limits.msgmnb, 0);

  // The remaining fields are expected to match the fixed values exactly.
  EXPECT_EQ(limits.msgpool, msgPool);
  EXPECT_EQ(limits.msgmap, msgMap);
  EXPECT_EQ(limits.msgssz, msgSsz);
  EXPECT_EQ(limits.msgtql, msgTql);
}
// Test msgctl with MSG_INFO option.
//
// MSG_INFO reports usage counters in msgpool, msgmap and msgtql. The test
// records a baseline, creates one queue holding one message, and checks that
// each counter grew by exactly that amount. Comparing against the baseline
// (rather than against zero) keeps the test valid when other queues already
// exist in the IPC namespace.
TEST(MsgqueueTest, MsgCtlMsgInfo) {
  struct msginfo before;
  ASSERT_THAT(msgctl(0, MSG_INFO, reinterpret_cast<struct msqid_ds*>(&before)),
              SyscallSucceeds());

  EXPECT_GT(before.msgmax, 0);
  EXPECT_GT(before.msgmni, 0);
  EXPECT_GT(before.msgmnb, 0);
  EXPECT_GE(before.msgpool, 0);  // Number of queues in the system.
  EXPECT_GE(before.msgmap, 0);   // Total number of messages in all queues.
  EXPECT_GE(before.msgtql, 0);   // Total number of bytes in all messages.
  EXPECT_EQ(before.msgssz, msgSsz);

  // Add a queue and a message.
  Queue queue(msgget(IPC_PRIVATE, 0600));
  ASSERT_THAT(queue.get(), SyscallSucceeds());

  // msgsnd(2) rejects a non-positive message type with EINVAL, so the buffer
  // must not be left uninitialized. NOTE(review): msgbuf is declared elsewhere
  // in this file; this assumes it exposes the conventional `mtype` member.
  msgbuf buf{};
  buf.mtype = 1;
  ASSERT_THAT(msgsnd(queue.get(), &buf, msgSize, 0), SyscallSucceeds());

  struct msginfo after;
  ASSERT_THAT(msgctl(0, MSG_INFO, reinterpret_cast<struct msqid_ds*>(&after)),
              SyscallSucceeds());

  EXPECT_GT(after.msgmax, 0);
  EXPECT_GT(after.msgmni, 0);
  EXPECT_GT(after.msgmnb, 0);
  EXPECT_EQ(after.msgpool, before.msgpool + 1);       // One more queue.
  EXPECT_EQ(after.msgmap, before.msgmap + 1);         // One more message.
  EXPECT_EQ(after.msgtql, before.msgtql + msgSize);   // msgSize more bytes.
  EXPECT_EQ(after.msgssz, msgSsz);
}
} // namespace
} // namespace testing
} // namespace gvisor