How can I restrict filename characters?
Suppose I want to limit which characters are allowed in filenames. For example, I want file creation to fail if there is a \n (newline) in the name.
Is there a way to enforce this?
If it matters, I prefer an answer for Arch Linux.
2 answers
A file can be created by the human user directly interacting on the laptop, or indirectly by a program.
In principle, you cannot statically detect every file-creation call in a piece of software (even if you have access to its source code), because of Rice's theorem.
In practice, you could (with a lot of effort) customize your user interface (e.g. a graphical desktop such as GNOME) to limit, in most (but not all) cases, what the user is permitted to create interactively.
You could even (in theory) design your own operating system which has no files at all (but persistent objects). See the old Tunes project.
3 comment threads
You could try to convince the Linux kernel maintainers, but they are reluctant to do that. I wonder what they'll do if POSIX.1-202x (Issue 8) ends up forbidding such characters.
Maybe you could patch your kernel. It would be an interesting thing to do, even if just for fun. I've never done such a thing, so maybe I'm a bit off, but after a quick inspection of the Linux kernel v6.4 code, I think I found a place where you could hack this thing.
Here's how I found the place:
I started by inspecting open(2), which is the first thing that comes to mind for creating files.
alx@debian:~/src/linux/linux/6.4$ grepc -tfsd open
./fs/open.c:1376:
/*
 * open(2) system call entry point.
 *
 * Adds O_LARGEFILE to flags when force_o_largefile() is true, then forwards
 * the user-space pathname pointer, flags and mode to do_sys_open() with
 * AT_FDCWD as the directory fd. The return value is do_sys_open()'s.
 */
SYSCALL_DEFINE3(open, const char __user *, filename, int, flags, umode_t, mode)
{
if (force_o_largefile())
flags |= O_LARGEFILE;
return do_sys_open(AT_FDCWD, filename, flags, mode);
}
./fs/open.c:1421:
/*
 * Compat variant of the open(2) entry point.
 *
 * Forwards the user-space pathname pointer, flags and mode unchanged to
 * do_sys_open() with AT_FDCWD as the directory fd; unlike the native
 * SYSCALL_DEFINE3(open, ...) it never ORs O_LARGEFILE into flags.
 */
COMPAT_SYSCALL_DEFINE3(open, const char __user *, filename, int, flags, umode_t, mode)
{
return do_sys_open(AT_FDCWD, filename, flags, mode);
}
Okay, let's see do_sys_open().
$ grepc do_sys_open
./include/linux/fs.h:2340:
extern long do_sys_open(int dfd, const char __user *filename, int flags,
umode_t mode);
./fs/open.c:1369:
/*
 * Adapter from the (flags, mode) calling convention to struct open_how.
 *
 * Packs flags and mode into an open_how via build_open_how() and delegates
 * to do_sys_openat2(). The pathname pointer is passed through untouched.
 */
long do_sys_open(int dfd, const char __user *filename, int flags, umode_t mode)
{
struct open_how how = build_open_how(flags, mode);
return do_sys_openat2(dfd, filename, &how);
}
The filename is only passed to do_sys_openat2(); let's see that one.
$ grepc do_sys_openat2
./fs/open.c:1340:
/*
 * Common open path: turn a user-space pathname plus open_how into an
 * installed file descriptor.
 *
 * @dfd:      directory fd handed on to do_filp_open().
 * @filename: user-space pointer to the pathname; copied via getname().
 * @how:      open parameters; converted with build_open_flags().
 *
 * Returns the new fd (>= 0) on success. On failure returns, in order of
 * checking: the non-zero result of build_open_flags(), PTR_ERR() of the
 * getname() result, the negative result of get_unused_fd_flags(), or
 * PTR_ERR() of the do_filp_open() result.
 */
static long do_sys_openat2(int dfd, const char __user *filename,
struct open_how *how)
{
struct open_flags op;
/* fd doubles as the status of build_open_flags(); non-zero is returned as-is. */
int fd = build_open_flags(how, &op);
struct filename *tmp;
if (fd)
return fd;
/* This is where the pathname is brought in from user space. */
tmp = getname(filename);
if (IS_ERR(tmp))
return PTR_ERR(tmp);
fd = get_unused_fd_flags(how->flags);
if (fd >= 0) {
struct file *f = do_filp_open(dfd, tmp, &op);
if (IS_ERR(f)) {
/* Open failed: give the reserved fd back and report the error instead. */
put_unused_fd(fd);
fd = PTR_ERR(f);
} else {
fsnotify_open(f);
fd_install(fd, f);
}
}
/* Reached on every path once getname() has succeeded. */
putname(tmp);
return fd;
}
There, the file name seems to be read in getname().
$ grepc getname
./include/linux/fs.h:2365:
extern struct filename *getname(const char __user *);
./fs/namei.c:216:
/*
 * Convenience wrapper: getname_flags() with flags == 0 and no "empty"
 * out-parameter. Returns whatever getname_flags() returns.
 */
struct filename *
getname(const char __user * filename)
{
return getname_flags(filename, 0, NULL);
}
And getname_flags() seems to be the function that is checking the file name.
$ grepc getname_flags
./include/linux/fs.h:2363:
extern struct filename *getname_flags(const char __user *, int, int *);
./fs/namei.c:129:
/*
 * Copy a pathname from user space into a struct filename.
 *
 * @filename: user-space pointer to the pathname string.
 * @flags:    only LOOKUP_EMPTY is examined here; when set, an empty
 *            pathname is accepted instead of failing with -ENOENT.
 * @empty:    optional out-parameter; when non-NULL it is set to 1 if the
 *            copied pathname is empty (it is not written otherwise).
 *
 * Returns the struct filename on success, or an ERR_PTR():
 *   -ENOMEM        if __getname() or kzalloc() fails,
 *   -ENAMETOOLONG  if the second copy returns exactly PATH_MAX,
 *   -ENOENT        if the pathname is empty and LOOKUP_EMPTY is not set,
 *   or the negative value returned by strncpy_from_user().
 *
 * If audit_reusename() yields a non-NULL entry for this pointer, that entry
 * is returned as-is and nothing is copied.
 */
struct filename *
getname_flags(const char __user *filename, int flags, int *empty)
{
struct filename *result;
char *kname;
int len;
/* Short-circuit: hand back the entry audit_reusename() found, if any. */
result = audit_reusename(filename);
if (result)
return result;
result = __getname();
if (unlikely(!result))
return ERR_PTR(-ENOMEM);
/*
* First, try to embed the struct filename inside the names_cache
* allocation
*/
kname = (char *)result->iname;
result->name = kname;
len = strncpy_from_user(kname, filename, EMBEDDED_NAME_MAX);
if (unlikely(len < 0)) {
__putname(result);
return ERR_PTR(len);
}
/*
* Uh-oh. We have a name that's approaching PATH_MAX. Allocate a
* separate struct filename so we can dedicate the entire
* names_cache allocation for the pathname, and re-do the copy from
* userland.
*/
if (unlikely(len == EMBEDDED_NAME_MAX)) {
const size_t size = offsetof(struct filename, iname[1]);
/* The original allocation now serves purely as the pathname buffer. */
kname = (char *)result;
/*
* size is chosen that way we to guarantee that
* result->iname[0] is within the same object and that
* kname can't be equal to result->iname, no matter what.
*/
result = kzalloc(size, GFP_KERNEL);
if (unlikely(!result)) {
__putname(kname);
return ERR_PTR(-ENOMEM);
}
result->name = kname;
len = strncpy_from_user(kname, filename, PATH_MAX);
/* From here on, error paths must release both kname and result. */
if (unlikely(len < 0)) {
__putname(kname);
kfree(result);
return ERR_PTR(len);
}
if (unlikely(len == PATH_MAX)) {
__putname(kname);
kfree(result);
return ERR_PTR(-ENAMETOOLONG);
}
}
/* At this point result->name holds the copied pathname and len its length. */
result->refcnt = 1;
/* The empty path is special. */
if (unlikely(!len)) {
if (empty)
*empty = 1;
if (!(flags & LOOKUP_EMPTY)) {
/* Released with putname() here, not __putname() as on the earlier error paths. */
putname(result);
return ERR_PTR(-ENOENT);
}
}
result->uptr = filename;
result->aname = NULL;
audit_getname(result);
return result;
}
You could add a check there that scans the copied name for \n (or for any byte in the range [1, 31]) and, if one is found, releases result the same way the existing error paths do and then returns ERR_PTR(-EINVAL).
(If you try it and it works, I'd be curious to know your results.)
1 comment thread