mirror of
https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git
synced 2025-01-04 04:06:26 +00:00
scsi: fs: remove exofs
This was an example for using the SCSI OSD protocol, which we're trying to remove. Signed-off-by: Christoph Hellwig <hch@lst.de> Reviewed-by: Jens Axboe <axboe@kernel.dk> Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
This commit is contained in:
parent
972248e911
commit
80f2121380
@ -1,185 +0,0 @@
|
||||
===============================================================================
|
||||
WHAT IS EXOFS?
|
||||
===============================================================================
|
||||
|
||||
exofs is a file system that uses an OSD and exports the API of a normal Linux
|
||||
file system. Users access exofs like any other local file system, and exofs
|
||||
will in turn issue commands to the local OSD initiator.
|
||||
|
||||
OSD is a new T10 command set that views storage devices not as a large/flat
|
||||
array of sectors but as a container of objects, each having a length, quota,
|
||||
time attributes and more. Each object is addressed by a 64bit ID, and is
|
||||
contained in a 64bit ID partition. Each object has associated attributes
|
||||
attached to it, which are an integral part of the object and provide metadata about
|
||||
the object. The standard defines some common obligatory attributes, but user
|
||||
attributes can be added as needed.
|
||||
|
||||
===============================================================================
|
||||
ENVIRONMENT
|
||||
===============================================================================
|
||||
|
||||
To use this file system, you need to have an object store to run it on. You
|
||||
may download a target from:
|
||||
http://open-osd.org
|
||||
|
||||
See Documentation/scsi/osd.txt for how to set up a working osd environment.
|
||||
|
||||
===============================================================================
|
||||
USAGE
|
||||
===============================================================================
|
||||
|
||||
1. Download and compile exofs and open-osd initiator:
|
||||
You need an external Kernel source tree or kernel headers from your
|
||||
distribution. (anything based on 2.6.26 or later).
|
||||
|
||||
a. download open-osd including exofs source using:
|
||||
[parent-directory]$ git clone git://git.open-osd.org/open-osd.git
|
||||
|
||||
b. Build the library module like this:
|
||||
[parent-directory]$ make -C KSRC=$(KER_DIR) open-osd
|
||||
|
||||
This will build both the open-osd initiator as well as the exofs kernel
|
||||
module. Use whatever parameters you compiled your Kernel with and
|
||||
$(KER_DIR) above pointing to the Kernel you compile against. See the file
|
||||
open-osd/top-level-Makefile for an example.
|
||||
|
||||
2. Get the OSD initiator and target set up properly, and login to the target.
|
||||
See Documentation/scsi/osd.txt for further instructions. Also see ./do-osd
|
||||
for an example script that does all these steps.
|
||||
|
||||
3. Insmod the exofs.ko module:
|
||||
[exofs]$ insmod exofs.ko
|
||||
|
||||
4. Make sure the directory where you want to mount exists. If not, create it.
|
||||
(For example, mkdir /mnt/exofs)
|
||||
|
||||
5. At first run you will need to invoke the mkfs.exofs application
|
||||
|
||||
As an example, this will create the file system on:
|
||||
/dev/osd0 partition ID 65536
|
||||
|
||||
mkfs.exofs --pid=65536 --format /dev/osd0
|
||||
|
||||
The --format is optional. If not specified, no OSD_FORMAT will be
|
||||
performed and a clean file system will be created in the specified pid,
|
||||
in the available space of the target. (Use --format=size_in_meg to limit
|
||||
the total LUN space available)
|
||||
|
||||
If pid already exists, it will be deleted and a new one will be created in
|
||||
its place. Be careful.
|
||||
|
||||
An exofs lives inside a single OSD partition. You can create multiple exofs
|
||||
filesystems on the same device using multiple pids.
|
||||
|
||||
(run mkfs.exofs without any parameters for usage help message)
|
||||
|
||||
6. Mount the file system.
|
||||
|
||||
For example, to mount /dev/osd0, partition ID 0x10000 on /mnt/exofs:
|
||||
|
||||
mount -t exofs -o pid=65536 /dev/osd0 /mnt/exofs/
|
||||
|
||||
7. For reference (See do-exofs example script):
|
||||
do-exofs start - an example of how to perform the above steps.
|
||||
do-exofs stop - an example of how to unmount the file system.
|
||||
do-exofs format - an example of how to format and mkfs a new exofs.
|
||||
|
||||
8. Extra compilation flags (uncomment in fs/exofs/Kbuild):
|
||||
CONFIG_EXOFS_DEBUG - for debug messages and extra checks.
|
||||
|
||||
===============================================================================
|
||||
exofs mount options
|
||||
===============================================================================
|
||||
Similar to any mount command:
|
||||
mount -t exofs -o exofs_options /dev/osdX mount_exofs_directory
|
||||
|
||||
Where:
|
||||
-t exofs: specifies the exofs file system
|
||||
|
||||
/dev/osdX: X is a decimal number. /dev/osdX was created after a successful
|
||||
login into an OSD target.
|
||||
|
||||
mount_exofs_directory: The directory to mount the file system on
|
||||
|
||||
exofs specific options: Options are separated by commas (,)
|
||||
pid=<integer> - The partition number to mount/create as
|
||||
container of the filesystem.
|
||||
This option is mandatory. integer can be
|
||||
hex by prepending 0x to the number.
|
||||
osdname=<id> - Mount by a device's osdname.
|
||||
osdname is usually a 36 character uuid of the
|
||||
form "d2683732-c906-4ee1-9dbd-c10c27bb40df".
|
||||
It is one of the device uuids specified in the
|
||||
mkfs.exofs format command.
|
||||
If this option is specified then the /dev/osdX
|
||||
above can be empty and is ignored.
|
||||
to=<integer> - Timeout in ticks for a single command.
|
||||
default is (60 * HZ) [for debugging only]
|
||||
|
||||
===============================================================================
|
||||
DESIGN
|
||||
===============================================================================
|
||||
|
||||
* The file system control block (AKA on-disk superblock) resides in an object
|
||||
with a special ID (defined in common.h).
|
||||
Information included in the file system control block is used to fill the
|
||||
in-memory superblock structure at mount time. This object is created before
|
||||
the file system is used by mkexofs.c. It contains information such as:
|
||||
- The file system's magic number
|
||||
- The next inode number to be allocated
|
||||
|
||||
* Each file resides in its own object and contains the data (and it will be
|
||||
possible to extend the file over multiple objects, though this has not been
|
||||
implemented yet).
|
||||
|
||||
* A directory is treated as a file, and essentially contains a list of <file
|
||||
name, inode #> pairs for files that are found in that directory. The object
|
||||
IDs correspond to the files' inode numbers and will be allocated according to
|
||||
a bitmap (stored in a separate object). Now they are allocated using a
|
||||
counter.
|
||||
|
||||
* Each file's control block (AKA on-disk inode) is stored in its object's
|
||||
attributes. This applies to both regular files and other types (directories,
|
||||
device files, symlinks, etc.).
|
||||
|
||||
* Credentials are generated per object (inode and superblock) when they are
|
||||
created in memory (read from disk or created). The credential works for all
|
||||
operations and is used as long as the object remains in memory.
|
||||
|
||||
* Async OSD operations are used whenever possible, but the target may execute
|
||||
them out of order. The operations that concern us are create, delete,
|
||||
readpage, writepage, update_inode, and truncate. The following pairs of
|
||||
operations should execute in the order written, and we need to prevent them
|
||||
from executing in reverse order:
|
||||
- The following are handled with the OBJ_CREATED and OBJ_2BCREATED
|
||||
flags. OBJ_CREATED is set when we know the object exists on the OSD -
|
||||
in create's callback function, and when we successfully do a
|
||||
read_inode.
|
||||
OBJ_2BCREATED is set in the beginning of the create function, so we
|
||||
know that we should wait.
|
||||
- create/delete: delete should wait until the object is created
|
||||
on the OSD.
|
||||
- create/readpage: readpage should be able to return a page
|
||||
full of zeroes in this case. If there was a write already
|
||||
en-route (i.e. create, writepage, readpage) then the page
|
||||
would be locked, and so it would really be the same as
|
||||
create/writepage.
|
||||
- create/writepage: if writepage is called for a sync write, it
|
||||
should wait until the object is created on the OSD.
|
||||
Otherwise, it should just return.
|
||||
- create/truncate: truncate should wait until the object is
|
||||
created on the OSD.
|
||||
- create/update_inode: update_inode should wait until the
|
||||
object is created on the OSD.
|
||||
- Handled by VFS locks:
|
||||
- readpage/delete: shouldn't happen because of page lock.
|
||||
- writepage/delete: shouldn't happen because of page lock.
|
||||
- readpage/writepage: shouldn't happen because of page lock.
|
||||
|
||||
===============================================================================
|
||||
LICENSE/COPYRIGHT
|
||||
===============================================================================
|
||||
The exofs file system is based on ext2 v0.5b (distributed with the Linux kernel
|
||||
version 2.6.10). All files include the original copyrights, and the license
|
||||
is GPL version 2 (only version 2, as is true for the Linux kernel). The
|
||||
Linux kernel can be downloaded from www.kernel.org.
|
@ -24,11 +24,6 @@ osd-uld:
|
||||
platform, both for the in-kernel initiator as well as connected targets. It
|
||||
currently has no useful user-mode API, though it could have if need be.
|
||||
|
||||
exofs:
|
||||
Is an OSD based Linux file system. It uses the osd-initiator and osd-uld,
|
||||
to export a usable file system for users.
|
||||
See Documentation/filesystems/exofs.txt for more details
|
||||
|
||||
osd target:
|
||||
There are no current plans for an OSD target implementation in kernel. For all
|
||||
needs, a user-mode target that is based on the scsi tgt target framework is
|
||||
|
@ -11390,7 +11390,6 @@ M: Boaz Harrosh <ooo@electrozaur.com>
|
||||
S: Maintained
|
||||
F: drivers/scsi/osd/
|
||||
F: include/scsi/osd_*
|
||||
F: fs/exofs/
|
||||
|
||||
OV2659 OMNIVISION SENSOR DRIVER
|
||||
M: "Lad, Prabhakar" <prabhakar.csengg@gmail.com>
|
||||
|
@ -254,12 +254,9 @@ source "fs/romfs/Kconfig"
|
||||
source "fs/pstore/Kconfig"
|
||||
source "fs/sysv/Kconfig"
|
||||
source "fs/ufs/Kconfig"
|
||||
source "fs/exofs/Kconfig"
|
||||
|
||||
endif # MISC_FILESYSTEMS
|
||||
|
||||
source "fs/exofs/Kconfig.ore"
|
||||
|
||||
menuconfig NETWORK_FILESYSTEMS
|
||||
bool "Network File Systems"
|
||||
default y
|
||||
|
@ -124,7 +124,6 @@ obj-$(CONFIG_OCFS2_FS) += ocfs2/
|
||||
obj-$(CONFIG_BTRFS_FS) += btrfs/
|
||||
obj-$(CONFIG_GFS2_FS) += gfs2/
|
||||
obj-$(CONFIG_F2FS_FS) += f2fs/
|
||||
obj-y += exofs/ # Multiple modules
|
||||
obj-$(CONFIG_CEPH_FS) += ceph/
|
||||
obj-$(CONFIG_PSTORE) += pstore/
|
||||
obj-$(CONFIG_EFIVAR_FS) += efivarfs/
|
||||
|
@ -1,3 +0,0 @@
|
||||
- Out-of-space may cause a severe problem if the object (and directory entry)
|
||||
were written, but the inode attributes failed. Then if the filesystem was
|
||||
unmounted and mounted the kernel can get into an endless loop doing a readdir.
|
@ -1,20 +0,0 @@
|
||||
#
|
||||
# Kbuild for the EXOFS module
|
||||
#
|
||||
# Copyright (C) 2008 Panasas Inc. All rights reserved.
|
||||
#
|
||||
# Authors:
|
||||
# Boaz Harrosh <ooo@electrozaur.com>
|
||||
#
|
||||
# This program is free software; you can redistribute it and/or modify
|
||||
# it under the terms of the GNU General Public License version 2
|
||||
#
|
||||
# Kbuild - Gets included from the Kernels Makefile and build system
|
||||
#
|
||||
|
||||
# ore module library
|
||||
libore-y := ore.o ore_raid.o
|
||||
obj-$(CONFIG_ORE) += libore.o
|
||||
|
||||
exofs-y := inode.o file.o namei.o dir.o super.o sys.o
|
||||
obj-$(CONFIG_EXOFS_FS) += exofs.o
|
@ -1,13 +0,0 @@
|
||||
config EXOFS_FS
|
||||
tristate "exofs: OSD based file system support"
|
||||
depends on SCSI_OSD_ULD
|
||||
help
|
||||
EXOFS is a file system that uses an OSD storage device,
|
||||
as its backing storage.
|
||||
|
||||
# Debugging-related stuff
|
||||
config EXOFS_DEBUG
|
||||
bool "Enable debugging"
|
||||
depends on EXOFS_FS
|
||||
help
|
||||
This option enables EXOFS debug prints.
|
@ -1,14 +0,0 @@
|
||||
# ORE - Objects Raid Engine (libore.ko)
|
||||
#
|
||||
# Note ORE needs to "select ASYNC_XOR". So as not to force multiple selects
|
||||
# for every ORE user we do it like this. Any user should add itself here
|
||||
# at the "depends on EXOFS_FS || ..." with an ||. The dependencies are
|
||||
# selected here, and we default to "ON". So in effect it is like being
|
||||
# selected by any of the users.
|
||||
config ORE
|
||||
tristate
|
||||
depends on EXOFS_FS || PNFS_OBJLAYOUT
|
||||
select ASYNC_XOR
|
||||
select RAID6_PQ
|
||||
select ASYNC_PQ
|
||||
default SCSI_OSD_ULD
|
@ -1,262 +0,0 @@
|
||||
/*
|
||||
* common.h - Common definitions for both Kernel and user-mode utilities
|
||||
*
|
||||
* Copyright (C) 2005, 2006
|
||||
* Avishay Traeger (avishay@gmail.com)
|
||||
* Copyright (C) 2008, 2009
|
||||
* Boaz Harrosh <ooo@electrozaur.com>
|
||||
*
|
||||
* Copyrights for code taken from ext2:
|
||||
* Copyright (C) 1992, 1993, 1994, 1995
|
||||
* Remy Card (card@masi.ibp.fr)
|
||||
* Laboratoire MASI - Institut Blaise Pascal
|
||||
* Universite Pierre et Marie Curie (Paris VI)
|
||||
* from
|
||||
* linux/fs/minix/inode.c
|
||||
* Copyright (C) 1991, 1992 Linus Torvalds
|
||||
*
|
||||
* This file is part of exofs.
|
||||
*
|
||||
* exofs is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation. Since it is based on ext2, and the only
|
||||
* valid version of GPL for the Linux kernel is version 2, the only valid
|
||||
* version of GPL for exofs is version 2.
|
||||
*
|
||||
* exofs is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with exofs; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
*/
|
||||
|
||||
#ifndef __EXOFS_COM_H__
|
||||
#define __EXOFS_COM_H__
|
||||
|
||||
#include <linux/types.h>
|
||||
|
||||
#include <scsi/osd_attributes.h>
|
||||
#include <scsi/osd_initiator.h>
|
||||
#include <scsi/osd_sec.h>
|
||||
|
||||
/****************************************************************************
|
||||
* Object ID related defines
|
||||
* NOTE: inode# = object ID - EXOFS_OBJ_OFF
|
||||
****************************************************************************/
|
||||
#define EXOFS_MIN_PID 0x10000 /* Smallest partition ID */
|
||||
#define EXOFS_OBJ_OFF 0x10000 /* offset for objects */
|
||||
#define EXOFS_SUPER_ID 0x10000 /* object ID for on-disk superblock */
|
||||
#define EXOFS_DEVTABLE_ID 0x10001 /* object ID for on-disk device table */
|
||||
#define EXOFS_ROOT_ID 0x10002 /* object ID for root directory */
|
||||
|
||||
/* exofs Application specific page/attribute */
|
||||
/* Inode attrs */
|
||||
# define EXOFS_APAGE_FS_DATA (OSD_APAGE_APP_DEFINED_FIRST + 3)
|
||||
# define EXOFS_ATTR_INODE_DATA 1
|
||||
# define EXOFS_ATTR_INODE_FILE_LAYOUT 2
|
||||
# define EXOFS_ATTR_INODE_DIR_LAYOUT 3
|
||||
/* Partition attrs */
|
||||
# define EXOFS_APAGE_SB_DATA (0xF0000000U + 3)
|
||||
# define EXOFS_ATTR_SB_STATS 1
|
||||
|
||||
/*
|
||||
* The maximum number of files we can have is limited by the size of the
|
||||
* inode number. This is the largest object ID that the file system supports.
|
||||
* Object IDs 0, 1, and 2 are always in use (see above defines).
|
||||
*/
|
||||
enum {
|
||||
EXOFS_MAX_INO_ID = (sizeof(ino_t) * 8 == 64) ? ULLONG_MAX :
|
||||
(1ULL << (sizeof(ino_t) * 8ULL - 1ULL)),
|
||||
EXOFS_MAX_ID = (EXOFS_MAX_INO_ID - 1 - EXOFS_OBJ_OFF),
|
||||
};
|
||||
|
||||
/****************************************************************************
|
||||
* Misc.
|
||||
****************************************************************************/
|
||||
#define EXOFS_BLKSHIFT 12
|
||||
#define EXOFS_BLKSIZE (1UL << EXOFS_BLKSHIFT)
|
||||
|
||||
/****************************************************************************
|
||||
* superblock-related things
|
||||
****************************************************************************/
|
||||
#define EXOFS_SUPER_MAGIC 0x5DF5
|
||||
|
||||
/*
|
||||
* The file system control block - stored in object EXOFS_SUPER_ID's data.
|
||||
* This is where the in-memory superblock is stored on disk.
|
||||
*/
|
||||
enum {EXOFS_FSCB_VER = 1, EXOFS_DT_VER = 1};
|
||||
struct exofs_fscb {
|
||||
__le64 s_nextid; /* Only used after mkfs */
|
||||
__le64 s_numfiles; /* Only used after mkfs */
|
||||
__le32 s_version; /* == EXOFS_FSCB_VER */
|
||||
__le16 s_magic; /* Magic signature */
|
||||
__le16 s_newfs; /* Non-zero if this is a new fs */
|
||||
|
||||
/* From here on it's a static part, only written by mkexofs */
|
||||
__le64 s_dev_table_oid; /* Reserved, not used */
|
||||
__le64 s_dev_table_count; /* == 0 means no dev_table */
|
||||
} __packed;
|
||||
|
||||
/*
|
||||
* This struct is set on the FS partition's attributes.
|
||||
* [EXOFS_APAGE_SB_DATA, EXOFS_ATTR_SB_STATS] and is written together
|
||||
* with the create command, to atomically persist the sb writeable information.
|
||||
*/
|
||||
struct exofs_sb_stats {
|
||||
__le64 s_nextid; /* Highest object ID used */
|
||||
__le64 s_numfiles; /* Number of files on fs */
|
||||
} __packed;
|
||||
|
||||
/*
|
||||
* Describes the raid used in the FS. It is part of the device table.
|
||||
* This here is taken from the pNFS-objects definition. In exofs we
|
||||
* use one raid policy through-out the filesystem. (NOTE: the funny
|
||||
* alignment at beginning. We take care of it at exofs_device_table.)
|
||||
*/
|
||||
struct exofs_dt_data_map {
|
||||
__le32 cb_num_comps;
|
||||
__le64 cb_stripe_unit;
|
||||
__le32 cb_group_width;
|
||||
__le32 cb_group_depth;
|
||||
__le32 cb_mirror_cnt;
|
||||
__le32 cb_raid_algorithm;
|
||||
} __packed;
|
||||
|
||||
/*
|
||||
* This is an osd device information descriptor. It is a single entry in
|
||||
* the exofs device table. It describes an osd target lun which
|
||||
* contains data belonging to this FS. (Same partition_id on all devices)
|
||||
*/
|
||||
struct exofs_dt_device_info {
|
||||
__le32 systemid_len;
|
||||
u8 systemid[OSD_SYSTEMID_LEN];
|
||||
__le64 long_name_offset; /* If !0 then offset-in-file */
|
||||
__le32 osdname_len; /* */
|
||||
u8 osdname[44]; /* Embedded, usually an ASCII uuid */
|
||||
} __packed;
|
||||
|
||||
/*
|
||||
* The EXOFS device table - stored in object EXOFS_DEVTABLE_ID's data.
|
||||
* It contains the raid used for this multi-device FS and an array of
|
||||
* participating devices.
|
||||
*/
|
||||
struct exofs_device_table {
|
||||
__le32 dt_version; /* == EXOFS_DT_VER */
|
||||
struct exofs_dt_data_map dt_data_map; /* Raid policy to use */
|
||||
|
||||
/* Reserved space for future use. Total including this:
|
||||
* (8 * sizeof(le64))
|
||||
*/
|
||||
__le64 __Resurved[4];
|
||||
|
||||
__le64 dt_num_devices; /* Array size */
|
||||
struct exofs_dt_device_info dt_dev_table[]; /* Array of devices */
|
||||
} __packed;
|
||||
|
||||
/****************************************************************************
|
||||
* inode-related things
|
||||
****************************************************************************/
|
||||
#define EXOFS_IDATA 5
|
||||
|
||||
/*
|
||||
* The file control block - stored in an object's attributes. This is where
|
||||
* the in-memory inode is stored on disk.
|
||||
*/
|
||||
struct exofs_fcb {
|
||||
__le64 i_size; /* Size of the file */
|
||||
__le16 i_mode; /* File mode */
|
||||
__le16 i_links_count; /* Links count */
|
||||
__le32 i_uid; /* Owner Uid */
|
||||
__le32 i_gid; /* Group Id */
|
||||
__le32 i_atime; /* Access time */
|
||||
__le32 i_ctime; /* Creation time */
|
||||
__le32 i_mtime; /* Modification time */
|
||||
__le32 i_flags; /* File flags (unused for now)*/
|
||||
__le32 i_generation; /* File version (for NFS) */
|
||||
__le32 i_data[EXOFS_IDATA]; /* Short symlink names and device #s */
|
||||
};
|
||||
|
||||
#define EXOFS_INO_ATTR_SIZE sizeof(struct exofs_fcb)
|
||||
|
||||
/* This is the Attribute the fcb is stored in */
|
||||
static const struct __weak osd_attr g_attr_inode_data = ATTR_DEF(
|
||||
EXOFS_APAGE_FS_DATA,
|
||||
EXOFS_ATTR_INODE_DATA,
|
||||
EXOFS_INO_ATTR_SIZE);
|
||||
|
||||
/****************************************************************************
|
||||
* dentry-related things
|
||||
****************************************************************************/
|
||||
#define EXOFS_NAME_LEN 255
|
||||
|
||||
/*
|
||||
* The on-disk directory entry
|
||||
*/
|
||||
struct exofs_dir_entry {
|
||||
__le64 inode_no; /* inode number */
|
||||
__le16 rec_len; /* directory entry length */
|
||||
u8 name_len; /* name length */
|
||||
u8 file_type; /* umm...file type */
|
||||
char name[EXOFS_NAME_LEN]; /* file name */
|
||||
};
|
||||
|
||||
enum {
|
||||
EXOFS_FT_UNKNOWN,
|
||||
EXOFS_FT_REG_FILE,
|
||||
EXOFS_FT_DIR,
|
||||
EXOFS_FT_CHRDEV,
|
||||
EXOFS_FT_BLKDEV,
|
||||
EXOFS_FT_FIFO,
|
||||
EXOFS_FT_SOCK,
|
||||
EXOFS_FT_SYMLINK,
|
||||
EXOFS_FT_MAX
|
||||
};
|
||||
|
||||
#define EXOFS_DIR_PAD 4
|
||||
#define EXOFS_DIR_ROUND (EXOFS_DIR_PAD - 1)
|
||||
#define EXOFS_DIR_REC_LEN(name_len) \
|
||||
(((name_len) + offsetof(struct exofs_dir_entry, name) + \
|
||||
EXOFS_DIR_ROUND) & ~EXOFS_DIR_ROUND)
|
||||
|
||||
/*
|
||||
* The on-disk (optional) layout structure.
|
||||
* sits in an EXOFS_ATTR_INODE_FILE_LAYOUT or EXOFS_ATTR_INODE_DIR_LAYOUT
|
||||
* attribute, attached to any inode, usually to a directory.
|
||||
*/
|
||||
|
||||
enum exofs_inode_layout_gen_functions {
|
||||
LAYOUT_MOVING_WINDOW = 0,
|
||||
LAYOUT_IMPLICT = 1,
|
||||
};
|
||||
|
||||
struct exofs_on_disk_inode_layout {
|
||||
__le16 gen_func; /* One of enum exofs_inode_layout_gen_functions */
|
||||
__le16 pad;
|
||||
union {
|
||||
/* gen_func == LAYOUT_MOVING_WINDOW (default) */
|
||||
struct exofs_layout_sliding_window {
|
||||
__le32 num_devices; /* first n devices in global-table*/
|
||||
} sliding_window __packed;
|
||||
|
||||
/* gen_func == LAYOUT_IMPLICT */
|
||||
struct exofs_layout_implict_list {
|
||||
struct exofs_dt_data_map data_map;
|
||||
/* Variable array of size data_map.cb_num_comps. These
|
||||
* are device indexes of the devices in the global table
|
||||
*/
|
||||
__le32 dev_indexes[];
|
||||
} implict __packed;
|
||||
};
|
||||
} __packed;
|
||||
|
||||
static inline size_t exofs_on_disk_inode_layout_size(unsigned max_devs)
|
||||
{
|
||||
return sizeof(struct exofs_on_disk_inode_layout) +
|
||||
max_devs * sizeof(__le32);
|
||||
}
|
||||
|
||||
#endif /*ifndef __EXOFS_COM_H__*/
|
661
fs/exofs/dir.c
661
fs/exofs/dir.c
@ -1,661 +0,0 @@
|
||||
/*
|
||||
* Copyright (C) 2005, 2006
|
||||
* Avishay Traeger (avishay@gmail.com)
|
||||
* Copyright (C) 2008, 2009
|
||||
* Boaz Harrosh <ooo@electrozaur.com>
|
||||
*
|
||||
* Copyrights for code taken from ext2:
|
||||
* Copyright (C) 1992, 1993, 1994, 1995
|
||||
* Remy Card (card@masi.ibp.fr)
|
||||
* Laboratoire MASI - Institut Blaise Pascal
|
||||
* Universite Pierre et Marie Curie (Paris VI)
|
||||
* from
|
||||
* linux/fs/minix/inode.c
|
||||
* Copyright (C) 1991, 1992 Linus Torvalds
|
||||
*
|
||||
* This file is part of exofs.
|
||||
*
|
||||
* exofs is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation. Since it is based on ext2, and the only
|
||||
* valid version of GPL for the Linux kernel is version 2, the only valid
|
||||
* version of GPL for exofs is version 2.
|
||||
*
|
||||
* exofs is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with exofs; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
*/
|
||||
|
||||
#include <linux/iversion.h>
|
||||
#include "exofs.h"
|
||||
|
||||
static inline unsigned exofs_chunk_size(struct inode *inode)
|
||||
{
|
||||
return inode->i_sb->s_blocksize;
|
||||
}
|
||||
|
||||
/*
 * Release a directory page: undo the kmap() first, then drop the page
 * reference.
 */
static inline void exofs_put_page(struct page *page)
{
	kunmap(page);
	put_page(page);
}
|
||||
|
||||
static unsigned exofs_last_byte(struct inode *inode, unsigned long page_nr)
|
||||
{
|
||||
loff_t last_byte = inode->i_size;
|
||||
|
||||
last_byte -= page_nr << PAGE_SHIFT;
|
||||
if (last_byte > PAGE_SIZE)
|
||||
last_byte = PAGE_SIZE;
|
||||
return last_byte;
|
||||
}
|
||||
|
||||
/*
 * exofs_commit_chunk - finish a modification of a directory page
 * @page: the directory page that was modified
 * @pos:  byte position in the directory where the modified range starts
 * @len:  length of the modified range
 *
 * Bumps the directory's i_version, marks the page up to date and dirty,
 * and grows i_size (dirtying the inode) when the range ends past it.
 * For a DIRSYNC directory the page is passed to write_one_page() and its
 * result is returned; otherwise the page is unlocked and 0 is returned.
 */
static int exofs_commit_chunk(struct page *page, loff_t pos, unsigned len)
{
	struct inode *dir = page->mapping->host;
	loff_t end = pos + len;

	inode_inc_iversion(dir);

	if (!PageUptodate(page))
		SetPageUptodate(page);

	if (end > dir->i_size) {
		i_size_write(dir, end);
		mark_inode_dirty(dir);
	}
	set_page_dirty(page);

	if (!IS_DIRSYNC(dir)) {
		unlock_page(page);
		return 0;
	}

	return write_one_page(page);
}
|
||||
|
||||
static bool exofs_check_page(struct page *page)
|
||||
{
|
||||
struct inode *dir = page->mapping->host;
|
||||
unsigned chunk_size = exofs_chunk_size(dir);
|
||||
char *kaddr = page_address(page);
|
||||
unsigned offs, rec_len;
|
||||
unsigned limit = PAGE_SIZE;
|
||||
struct exofs_dir_entry *p;
|
||||
char *error;
|
||||
|
||||
/* if the page is the last one in the directory */
|
||||
if ((dir->i_size >> PAGE_SHIFT) == page->index) {
|
||||
limit = dir->i_size & ~PAGE_MASK;
|
||||
if (limit & (chunk_size - 1))
|
||||
goto Ebadsize;
|
||||
if (!limit)
|
||||
goto out;
|
||||
}
|
||||
for (offs = 0; offs <= limit - EXOFS_DIR_REC_LEN(1); offs += rec_len) {
|
||||
p = (struct exofs_dir_entry *)(kaddr + offs);
|
||||
rec_len = le16_to_cpu(p->rec_len);
|
||||
|
||||
if (rec_len < EXOFS_DIR_REC_LEN(1))
|
||||
goto Eshort;
|
||||
if (rec_len & 3)
|
||||
goto Ealign;
|
||||
if (rec_len < EXOFS_DIR_REC_LEN(p->name_len))
|
||||
goto Enamelen;
|
||||
if (((offs + rec_len - 1) ^ offs) & ~(chunk_size-1))
|
||||
goto Espan;
|
||||
}
|
||||
if (offs != limit)
|
||||
goto Eend;
|
||||
out:
|
||||
SetPageChecked(page);
|
||||
return true;
|
||||
|
||||
Ebadsize:
|
||||
EXOFS_ERR("ERROR [exofs_check_page]: "
|
||||
"size of directory(0x%lx) is not a multiple of chunk size\n",
|
||||
dir->i_ino
|
||||
);
|
||||
goto fail;
|
||||
Eshort:
|
||||
error = "rec_len is smaller than minimal";
|
||||
goto bad_entry;
|
||||
Ealign:
|
||||
error = "unaligned directory entry";
|
||||
goto bad_entry;
|
||||
Enamelen:
|
||||
error = "rec_len is too small for name_len";
|
||||
goto bad_entry;
|
||||
Espan:
|
||||
error = "directory entry across blocks";
|
||||
goto bad_entry;
|
||||
bad_entry:
|
||||
EXOFS_ERR(
|
||||
"ERROR [exofs_check_page]: bad entry in directory(0x%lx): %s - "
|
||||
"offset=%lu, inode=0x%llx, rec_len=%d, name_len=%d\n",
|
||||
dir->i_ino, error, (page->index<<PAGE_SHIFT)+offs,
|
||||
_LLU(le64_to_cpu(p->inode_no)),
|
||||
rec_len, p->name_len);
|
||||
goto fail;
|
||||
Eend:
|
||||
p = (struct exofs_dir_entry *)(kaddr + offs);
|
||||
EXOFS_ERR("ERROR [exofs_check_page]: "
|
||||
"entry in directory(0x%lx) spans the page boundary"
|
||||
"offset=%lu, inode=0x%llx\n",
|
||||
dir->i_ino, (page->index<<PAGE_SHIFT)+offs,
|
||||
_LLU(le64_to_cpu(p->inode_no)));
|
||||
fail:
|
||||
SetPageError(page);
|
||||
return false;
|
||||
}
|
||||
|
||||
static struct page *exofs_get_page(struct inode *dir, unsigned long n)
|
||||
{
|
||||
struct address_space *mapping = dir->i_mapping;
|
||||
struct page *page = read_mapping_page(mapping, n, NULL);
|
||||
|
||||
if (!IS_ERR(page)) {
|
||||
kmap(page);
|
||||
if (unlikely(!PageChecked(page))) {
|
||||
if (PageError(page) || !exofs_check_page(page))
|
||||
goto fail;
|
||||
}
|
||||
}
|
||||
return page;
|
||||
|
||||
fail:
|
||||
exofs_put_page(page);
|
||||
return ERR_PTR(-EIO);
|
||||
}
|
||||
|
||||
static inline int exofs_match(int len, const unsigned char *name,
|
||||
struct exofs_dir_entry *de)
|
||||
{
|
||||
if (len != de->name_len)
|
||||
return 0;
|
||||
if (!de->inode_no)
|
||||
return 0;
|
||||
return !memcmp(name, de->name, len);
|
||||
}
|
||||
|
||||
static inline
|
||||
struct exofs_dir_entry *exofs_next_entry(struct exofs_dir_entry *p)
|
||||
{
|
||||
return (struct exofs_dir_entry *)((char *)p + le16_to_cpu(p->rec_len));
|
||||
}
|
||||
|
||||
static inline unsigned
|
||||
exofs_validate_entry(char *base, unsigned offset, unsigned mask)
|
||||
{
|
||||
struct exofs_dir_entry *de = (struct exofs_dir_entry *)(base + offset);
|
||||
struct exofs_dir_entry *p =
|
||||
(struct exofs_dir_entry *)(base + (offset&mask));
|
||||
while ((char *)p < (char *)de) {
|
||||
if (p->rec_len == 0)
|
||||
break;
|
||||
p = exofs_next_entry(p);
|
||||
}
|
||||
return (char *)p - base;
|
||||
}
|
||||
|
||||
static unsigned char exofs_filetype_table[EXOFS_FT_MAX] = {
|
||||
[EXOFS_FT_UNKNOWN] = DT_UNKNOWN,
|
||||
[EXOFS_FT_REG_FILE] = DT_REG,
|
||||
[EXOFS_FT_DIR] = DT_DIR,
|
||||
[EXOFS_FT_CHRDEV] = DT_CHR,
|
||||
[EXOFS_FT_BLKDEV] = DT_BLK,
|
||||
[EXOFS_FT_FIFO] = DT_FIFO,
|
||||
[EXOFS_FT_SOCK] = DT_SOCK,
|
||||
[EXOFS_FT_SYMLINK] = DT_LNK,
|
||||
};
|
||||
|
||||
#define S_SHIFT 12
|
||||
static unsigned char exofs_type_by_mode[S_IFMT >> S_SHIFT] = {
|
||||
[S_IFREG >> S_SHIFT] = EXOFS_FT_REG_FILE,
|
||||
[S_IFDIR >> S_SHIFT] = EXOFS_FT_DIR,
|
||||
[S_IFCHR >> S_SHIFT] = EXOFS_FT_CHRDEV,
|
||||
[S_IFBLK >> S_SHIFT] = EXOFS_FT_BLKDEV,
|
||||
[S_IFIFO >> S_SHIFT] = EXOFS_FT_FIFO,
|
||||
[S_IFSOCK >> S_SHIFT] = EXOFS_FT_SOCK,
|
||||
[S_IFLNK >> S_SHIFT] = EXOFS_FT_SYMLINK,
|
||||
};
|
||||
|
||||
static inline
|
||||
void exofs_set_de_type(struct exofs_dir_entry *de, struct inode *inode)
|
||||
{
|
||||
umode_t mode = inode->i_mode;
|
||||
de->file_type = exofs_type_by_mode[(mode & S_IFMT) >> S_SHIFT];
|
||||
}
|
||||
|
||||
static int
|
||||
exofs_readdir(struct file *file, struct dir_context *ctx)
|
||||
{
|
||||
loff_t pos = ctx->pos;
|
||||
struct inode *inode = file_inode(file);
|
||||
unsigned int offset = pos & ~PAGE_MASK;
|
||||
unsigned long n = pos >> PAGE_SHIFT;
|
||||
unsigned long npages = dir_pages(inode);
|
||||
unsigned chunk_mask = ~(exofs_chunk_size(inode)-1);
|
||||
bool need_revalidate = !inode_eq_iversion(inode, file->f_version);
|
||||
|
||||
if (pos > inode->i_size - EXOFS_DIR_REC_LEN(1))
|
||||
return 0;
|
||||
|
||||
for ( ; n < npages; n++, offset = 0) {
|
||||
char *kaddr, *limit;
|
||||
struct exofs_dir_entry *de;
|
||||
struct page *page = exofs_get_page(inode, n);
|
||||
|
||||
if (IS_ERR(page)) {
|
||||
EXOFS_ERR("ERROR: bad page in directory(0x%lx)\n",
|
||||
inode->i_ino);
|
||||
ctx->pos += PAGE_SIZE - offset;
|
||||
return PTR_ERR(page);
|
||||
}
|
||||
kaddr = page_address(page);
|
||||
if (unlikely(need_revalidate)) {
|
||||
if (offset) {
|
||||
offset = exofs_validate_entry(kaddr, offset,
|
||||
chunk_mask);
|
||||
ctx->pos = (n<<PAGE_SHIFT) + offset;
|
||||
}
|
||||
file->f_version = inode_query_iversion(inode);
|
||||
need_revalidate = false;
|
||||
}
|
||||
de = (struct exofs_dir_entry *)(kaddr + offset);
|
||||
limit = kaddr + exofs_last_byte(inode, n) -
|
||||
EXOFS_DIR_REC_LEN(1);
|
||||
for (; (char *)de <= limit; de = exofs_next_entry(de)) {
|
||||
if (de->rec_len == 0) {
|
||||
EXOFS_ERR("ERROR: "
|
||||
"zero-length entry in directory(0x%lx)\n",
|
||||
inode->i_ino);
|
||||
exofs_put_page(page);
|
||||
return -EIO;
|
||||
}
|
||||
if (de->inode_no) {
|
||||
unsigned char t;
|
||||
|
||||
if (de->file_type < EXOFS_FT_MAX)
|
||||
t = exofs_filetype_table[de->file_type];
|
||||
else
|
||||
t = DT_UNKNOWN;
|
||||
|
||||
if (!dir_emit(ctx, de->name, de->name_len,
|
||||
le64_to_cpu(de->inode_no),
|
||||
t)) {
|
||||
exofs_put_page(page);
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
ctx->pos += le16_to_cpu(de->rec_len);
|
||||
}
|
||||
exofs_put_page(page);
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
struct exofs_dir_entry *exofs_find_entry(struct inode *dir,
|
||||
struct dentry *dentry, struct page **res_page)
|
||||
{
|
||||
const unsigned char *name = dentry->d_name.name;
|
||||
int namelen = dentry->d_name.len;
|
||||
unsigned reclen = EXOFS_DIR_REC_LEN(namelen);
|
||||
unsigned long start, n;
|
||||
unsigned long npages = dir_pages(dir);
|
||||
struct page *page = NULL;
|
||||
struct exofs_i_info *oi = exofs_i(dir);
|
||||
struct exofs_dir_entry *de;
|
||||
|
||||
if (npages == 0)
|
||||
goto out;
|
||||
|
||||
*res_page = NULL;
|
||||
|
||||
start = oi->i_dir_start_lookup;
|
||||
if (start >= npages)
|
||||
start = 0;
|
||||
n = start;
|
||||
do {
|
||||
char *kaddr;
|
||||
page = exofs_get_page(dir, n);
|
||||
if (!IS_ERR(page)) {
|
||||
kaddr = page_address(page);
|
||||
de = (struct exofs_dir_entry *) kaddr;
|
||||
kaddr += exofs_last_byte(dir, n) - reclen;
|
||||
while ((char *) de <= kaddr) {
|
||||
if (de->rec_len == 0) {
|
||||
EXOFS_ERR("ERROR: zero-length entry in "
|
||||
"directory(0x%lx)\n",
|
||||
dir->i_ino);
|
||||
exofs_put_page(page);
|
||||
goto out;
|
||||
}
|
||||
if (exofs_match(namelen, name, de))
|
||||
goto found;
|
||||
de = exofs_next_entry(de);
|
||||
}
|
||||
exofs_put_page(page);
|
||||
}
|
||||
if (++n >= npages)
|
||||
n = 0;
|
||||
} while (n != start);
|
||||
out:
|
||||
return NULL;
|
||||
|
||||
found:
|
||||
*res_page = page;
|
||||
oi->i_dir_start_lookup = n;
|
||||
return de;
|
||||
}
|
||||
|
||||
struct exofs_dir_entry *exofs_dotdot(struct inode *dir, struct page **p)
|
||||
{
|
||||
struct page *page = exofs_get_page(dir, 0);
|
||||
struct exofs_dir_entry *de = NULL;
|
||||
|
||||
if (!IS_ERR(page)) {
|
||||
de = exofs_next_entry(
|
||||
(struct exofs_dir_entry *)page_address(page));
|
||||
*p = page;
|
||||
}
|
||||
return de;
|
||||
}
|
||||
|
||||
ino_t exofs_parent_ino(struct dentry *child)
|
||||
{
|
||||
struct page *page;
|
||||
struct exofs_dir_entry *de;
|
||||
ino_t ino;
|
||||
|
||||
de = exofs_dotdot(d_inode(child), &page);
|
||||
if (!de)
|
||||
return 0;
|
||||
|
||||
ino = le64_to_cpu(de->inode_no);
|
||||
exofs_put_page(page);
|
||||
return ino;
|
||||
}
|
||||
|
||||
ino_t exofs_inode_by_name(struct inode *dir, struct dentry *dentry)
|
||||
{
|
||||
ino_t res = 0;
|
||||
struct exofs_dir_entry *de;
|
||||
struct page *page;
|
||||
|
||||
de = exofs_find_entry(dir, dentry, &page);
|
||||
if (de) {
|
||||
res = le64_to_cpu(de->inode_no);
|
||||
exofs_put_page(page);
|
||||
}
|
||||
return res;
|
||||
}
|
||||
|
||||
int exofs_set_link(struct inode *dir, struct exofs_dir_entry *de,
|
||||
struct page *page, struct inode *inode)
|
||||
{
|
||||
loff_t pos = page_offset(page) +
|
||||
(char *) de - (char *) page_address(page);
|
||||
unsigned len = le16_to_cpu(de->rec_len);
|
||||
int err;
|
||||
|
||||
lock_page(page);
|
||||
err = exofs_write_begin(NULL, page->mapping, pos, len, 0, &page, NULL);
|
||||
if (err)
|
||||
EXOFS_ERR("exofs_set_link: exofs_write_begin FAILED => %d\n",
|
||||
err);
|
||||
|
||||
de->inode_no = cpu_to_le64(inode->i_ino);
|
||||
exofs_set_de_type(de, inode);
|
||||
if (likely(!err))
|
||||
err = exofs_commit_chunk(page, pos, len);
|
||||
exofs_put_page(page);
|
||||
dir->i_mtime = dir->i_ctime = current_time(dir);
|
||||
mark_inode_dirty(dir);
|
||||
return err;
|
||||
}
|
||||
|
||||
int exofs_add_link(struct dentry *dentry, struct inode *inode)
|
||||
{
|
||||
struct inode *dir = d_inode(dentry->d_parent);
|
||||
const unsigned char *name = dentry->d_name.name;
|
||||
int namelen = dentry->d_name.len;
|
||||
unsigned chunk_size = exofs_chunk_size(dir);
|
||||
unsigned reclen = EXOFS_DIR_REC_LEN(namelen);
|
||||
unsigned short rec_len, name_len;
|
||||
struct page *page = NULL;
|
||||
struct exofs_sb_info *sbi = inode->i_sb->s_fs_info;
|
||||
struct exofs_dir_entry *de;
|
||||
unsigned long npages = dir_pages(dir);
|
||||
unsigned long n;
|
||||
char *kaddr;
|
||||
loff_t pos;
|
||||
int err;
|
||||
|
||||
for (n = 0; n <= npages; n++) {
|
||||
char *dir_end;
|
||||
|
||||
page = exofs_get_page(dir, n);
|
||||
err = PTR_ERR(page);
|
||||
if (IS_ERR(page))
|
||||
goto out;
|
||||
lock_page(page);
|
||||
kaddr = page_address(page);
|
||||
dir_end = kaddr + exofs_last_byte(dir, n);
|
||||
de = (struct exofs_dir_entry *)kaddr;
|
||||
kaddr += PAGE_SIZE - reclen;
|
||||
while ((char *)de <= kaddr) {
|
||||
if ((char *)de == dir_end) {
|
||||
name_len = 0;
|
||||
rec_len = chunk_size;
|
||||
de->rec_len = cpu_to_le16(chunk_size);
|
||||
de->inode_no = 0;
|
||||
goto got_it;
|
||||
}
|
||||
if (de->rec_len == 0) {
|
||||
EXOFS_ERR("ERROR: exofs_add_link: "
|
||||
"zero-length entry in directory(0x%lx)\n",
|
||||
inode->i_ino);
|
||||
err = -EIO;
|
||||
goto out_unlock;
|
||||
}
|
||||
err = -EEXIST;
|
||||
if (exofs_match(namelen, name, de))
|
||||
goto out_unlock;
|
||||
name_len = EXOFS_DIR_REC_LEN(de->name_len);
|
||||
rec_len = le16_to_cpu(de->rec_len);
|
||||
if (!de->inode_no && rec_len >= reclen)
|
||||
goto got_it;
|
||||
if (rec_len >= name_len + reclen)
|
||||
goto got_it;
|
||||
de = (struct exofs_dir_entry *) ((char *) de + rec_len);
|
||||
}
|
||||
unlock_page(page);
|
||||
exofs_put_page(page);
|
||||
}
|
||||
|
||||
EXOFS_ERR("exofs_add_link: BAD dentry=%p or inode=0x%lx\n",
|
||||
dentry, inode->i_ino);
|
||||
return -EINVAL;
|
||||
|
||||
got_it:
|
||||
pos = page_offset(page) +
|
||||
(char *)de - (char *)page_address(page);
|
||||
err = exofs_write_begin(NULL, page->mapping, pos, rec_len, 0,
|
||||
&page, NULL);
|
||||
if (err)
|
||||
goto out_unlock;
|
||||
if (de->inode_no) {
|
||||
struct exofs_dir_entry *de1 =
|
||||
(struct exofs_dir_entry *)((char *)de + name_len);
|
||||
de1->rec_len = cpu_to_le16(rec_len - name_len);
|
||||
de->rec_len = cpu_to_le16(name_len);
|
||||
de = de1;
|
||||
}
|
||||
de->name_len = namelen;
|
||||
memcpy(de->name, name, namelen);
|
||||
de->inode_no = cpu_to_le64(inode->i_ino);
|
||||
exofs_set_de_type(de, inode);
|
||||
err = exofs_commit_chunk(page, pos, rec_len);
|
||||
dir->i_mtime = dir->i_ctime = current_time(dir);
|
||||
mark_inode_dirty(dir);
|
||||
sbi->s_numfiles++;
|
||||
|
||||
out_put:
|
||||
exofs_put_page(page);
|
||||
out:
|
||||
return err;
|
||||
out_unlock:
|
||||
unlock_page(page);
|
||||
goto out_put;
|
||||
}
|
||||
|
||||
int exofs_delete_entry(struct exofs_dir_entry *dir, struct page *page)
|
||||
{
|
||||
struct address_space *mapping = page->mapping;
|
||||
struct inode *inode = mapping->host;
|
||||
struct exofs_sb_info *sbi = inode->i_sb->s_fs_info;
|
||||
char *kaddr = page_address(page);
|
||||
unsigned from = ((char *)dir - kaddr) & ~(exofs_chunk_size(inode)-1);
|
||||
unsigned to = ((char *)dir - kaddr) + le16_to_cpu(dir->rec_len);
|
||||
loff_t pos;
|
||||
struct exofs_dir_entry *pde = NULL;
|
||||
struct exofs_dir_entry *de = (struct exofs_dir_entry *) (kaddr + from);
|
||||
int err;
|
||||
|
||||
while (de < dir) {
|
||||
if (de->rec_len == 0) {
|
||||
EXOFS_ERR("ERROR: exofs_delete_entry:"
|
||||
"zero-length entry in directory(0x%lx)\n",
|
||||
inode->i_ino);
|
||||
err = -EIO;
|
||||
goto out;
|
||||
}
|
||||
pde = de;
|
||||
de = exofs_next_entry(de);
|
||||
}
|
||||
if (pde)
|
||||
from = (char *)pde - (char *)page_address(page);
|
||||
pos = page_offset(page) + from;
|
||||
lock_page(page);
|
||||
err = exofs_write_begin(NULL, page->mapping, pos, to - from, 0,
|
||||
&page, NULL);
|
||||
if (err)
|
||||
EXOFS_ERR("exofs_delete_entry: exofs_write_begin FAILED => %d\n",
|
||||
err);
|
||||
if (pde)
|
||||
pde->rec_len = cpu_to_le16(to - from);
|
||||
dir->inode_no = 0;
|
||||
if (likely(!err))
|
||||
err = exofs_commit_chunk(page, pos, to - from);
|
||||
inode->i_ctime = inode->i_mtime = current_time(inode);
|
||||
mark_inode_dirty(inode);
|
||||
sbi->s_numfiles--;
|
||||
out:
|
||||
exofs_put_page(page);
|
||||
return err;
|
||||
}
|
||||
|
||||
/*
 * Names copied into the "." and ".." entries by exofs_make_empty().
 * Each literal is NUL-padded so that sizeof() of it (which includes the
 * implicit terminator) is 4 bytes: kept aligned on 4 bytes.
 */
|
||||
#define THIS_DIR ".\0\0"
|
||||
#define PARENT_DIR "..\0"
|
||||
|
||||
int exofs_make_empty(struct inode *inode, struct inode *parent)
|
||||
{
|
||||
struct address_space *mapping = inode->i_mapping;
|
||||
struct page *page = grab_cache_page(mapping, 0);
|
||||
unsigned chunk_size = exofs_chunk_size(inode);
|
||||
struct exofs_dir_entry *de;
|
||||
int err;
|
||||
void *kaddr;
|
||||
|
||||
if (!page)
|
||||
return -ENOMEM;
|
||||
|
||||
err = exofs_write_begin(NULL, page->mapping, 0, chunk_size, 0,
|
||||
&page, NULL);
|
||||
if (err) {
|
||||
unlock_page(page);
|
||||
goto fail;
|
||||
}
|
||||
|
||||
kaddr = kmap_atomic(page);
|
||||
de = (struct exofs_dir_entry *)kaddr;
|
||||
de->name_len = 1;
|
||||
de->rec_len = cpu_to_le16(EXOFS_DIR_REC_LEN(1));
|
||||
memcpy(de->name, THIS_DIR, sizeof(THIS_DIR));
|
||||
de->inode_no = cpu_to_le64(inode->i_ino);
|
||||
exofs_set_de_type(de, inode);
|
||||
|
||||
de = (struct exofs_dir_entry *)(kaddr + EXOFS_DIR_REC_LEN(1));
|
||||
de->name_len = 2;
|
||||
de->rec_len = cpu_to_le16(chunk_size - EXOFS_DIR_REC_LEN(1));
|
||||
de->inode_no = cpu_to_le64(parent->i_ino);
|
||||
memcpy(de->name, PARENT_DIR, sizeof(PARENT_DIR));
|
||||
exofs_set_de_type(de, inode);
|
||||
kunmap_atomic(kaddr);
|
||||
err = exofs_commit_chunk(page, 0, chunk_size);
|
||||
fail:
|
||||
put_page(page);
|
||||
return err;
|
||||
}
|
||||
|
||||
int exofs_empty_dir(struct inode *inode)
|
||||
{
|
||||
struct page *page = NULL;
|
||||
unsigned long i, npages = dir_pages(inode);
|
||||
|
||||
for (i = 0; i < npages; i++) {
|
||||
char *kaddr;
|
||||
struct exofs_dir_entry *de;
|
||||
page = exofs_get_page(inode, i);
|
||||
|
||||
if (IS_ERR(page))
|
||||
continue;
|
||||
|
||||
kaddr = page_address(page);
|
||||
de = (struct exofs_dir_entry *)kaddr;
|
||||
kaddr += exofs_last_byte(inode, i) - EXOFS_DIR_REC_LEN(1);
|
||||
|
||||
while ((char *)de <= kaddr) {
|
||||
if (de->rec_len == 0) {
|
||||
EXOFS_ERR("ERROR: exofs_empty_dir: "
|
||||
"zero-length directory entry"
|
||||
"kaddr=%p, de=%p\n", kaddr, de);
|
||||
goto not_empty;
|
||||
}
|
||||
if (de->inode_no != 0) {
|
||||
/* check for . and .. */
|
||||
if (de->name[0] != '.')
|
||||
goto not_empty;
|
||||
if (de->name_len > 2)
|
||||
goto not_empty;
|
||||
if (de->name_len < 2) {
|
||||
if (le64_to_cpu(de->inode_no) !=
|
||||
inode->i_ino)
|
||||
goto not_empty;
|
||||
} else if (de->name[1] != '.')
|
||||
goto not_empty;
|
||||
}
|
||||
de = exofs_next_entry(de);
|
||||
}
|
||||
exofs_put_page(page);
|
||||
}
|
||||
return 1;
|
||||
|
||||
not_empty:
|
||||
exofs_put_page(page);
|
||||
return 0;
|
||||
}
|
||||
|
||||
const struct file_operations exofs_dir_operations = {
|
||||
.llseek = generic_file_llseek,
|
||||
.read = generic_read_dir,
|
||||
.iterate_shared = exofs_readdir,
|
||||
};
|
240
fs/exofs/exofs.h
240
fs/exofs/exofs.h
@ -1,240 +0,0 @@
|
||||
/*
|
||||
* Copyright (C) 2005, 2006
|
||||
* Avishay Traeger (avishay@gmail.com)
|
||||
* Copyright (C) 2008, 2009
|
||||
* Boaz Harrosh <ooo@electrozaur.com>
|
||||
*
|
||||
* Copyrights for code taken from ext2:
|
||||
* Copyright (C) 1992, 1993, 1994, 1995
|
||||
* Remy Card (card@masi.ibp.fr)
|
||||
* Laboratoire MASI - Institut Blaise Pascal
|
||||
* Universite Pierre et Marie Curie (Paris VI)
|
||||
* from
|
||||
* linux/fs/minix/inode.c
|
||||
* Copyright (C) 1991, 1992 Linus Torvalds
|
||||
*
|
||||
* This file is part of exofs.
|
||||
*
|
||||
* exofs is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation. Since it is based on ext2, and the only
|
||||
* valid version of GPL for the Linux kernel is version 2, the only valid
|
||||
* version of GPL for exofs is version 2.
|
||||
*
|
||||
* exofs is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with exofs; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
*/
|
||||
#ifndef __EXOFS_H__
|
||||
#define __EXOFS_H__
|
||||
|
||||
#include <linux/fs.h>
|
||||
#include <linux/time.h>
|
||||
#include <linux/backing-dev.h>
|
||||
#include <scsi/osd_ore.h>
|
||||
|
||||
#include "common.h"
|
||||
|
||||
/* Error message at KERN_ERR level, prefixed with "exofs: " */
#define EXOFS_ERR(fmt, a...) printk(KERN_ERR "exofs: " fmt, ##a)

/*
 * Debug message tagged with the calling function and line.  Without
 * CONFIG_EXOFS_DEBUG the call sits behind "if (0)", so the arguments are
 * still compiled but nothing is printed.
 */
#ifdef CONFIG_EXOFS_DEBUG
#define EXOFS_DBGMSG(fmt, a...) \
	printk(KERN_NOTICE "exofs @%s:%d: " fmt, __func__, __LINE__, ##a)
#else
#define EXOFS_DBGMSG(fmt, a...) \
	do { if (0) printk(fmt, ##a); } while (0)
#endif

/*
 * u64 has problems with printk; this casts the value to unsigned long long.
 * The whole expansion is parenthesized so the macro behaves as a single
 * operand wherever it is used (e.g. after sizeof or next to a postfix
 * operator).
 */
#define _LLU(x) ((unsigned long long)(x))
|
||||
|
||||
struct exofs_dev {
|
||||
struct ore_dev ored;
|
||||
unsigned did;
|
||||
unsigned urilen;
|
||||
uint8_t *uri;
|
||||
struct kobject ed_kobj;
|
||||
};
|
||||
/*
|
||||
* our extension to the in-memory superblock
|
||||
*/
|
||||
struct exofs_sb_info {
|
||||
struct exofs_sb_stats s_ess; /* Written often, pre-allocate*/
|
||||
int s_timeout; /* timeout for OSD operations */
|
||||
uint64_t s_nextid; /* highest object ID used */
|
||||
uint32_t s_numfiles; /* number of files on fs */
|
||||
spinlock_t s_next_gen_lock; /* spinlock for gen # update */
|
||||
u32 s_next_generation; /* next gen # to use */
|
||||
atomic_t s_curr_pending; /* number of pending commands */
|
||||
|
||||
struct ore_layout layout; /* Default files layout */
|
||||
struct ore_comp one_comp; /* id & cred of partition id=0*/
|
||||
struct ore_components oc; /* comps for the partition */
|
||||
struct kobject s_kobj; /* holds per-sbi kobject */
|
||||
};
|
||||
|
||||
/*
|
||||
* our extension to the in-memory inode
|
||||
*/
|
||||
struct exofs_i_info {
|
||||
struct inode vfs_inode; /* normal in-memory inode */
|
||||
wait_queue_head_t i_wq; /* wait queue for inode */
|
||||
unsigned long i_flags; /* various atomic flags */
|
||||
uint32_t i_data[EXOFS_IDATA];/*short symlink names and device #s*/
|
||||
uint32_t i_dir_start_lookup; /* which page to start lookup */
|
||||
uint64_t i_commit_size; /* the object's written length */
|
||||
struct ore_comp one_comp; /* same component for all devices */
|
||||
struct ore_components oc; /* inode view of the device table */
|
||||
};
|
||||
|
||||
static inline osd_id exofs_oi_objno(struct exofs_i_info *oi)
|
||||
{
|
||||
return oi->vfs_inode.i_ino + EXOFS_OBJ_OFF;
|
||||
}
|
||||
|
||||
/*
|
||||
* our inode flags
|
||||
*/
|
||||
#define OBJ_2BCREATED 0 /* object will be created soon*/
|
||||
#define OBJ_CREATED 1 /* object has been created on the osd*/
|
||||
|
||||
static inline int obj_2bcreated(struct exofs_i_info *oi)
|
||||
{
|
||||
return test_bit(OBJ_2BCREATED, &oi->i_flags);
|
||||
}
|
||||
|
||||
static inline void set_obj_2bcreated(struct exofs_i_info *oi)
|
||||
{
|
||||
set_bit(OBJ_2BCREATED, &oi->i_flags);
|
||||
}
|
||||
|
||||
static inline int obj_created(struct exofs_i_info *oi)
|
||||
{
|
||||
return test_bit(OBJ_CREATED, &oi->i_flags);
|
||||
}
|
||||
|
||||
static inline void set_obj_created(struct exofs_i_info *oi)
|
||||
{
|
||||
set_bit(OBJ_CREATED, &oi->i_flags);
|
||||
}
|
||||
|
||||
int __exofs_wait_obj_created(struct exofs_i_info *oi);
|
||||
/*
 * Returns 0 at once when OBJ_CREATED is already set; otherwise hands over to
 * __exofs_wait_obj_created() and returns its result.
 */
static inline int wait_obj_created(struct exofs_i_info *oi)
{
	if (unlikely(!obj_created(oi)))
		return __exofs_wait_obj_created(oi);

	return 0;
}
|
||||
|
||||
/*
|
||||
* get to our inode from the vfs inode
|
||||
*/
|
||||
static inline struct exofs_i_info *exofs_i(struct inode *inode)
|
||||
{
|
||||
return container_of(inode, struct exofs_i_info, vfs_inode);
|
||||
}
|
||||
|
||||
/*
|
||||
* Maximum count of links to a file
|
||||
*/
|
||||
#define EXOFS_LINK_MAX 32000
|
||||
|
||||
/*************************
|
||||
* function declarations *
|
||||
*************************/
|
||||
|
||||
/* inode.c */
|
||||
unsigned exofs_max_io_pages(struct ore_layout *layout,
|
||||
unsigned expected_pages);
|
||||
int exofs_setattr(struct dentry *, struct iattr *);
|
||||
int exofs_write_begin(struct file *file, struct address_space *mapping,
|
||||
loff_t pos, unsigned len, unsigned flags,
|
||||
struct page **pagep, void **fsdata);
|
||||
extern struct inode *exofs_iget(struct super_block *, unsigned long);
|
||||
struct inode *exofs_new_inode(struct inode *, umode_t);
|
||||
extern int exofs_write_inode(struct inode *, struct writeback_control *wbc);
|
||||
extern void exofs_evict_inode(struct inode *);
|
||||
|
||||
/* dir.c: */
|
||||
int exofs_add_link(struct dentry *, struct inode *);
|
||||
ino_t exofs_inode_by_name(struct inode *, struct dentry *);
|
||||
int exofs_delete_entry(struct exofs_dir_entry *, struct page *);
|
||||
int exofs_make_empty(struct inode *, struct inode *);
|
||||
struct exofs_dir_entry *exofs_find_entry(struct inode *, struct dentry *,
|
||||
struct page **);
|
||||
int exofs_empty_dir(struct inode *);
|
||||
struct exofs_dir_entry *exofs_dotdot(struct inode *, struct page **);
|
||||
ino_t exofs_parent_ino(struct dentry *child);
|
||||
int exofs_set_link(struct inode *, struct exofs_dir_entry *, struct page *,
|
||||
struct inode *);
|
||||
|
||||
/* super.c */
|
||||
void exofs_make_credential(u8 cred_a[OSD_CAP_LEN],
|
||||
const struct osd_obj_id *obj);
|
||||
int exofs_sbi_write_stats(struct exofs_sb_info *sbi);
|
||||
|
||||
/* sys.c */
|
||||
int exofs_sysfs_init(void);
|
||||
void exofs_sysfs_uninit(void);
|
||||
int exofs_sysfs_sb_add(struct exofs_sb_info *sbi,
|
||||
struct exofs_dt_device_info *dt_dev);
|
||||
void exofs_sysfs_sb_del(struct exofs_sb_info *sbi);
|
||||
int exofs_sysfs_odev_add(struct exofs_dev *edev,
|
||||
struct exofs_sb_info *sbi);
|
||||
void exofs_sysfs_dbg_print(void);
|
||||
|
||||
/*********************
|
||||
* operation vectors *
|
||||
*********************/
|
||||
/* dir.c: */
|
||||
extern const struct file_operations exofs_dir_operations;
|
||||
|
||||
/* file.c */
|
||||
extern const struct inode_operations exofs_file_inode_operations;
|
||||
extern const struct file_operations exofs_file_operations;
|
||||
|
||||
/* inode.c */
|
||||
extern const struct address_space_operations exofs_aops;
|
||||
|
||||
/* namei.c */
|
||||
extern const struct inode_operations exofs_dir_inode_operations;
|
||||
extern const struct inode_operations exofs_special_inode_operations;
|
||||
|
||||
/* exofs_init_comps will initialize an ore_components device array
|
||||
* pointing to a single ore_comp struct, and a round-robin view
|
||||
* of the device table.
|
||||
* The first device of each inode is the [inode->ino % num_devices]
|
||||
* and the rest of the devices sequentially following where the
|
||||
* first device is after the last device.
|
||||
* It is assumed that the global device array at @sbi is twice
|
||||
* bigger and that the device table repeats twice.
|
||||
* See: exofs_read_lookup_dev_table()
|
||||
*/
|
||||
static inline void exofs_init_comps(struct ore_components *oc,
|
||||
struct ore_comp *one_comp,
|
||||
struct exofs_sb_info *sbi, osd_id oid)
|
||||
{
|
||||
unsigned dev_mod = (unsigned)oid, first_dev;
|
||||
|
||||
one_comp->obj.partition = sbi->one_comp.obj.partition;
|
||||
one_comp->obj.id = oid;
|
||||
exofs_make_credential(one_comp->cred, &one_comp->obj);
|
||||
|
||||
oc->first_dev = 0;
|
||||
oc->numdevs = sbi->layout.group_width * sbi->layout.mirrors_p1 *
|
||||
sbi->layout.group_count;
|
||||
oc->single_comp = EC_SINGLE_COMP;
|
||||
oc->comps = one_comp;
|
||||
|
||||
/* Round robin device view of the table */
|
||||
first_dev = (dev_mod * sbi->layout.mirrors_p1) % sbi->oc.numdevs;
|
||||
oc->ods = &sbi->oc.ods[first_dev];
|
||||
}
|
||||
|
||||
#endif
|
@ -1,83 +0,0 @@
|
||||
/*
|
||||
* Copyright (C) 2005, 2006
|
||||
* Avishay Traeger (avishay@gmail.com)
|
||||
* Copyright (C) 2008, 2009
|
||||
* Boaz Harrosh <ooo@electrozaur.com>
|
||||
*
|
||||
* Copyrights for code taken from ext2:
|
||||
* Copyright (C) 1992, 1993, 1994, 1995
|
||||
* Remy Card (card@masi.ibp.fr)
|
||||
* Laboratoire MASI - Institut Blaise Pascal
|
||||
* Universite Pierre et Marie Curie (Paris VI)
|
||||
* from
|
||||
* linux/fs/minix/inode.c
|
||||
* Copyright (C) 1991, 1992 Linus Torvalds
|
||||
*
|
||||
* This file is part of exofs.
|
||||
*
|
||||
* exofs is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation. Since it is based on ext2, and the only
|
||||
* valid version of GPL for the Linux kernel is version 2, the only valid
|
||||
* version of GPL for exofs is version 2.
|
||||
*
|
||||
* exofs is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with exofs; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
*/
|
||||
#include "exofs.h"
|
||||
|
||||
/* ->release hook: nothing to tear down, always reports success */
static int exofs_release_file(struct inode *inode, struct file *filp)
{
	return 0;
}
|
||||
|
||||
/* exofs_file_fsync - flush the inode to disk
|
||||
*
|
||||
* Note, in exofs all metadata is written as part of inode, regardless.
|
||||
* The writeout is synchronous
|
||||
*/
|
||||
static int exofs_file_fsync(struct file *filp, loff_t start, loff_t end,
|
||||
int datasync)
|
||||
{
|
||||
struct inode *inode = filp->f_mapping->host;
|
||||
int ret;
|
||||
|
||||
ret = file_write_and_wait_range(filp, start, end);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
inode_lock(inode);
|
||||
ret = sync_inode_metadata(filp->f_mapping->host, 1);
|
||||
inode_unlock(inode);
|
||||
return ret;
|
||||
}
|
||||
|
||||
/* ->flush hook: delegates to vfs_fsync() and returns its result */
static int exofs_flush(struct file *file, fl_owner_t id)
{
	/* TODO: Flush the OSD target */
	return vfs_fsync(file, 0);
}
|
||||
|
||||
const struct file_operations exofs_file_operations = {
|
||||
.llseek = generic_file_llseek,
|
||||
.read_iter = generic_file_read_iter,
|
||||
.write_iter = generic_file_write_iter,
|
||||
.mmap = generic_file_mmap,
|
||||
.open = generic_file_open,
|
||||
.release = exofs_release_file,
|
||||
.fsync = exofs_file_fsync,
|
||||
.flush = exofs_flush,
|
||||
.splice_read = generic_file_splice_read,
|
||||
.splice_write = iter_file_splice_write,
|
||||
};
|
||||
|
||||
const struct inode_operations exofs_file_inode_operations = {
|
||||
.setattr = exofs_setattr,
|
||||
};
|
1514
fs/exofs/inode.c
1514
fs/exofs/inode.c
File diff suppressed because it is too large
Load Diff
323
fs/exofs/namei.c
323
fs/exofs/namei.c
@ -1,323 +0,0 @@
|
||||
/*
|
||||
* Copyright (C) 2005, 2006
|
||||
* Avishay Traeger (avishay@gmail.com)
|
||||
* Copyright (C) 2008, 2009
|
||||
* Boaz Harrosh <ooo@electrozaur.com>
|
||||
*
|
||||
* Copyrights for code taken from ext2:
|
||||
* Copyright (C) 1992, 1993, 1994, 1995
|
||||
* Remy Card (card@masi.ibp.fr)
|
||||
* Laboratoire MASI - Institut Blaise Pascal
|
||||
* Universite Pierre et Marie Curie (Paris VI)
|
||||
* from
|
||||
* linux/fs/minix/inode.c
|
||||
* Copyright (C) 1991, 1992 Linus Torvalds
|
||||
*
|
||||
* This file is part of exofs.
|
||||
*
|
||||
* exofs is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation. Since it is based on ext2, and the only
|
||||
* valid version of GPL for the Linux kernel is version 2, the only valid
|
||||
* version of GPL for exofs is version 2.
|
||||
*
|
||||
* exofs is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with exofs; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
*/
|
||||
|
||||
#include "exofs.h"
|
||||
|
||||
/*
 * Link @inode under @dentry and instantiate the dentry.  On failure the
 * inode's link count is dropped and its reference released, and the error
 * from exofs_add_link() is returned.
 */
static inline int exofs_add_nondir(struct dentry *dentry, struct inode *inode)
{
	int err = exofs_add_link(dentry, inode);

	if (err) {
		inode_dec_link_count(inode);
		iput(inode);
		return err;
	}

	d_instantiate(dentry, inode);
	return 0;
}
|
||||
|
||||
static struct dentry *exofs_lookup(struct inode *dir, struct dentry *dentry,
|
||||
unsigned int flags)
|
||||
{
|
||||
struct inode *inode;
|
||||
ino_t ino;
|
||||
|
||||
if (dentry->d_name.len > EXOFS_NAME_LEN)
|
||||
return ERR_PTR(-ENAMETOOLONG);
|
||||
|
||||
ino = exofs_inode_by_name(dir, dentry);
|
||||
inode = ino ? exofs_iget(dir->i_sb, ino) : NULL;
|
||||
return d_splice_alias(inode, dentry);
|
||||
}
|
||||
|
||||
static int exofs_create(struct inode *dir, struct dentry *dentry, umode_t mode,
|
||||
bool excl)
|
||||
{
|
||||
struct inode *inode = exofs_new_inode(dir, mode);
|
||||
int err = PTR_ERR(inode);
|
||||
if (!IS_ERR(inode)) {
|
||||
inode->i_op = &exofs_file_inode_operations;
|
||||
inode->i_fop = &exofs_file_operations;
|
||||
inode->i_mapping->a_ops = &exofs_aops;
|
||||
mark_inode_dirty(inode);
|
||||
err = exofs_add_nondir(dentry, inode);
|
||||
}
|
||||
return err;
|
||||
}
|
||||
|
||||
static int exofs_mknod(struct inode *dir, struct dentry *dentry, umode_t mode,
|
||||
dev_t rdev)
|
||||
{
|
||||
struct inode *inode;
|
||||
int err;
|
||||
|
||||
inode = exofs_new_inode(dir, mode);
|
||||
err = PTR_ERR(inode);
|
||||
if (!IS_ERR(inode)) {
|
||||
init_special_inode(inode, inode->i_mode, rdev);
|
||||
mark_inode_dirty(inode);
|
||||
err = exofs_add_nondir(dentry, inode);
|
||||
}
|
||||
return err;
|
||||
}
|
||||
|
||||
static int exofs_symlink(struct inode *dir, struct dentry *dentry,
|
||||
const char *symname)
|
||||
{
|
||||
struct super_block *sb = dir->i_sb;
|
||||
int err = -ENAMETOOLONG;
|
||||
unsigned l = strlen(symname)+1;
|
||||
struct inode *inode;
|
||||
struct exofs_i_info *oi;
|
||||
|
||||
if (l > sb->s_blocksize)
|
||||
goto out;
|
||||
|
||||
inode = exofs_new_inode(dir, S_IFLNK | S_IRWXUGO);
|
||||
err = PTR_ERR(inode);
|
||||
if (IS_ERR(inode))
|
||||
goto out;
|
||||
|
||||
oi = exofs_i(inode);
|
||||
if (l > sizeof(oi->i_data)) {
|
||||
/* slow symlink */
|
||||
inode->i_op = &page_symlink_inode_operations;
|
||||
inode_nohighmem(inode);
|
||||
inode->i_mapping->a_ops = &exofs_aops;
|
||||
memset(oi->i_data, 0, sizeof(oi->i_data));
|
||||
|
||||
err = page_symlink(inode, symname, l);
|
||||
if (err)
|
||||
goto out_fail;
|
||||
} else {
|
||||
/* fast symlink */
|
||||
inode->i_op = &simple_symlink_inode_operations;
|
||||
inode->i_link = (char *)oi->i_data;
|
||||
memcpy(oi->i_data, symname, l);
|
||||
inode->i_size = l-1;
|
||||
}
|
||||
mark_inode_dirty(inode);
|
||||
|
||||
err = exofs_add_nondir(dentry, inode);
|
||||
out:
|
||||
return err;
|
||||
|
||||
out_fail:
|
||||
inode_dec_link_count(inode);
|
||||
iput(inode);
|
||||
goto out;
|
||||
}
|
||||
|
||||
static int exofs_link(struct dentry *old_dentry, struct inode *dir,
|
||||
struct dentry *dentry)
|
||||
{
|
||||
struct inode *inode = d_inode(old_dentry);
|
||||
|
||||
inode->i_ctime = current_time(inode);
|
||||
inode_inc_link_count(inode);
|
||||
ihold(inode);
|
||||
|
||||
return exofs_add_nondir(dentry, inode);
|
||||
}
|
||||
|
||||
/*
 * ->mkdir: create the directory @dentry inside @dir.
 *
 * @dir's link count is raised first and dropped again on any failure.
 * The new inode's link count is raised a second time before the "." and
 * ".." records are written, so the failure path lowers it twice.
 */
static int exofs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
{
	struct inode *inode;
	int err;

	inode_inc_link_count(dir);

	inode = exofs_new_inode(dir, S_IFDIR | mode);
	if (IS_ERR(inode)) {
		err = PTR_ERR(inode);
		goto drop_parent;
	}

	inode->i_op = &exofs_dir_inode_operations;
	inode->i_fop = &exofs_dir_operations;
	inode->i_mapping->a_ops = &exofs_aops;

	inode_inc_link_count(inode);

	err = exofs_make_empty(inode, dir);
	if (!err)
		err = exofs_add_link(dentry, inode);
	if (err)
		goto drop_new;

	d_instantiate(dentry, inode);
	return 0;

drop_new:
	inode_dec_link_count(inode);
	inode_dec_link_count(inode);
	iput(inode);
drop_parent:
	inode_dec_link_count(dir);
	return err;
}
|
||||
|
||||
static int exofs_unlink(struct inode *dir, struct dentry *dentry)
|
||||
{
|
||||
struct inode *inode = d_inode(dentry);
|
||||
struct exofs_dir_entry *de;
|
||||
struct page *page;
|
||||
int err = -ENOENT;
|
||||
|
||||
de = exofs_find_entry(dir, dentry, &page);
|
||||
if (!de)
|
||||
goto out;
|
||||
|
||||
err = exofs_delete_entry(de, page);
|
||||
if (err)
|
||||
goto out;
|
||||
|
||||
inode->i_ctime = dir->i_ctime;
|
||||
inode_dec_link_count(inode);
|
||||
err = 0;
|
||||
out:
|
||||
return err;
|
||||
}
|
||||
|
||||
static int exofs_rmdir(struct inode *dir, struct dentry *dentry)
|
||||
{
|
||||
struct inode *inode = d_inode(dentry);
|
||||
int err = -ENOTEMPTY;
|
||||
|
||||
if (exofs_empty_dir(inode)) {
|
||||
err = exofs_unlink(dir, dentry);
|
||||
if (!err) {
|
||||
inode->i_size = 0;
|
||||
inode_dec_link_count(inode);
|
||||
inode_dec_link_count(dir);
|
||||
}
|
||||
}
|
||||
return err;
|
||||
}
|
||||
|
||||
static int exofs_rename(struct inode *old_dir, struct dentry *old_dentry,
|
||||
struct inode *new_dir, struct dentry *new_dentry,
|
||||
unsigned int flags)
|
||||
{
|
||||
struct inode *old_inode = d_inode(old_dentry);
|
||||
struct inode *new_inode = d_inode(new_dentry);
|
||||
struct page *dir_page = NULL;
|
||||
struct exofs_dir_entry *dir_de = NULL;
|
||||
struct page *old_page;
|
||||
struct exofs_dir_entry *old_de;
|
||||
int err = -ENOENT;
|
||||
|
||||
if (flags & ~RENAME_NOREPLACE)
|
||||
return -EINVAL;
|
||||
|
||||
old_de = exofs_find_entry(old_dir, old_dentry, &old_page);
|
||||
if (!old_de)
|
||||
goto out;
|
||||
|
||||
if (S_ISDIR(old_inode->i_mode)) {
|
||||
err = -EIO;
|
||||
dir_de = exofs_dotdot(old_inode, &dir_page);
|
||||
if (!dir_de)
|
||||
goto out_old;
|
||||
}
|
||||
|
||||
if (new_inode) {
|
||||
struct page *new_page;
|
||||
struct exofs_dir_entry *new_de;
|
||||
|
||||
err = -ENOTEMPTY;
|
||||
if (dir_de && !exofs_empty_dir(new_inode))
|
||||
goto out_dir;
|
||||
|
||||
err = -ENOENT;
|
||||
new_de = exofs_find_entry(new_dir, new_dentry, &new_page);
|
||||
if (!new_de)
|
||||
goto out_dir;
|
||||
err = exofs_set_link(new_dir, new_de, new_page, old_inode);
|
||||
new_inode->i_ctime = current_time(new_inode);
|
||||
if (dir_de)
|
||||
drop_nlink(new_inode);
|
||||
inode_dec_link_count(new_inode);
|
||||
if (err)
|
||||
goto out_dir;
|
||||
} else {
|
||||
err = exofs_add_link(new_dentry, old_inode);
|
||||
if (err)
|
||||
goto out_dir;
|
||||
if (dir_de)
|
||||
inode_inc_link_count(new_dir);
|
||||
}
|
||||
|
||||
old_inode->i_ctime = current_time(old_inode);
|
||||
|
||||
exofs_delete_entry(old_de, old_page);
|
||||
mark_inode_dirty(old_inode);
|
||||
|
||||
if (dir_de) {
|
||||
err = exofs_set_link(old_inode, dir_de, dir_page, new_dir);
|
||||
inode_dec_link_count(old_dir);
|
||||
if (err)
|
||||
goto out_dir;
|
||||
}
|
||||
return 0;
|
||||
|
||||
|
||||
out_dir:
|
||||
if (dir_de) {
|
||||
kunmap(dir_page);
|
||||
put_page(dir_page);
|
||||
}
|
||||
out_old:
|
||||
kunmap(old_page);
|
||||
put_page(old_page);
|
||||
out:
|
||||
return err;
|
||||
}
|
||||
|
||||
const struct inode_operations exofs_dir_inode_operations = {
|
||||
.create = exofs_create,
|
||||
.lookup = exofs_lookup,
|
||||
.link = exofs_link,
|
||||
.unlink = exofs_unlink,
|
||||
.symlink = exofs_symlink,
|
||||
.mkdir = exofs_mkdir,
|
||||
.rmdir = exofs_rmdir,
|
||||
.mknod = exofs_mknod,
|
||||
.rename = exofs_rename,
|
||||
.setattr = exofs_setattr,
|
||||
};
|
||||
|
||||
const struct inode_operations exofs_special_inode_operations = {
|
||||
.setattr = exofs_setattr,
|
||||
};
|
1178
fs/exofs/ore.c
1178
fs/exofs/ore.c
File diff suppressed because it is too large
Load Diff
@ -1,756 +0,0 @@
|
||||
/*
|
||||
* Copyright (C) 2011
|
||||
* Boaz Harrosh <ooo@electrozaur.com>
|
||||
*
|
||||
* This file is part of the objects raid engine (ore).
|
||||
*
|
||||
* It is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License version 2 as published
|
||||
* by the Free Software Foundation.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with "ore". If not, write to the Free Software Foundation, Inc:
|
||||
* "Free Software Foundation <info@fsf.org>"
|
||||
*/
|
||||
|
||||
#include <linux/gfp.h>
|
||||
#include <linux/async_tx.h>
|
||||
|
||||
#include "ore_raid.h"
|
||||
|
||||
/* Route the second-level debug macro to ORE_DBGMSG for this file. */
#undef ORE_DBGMSG2
|
||||
#define ORE_DBGMSG2 ORE_DBGMSG
|
||||
|
||||
static struct page *_raid_page_alloc(void)
|
||||
{
|
||||
return alloc_page(GFP_KERNEL);
|
||||
}
|
||||
|
||||
/* Release a page obtained from _raid_page_alloc(). */
static void _raid_page_free(struct page *p)
{
	struct page *page = p;

	__free_page(page);
}
|
||||
|
||||
/* This struct is forward declare in ore_io_state, but is private to here.
|
||||
* It is put on ios->sp2d for RAID5/6 writes only. See _gen_xor_unit.
|
||||
*
|
||||
* __stripe_pages_2d is a 2d array of pages, and it is also a corner turn.
|
||||
* Ascending page index access is sp2d(p-minor, c-major). But storage is
|
||||
* sp2d[p-minor][c-major], so it can be properlly presented to the async-xor
|
||||
* API.
|
||||
*/
|
||||
struct __stripe_pages_2d {
|
||||
/* Cache some hot path repeated calculations */
|
||||
unsigned parity;
|
||||
unsigned data_devs;
|
||||
unsigned pages_in_unit;
|
||||
|
||||
bool needed ;
|
||||
|
||||
/* Array size is pages_in_unit (layout->stripe_unit / PAGE_SIZE) */
|
||||
struct __1_page_stripe {
|
||||
bool alloc;
|
||||
unsigned write_count;
|
||||
struct async_submit_ctl submit;
|
||||
struct dma_async_tx_descriptor *tx;
|
||||
|
||||
/* The size of this array is data_devs + parity */
|
||||
struct page **pages;
|
||||
struct page **scribble;
|
||||
/* bool array, size of this array is data_devs */
|
||||
char *page_is_read;
|
||||
} _1p_stripes[];
|
||||
};
|
||||
|
||||
/* This can get bigger then a page. So support multiple page allocations
|
||||
* _sp2d_free should be called even if _sp2d_alloc fails (by returning
|
||||
* none-zero).
|
||||
*/
|
||||
static int _sp2d_alloc(unsigned pages_in_unit, unsigned group_width,
|
||||
unsigned parity, struct __stripe_pages_2d **psp2d)
|
||||
{
|
||||
struct __stripe_pages_2d *sp2d;
|
||||
unsigned data_devs = group_width - parity;
|
||||
|
||||
/*
|
||||
* Desired allocation layout is, though when larger than PAGE_SIZE,
|
||||
* each struct __alloc_1p_arrays is separately allocated:
|
||||
|
||||
struct _alloc_all_bytes {
|
||||
struct __alloc_stripe_pages_2d {
|
||||
struct __stripe_pages_2d sp2d;
|
||||
struct __1_page_stripe _1p_stripes[pages_in_unit];
|
||||
} __asp2d;
|
||||
struct __alloc_1p_arrays {
|
||||
struct page *pages[group_width];
|
||||
struct page *scribble[group_width];
|
||||
char page_is_read[data_devs];
|
||||
} __a1pa[pages_in_unit];
|
||||
} *_aab;
|
||||
|
||||
struct __alloc_1p_arrays *__a1pa;
|
||||
struct __alloc_1p_arrays *__a1pa_end;
|
||||
|
||||
*/
|
||||
|
||||
char *__a1pa;
|
||||
char *__a1pa_end;
|
||||
|
||||
const size_t sizeof_stripe_pages_2d =
|
||||
sizeof(struct __stripe_pages_2d) +
|
||||
sizeof(struct __1_page_stripe) * pages_in_unit;
|
||||
const size_t sizeof__a1pa =
|
||||
ALIGN(sizeof(struct page *) * (2 * group_width) + data_devs,
|
||||
sizeof(void *));
|
||||
const size_t sizeof__a1pa_arrays = sizeof__a1pa * pages_in_unit;
|
||||
const size_t alloc_total = sizeof_stripe_pages_2d +
|
||||
sizeof__a1pa_arrays;
|
||||
|
||||
unsigned num_a1pa, alloc_size, i;
|
||||
|
||||
/* FIXME: check these numbers in ore_verify_layout */
|
||||
BUG_ON(sizeof_stripe_pages_2d > PAGE_SIZE);
|
||||
BUG_ON(sizeof__a1pa > PAGE_SIZE);
|
||||
|
||||
/*
|
||||
* If alloc_total would be larger than PAGE_SIZE, only allocate
|
||||
* as many a1pa items as would fill the rest of the page, instead
|
||||
* of the full pages_in_unit count.
|
||||
*/
|
||||
if (alloc_total > PAGE_SIZE) {
|
||||
num_a1pa = (PAGE_SIZE - sizeof_stripe_pages_2d) / sizeof__a1pa;
|
||||
alloc_size = sizeof_stripe_pages_2d + sizeof__a1pa * num_a1pa;
|
||||
} else {
|
||||
num_a1pa = pages_in_unit;
|
||||
alloc_size = alloc_total;
|
||||
}
|
||||
|
||||
*psp2d = sp2d = kzalloc(alloc_size, GFP_KERNEL);
|
||||
if (unlikely(!sp2d)) {
|
||||
ORE_DBGMSG("!! Failed to alloc sp2d size=%d\n", alloc_size);
|
||||
return -ENOMEM;
|
||||
}
|
||||
/* From here Just call _sp2d_free */
|
||||
|
||||
/* Find start of a1pa area. */
|
||||
__a1pa = (char *)sp2d + sizeof_stripe_pages_2d;
|
||||
/* Find end of the _allocated_ a1pa area. */
|
||||
__a1pa_end = __a1pa + alloc_size;
|
||||
|
||||
/* Allocate additionally needed a1pa items in PAGE_SIZE chunks. */
|
||||
for (i = 0; i < pages_in_unit; ++i) {
|
||||
struct __1_page_stripe *stripe = &sp2d->_1p_stripes[i];
|
||||
|
||||
if (unlikely(__a1pa >= __a1pa_end)) {
|
||||
num_a1pa = min_t(unsigned, PAGE_SIZE / sizeof__a1pa,
|
||||
pages_in_unit - i);
|
||||
alloc_size = sizeof__a1pa * num_a1pa;
|
||||
|
||||
__a1pa = kzalloc(alloc_size, GFP_KERNEL);
|
||||
if (unlikely(!__a1pa)) {
|
||||
ORE_DBGMSG("!! Failed to _alloc_1p_arrays=%d\n",
|
||||
num_a1pa);
|
||||
return -ENOMEM;
|
||||
}
|
||||
__a1pa_end = __a1pa + alloc_size;
|
||||
/* First *pages is marked for kfree of the buffer */
|
||||
stripe->alloc = true;
|
||||
}
|
||||
|
||||
/*
|
||||
* Attach all _lp_stripes pointers to the allocation for
|
||||
* it which was either part of the original PAGE_SIZE
|
||||
* allocation or the subsequent allocation in this loop.
|
||||
*/
|
||||
stripe->pages = (void *)__a1pa;
|
||||
stripe->scribble = stripe->pages + group_width;
|
||||
stripe->page_is_read = (char *)stripe->scribble + group_width;
|
||||
__a1pa += sizeof__a1pa;
|
||||
}
|
||||
|
||||
sp2d->parity = parity;
|
||||
sp2d->data_devs = data_devs;
|
||||
sp2d->pages_in_unit = pages_in_unit;
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void _sp2d_reset(struct __stripe_pages_2d *sp2d,
|
||||
const struct _ore_r4w_op *r4w, void *priv)
|
||||
{
|
||||
unsigned data_devs = sp2d->data_devs;
|
||||
unsigned group_width = data_devs + sp2d->parity;
|
||||
int p, c;
|
||||
|
||||
if (!sp2d->needed)
|
||||
return;
|
||||
|
||||
for (c = data_devs - 1; c >= 0; --c)
|
||||
for (p = sp2d->pages_in_unit - 1; p >= 0; --p) {
|
||||
struct __1_page_stripe *_1ps = &sp2d->_1p_stripes[p];
|
||||
|
||||
if (_1ps->page_is_read[c]) {
|
||||
struct page *page = _1ps->pages[c];
|
||||
|
||||
r4w->put_page(priv, page);
|
||||
_1ps->page_is_read[c] = false;
|
||||
}
|
||||
}
|
||||
|
||||
for (p = 0; p < sp2d->pages_in_unit; p++) {
|
||||
struct __1_page_stripe *_1ps = &sp2d->_1p_stripes[p];
|
||||
|
||||
memset(_1ps->pages, 0, group_width * sizeof(*_1ps->pages));
|
||||
_1ps->write_count = 0;
|
||||
_1ps->tx = NULL;
|
||||
}
|
||||
|
||||
sp2d->needed = false;
|
||||
}
|
||||
|
||||
static void _sp2d_free(struct __stripe_pages_2d *sp2d)
|
||||
{
|
||||
unsigned i;
|
||||
|
||||
if (!sp2d)
|
||||
return;
|
||||
|
||||
for (i = 0; i < sp2d->pages_in_unit; ++i) {
|
||||
if (sp2d->_1p_stripes[i].alloc)
|
||||
kfree(sp2d->_1p_stripes[i].pages);
|
||||
}
|
||||
|
||||
kfree(sp2d);
|
||||
}
|
||||
|
||||
static unsigned _sp2d_min_pg(struct __stripe_pages_2d *sp2d)
|
||||
{
|
||||
unsigned p;
|
||||
|
||||
for (p = 0; p < sp2d->pages_in_unit; p++) {
|
||||
struct __1_page_stripe *_1ps = &sp2d->_1p_stripes[p];
|
||||
|
||||
if (_1ps->write_count)
|
||||
return p;
|
||||
}
|
||||
|
||||
return ~0;
|
||||
}
|
||||
|
||||
static unsigned _sp2d_max_pg(struct __stripe_pages_2d *sp2d)
|
||||
{
|
||||
int p;
|
||||
|
||||
for (p = sp2d->pages_in_unit - 1; p >= 0; --p) {
|
||||
struct __1_page_stripe *_1ps = &sp2d->_1p_stripes[p];
|
||||
|
||||
if (_1ps->write_count)
|
||||
return p;
|
||||
}
|
||||
|
||||
return ~0;
|
||||
}
|
||||
|
||||
static void _gen_xor_unit(struct __stripe_pages_2d *sp2d)
|
||||
{
|
||||
unsigned p;
|
||||
unsigned tx_flags = ASYNC_TX_ACK;
|
||||
|
||||
if (sp2d->parity == 1)
|
||||
tx_flags |= ASYNC_TX_XOR_ZERO_DST;
|
||||
|
||||
for (p = 0; p < sp2d->pages_in_unit; p++) {
|
||||
struct __1_page_stripe *_1ps = &sp2d->_1p_stripes[p];
|
||||
|
||||
if (!_1ps->write_count)
|
||||
continue;
|
||||
|
||||
init_async_submit(&_1ps->submit, tx_flags,
|
||||
NULL, NULL, NULL, (addr_conv_t *)_1ps->scribble);
|
||||
|
||||
if (sp2d->parity == 1)
|
||||
_1ps->tx = async_xor(_1ps->pages[sp2d->data_devs],
|
||||
_1ps->pages, 0, sp2d->data_devs,
|
||||
PAGE_SIZE, &_1ps->submit);
|
||||
else /* parity == 2 */
|
||||
_1ps->tx = async_gen_syndrome(_1ps->pages, 0,
|
||||
sp2d->data_devs + sp2d->parity,
|
||||
PAGE_SIZE, &_1ps->submit);
|
||||
}
|
||||
|
||||
for (p = 0; p < sp2d->pages_in_unit; p++) {
|
||||
struct __1_page_stripe *_1ps = &sp2d->_1p_stripes[p];
|
||||
/* NOTE: We wait for HW synchronously (I don't have such HW
|
||||
* to test with.) Is parallelism needed with today's multi
|
||||
* cores?
|
||||
*/
|
||||
async_tx_issue_pending(_1ps->tx);
|
||||
}
|
||||
}
|
||||
|
||||
void _ore_add_stripe_page(struct __stripe_pages_2d *sp2d,
|
||||
struct ore_striping_info *si, struct page *page)
|
||||
{
|
||||
struct __1_page_stripe *_1ps;
|
||||
|
||||
sp2d->needed = true;
|
||||
|
||||
_1ps = &sp2d->_1p_stripes[si->cur_pg];
|
||||
_1ps->pages[si->cur_comp] = page;
|
||||
++_1ps->write_count;
|
||||
|
||||
si->cur_pg = (si->cur_pg + 1) % sp2d->pages_in_unit;
|
||||
/* si->cur_comp is advanced outside at main loop */
|
||||
}
|
||||
|
||||
void _ore_add_sg_seg(struct ore_per_dev_state *per_dev, unsigned cur_len,
|
||||
bool not_last)
|
||||
{
|
||||
struct osd_sg_entry *sge;
|
||||
|
||||
ORE_DBGMSG("dev=%d cur_len=0x%x not_last=%d cur_sg=%d "
|
||||
"offset=0x%llx length=0x%x last_sgs_total=0x%x\n",
|
||||
per_dev->dev, cur_len, not_last, per_dev->cur_sg,
|
||||
_LLU(per_dev->offset), per_dev->length,
|
||||
per_dev->last_sgs_total);
|
||||
|
||||
if (!per_dev->cur_sg) {
|
||||
sge = per_dev->sglist;
|
||||
|
||||
/* First time we prepare two entries */
|
||||
if (per_dev->length) {
|
||||
++per_dev->cur_sg;
|
||||
sge->offset = per_dev->offset;
|
||||
sge->len = per_dev->length;
|
||||
} else {
|
||||
/* Here the parity is the first unit of this object.
|
||||
* This happens every time we reach a parity device on
|
||||
* the same stripe as the per_dev->offset. We need to
|
||||
* just skip this unit.
|
||||
*/
|
||||
per_dev->offset += cur_len;
|
||||
return;
|
||||
}
|
||||
} else {
|
||||
/* finalize the last one */
|
||||
sge = &per_dev->sglist[per_dev->cur_sg - 1];
|
||||
sge->len = per_dev->length - per_dev->last_sgs_total;
|
||||
}
|
||||
|
||||
if (not_last) {
|
||||
/* Partly prepare the next one */
|
||||
struct osd_sg_entry *next_sge = sge + 1;
|
||||
|
||||
++per_dev->cur_sg;
|
||||
next_sge->offset = sge->offset + sge->len + cur_len;
|
||||
/* Save cur len so we know how mutch was added next time */
|
||||
per_dev->last_sgs_total = per_dev->length;
|
||||
next_sge->len = 0;
|
||||
} else if (!sge->len) {
|
||||
/* Optimize for when the last unit is a parity */
|
||||
--per_dev->cur_sg;
|
||||
}
|
||||
}
|
||||
|
||||
static int _alloc_read_4_write(struct ore_io_state *ios)
|
||||
{
|
||||
struct ore_layout *layout = ios->layout;
|
||||
int ret;
|
||||
/* We want to only read those pages not in cache so worst case
|
||||
* is a stripe populated with every other page
|
||||
*/
|
||||
unsigned sgs_per_dev = ios->sp2d->pages_in_unit + 2;
|
||||
|
||||
ret = _ore_get_io_state(layout, ios->oc,
|
||||
layout->group_width * layout->mirrors_p1,
|
||||
sgs_per_dev, 0, &ios->ios_read_4_write);
|
||||
return ret;
|
||||
}
|
||||
|
||||
/* @si contains info of the to-be-inserted page. Update of @si should be
|
||||
* maintained by caller. Specificaly si->dev, si->obj_offset, ...
|
||||
*/
|
||||
static int _add_to_r4w(struct ore_io_state *ios, struct ore_striping_info *si,
|
||||
struct page *page, unsigned pg_len)
|
||||
{
|
||||
struct request_queue *q;
|
||||
struct ore_per_dev_state *per_dev;
|
||||
struct ore_io_state *read_ios;
|
||||
unsigned first_dev = si->dev - (si->dev %
|
||||
(ios->layout->group_width * ios->layout->mirrors_p1));
|
||||
unsigned comp = si->dev - first_dev;
|
||||
unsigned added_len;
|
||||
|
||||
if (!ios->ios_read_4_write) {
|
||||
int ret = _alloc_read_4_write(ios);
|
||||
|
||||
if (unlikely(ret))
|
||||
return ret;
|
||||
}
|
||||
|
||||
read_ios = ios->ios_read_4_write;
|
||||
read_ios->numdevs = ios->layout->group_width * ios->layout->mirrors_p1;
|
||||
|
||||
per_dev = &read_ios->per_dev[comp];
|
||||
if (!per_dev->length) {
|
||||
per_dev->bio = bio_kmalloc(GFP_KERNEL,
|
||||
ios->sp2d->pages_in_unit);
|
||||
if (unlikely(!per_dev->bio)) {
|
||||
ORE_DBGMSG("Failed to allocate BIO size=%u\n",
|
||||
ios->sp2d->pages_in_unit);
|
||||
return -ENOMEM;
|
||||
}
|
||||
per_dev->offset = si->obj_offset;
|
||||
per_dev->dev = si->dev;
|
||||
} else if (si->obj_offset != (per_dev->offset + per_dev->length)) {
|
||||
u64 gap = si->obj_offset - (per_dev->offset + per_dev->length);
|
||||
|
||||
_ore_add_sg_seg(per_dev, gap, true);
|
||||
}
|
||||
q = osd_request_queue(ore_comp_dev(read_ios->oc, per_dev->dev));
|
||||
added_len = bio_add_pc_page(q, per_dev->bio, page, pg_len,
|
||||
si->obj_offset % PAGE_SIZE);
|
||||
if (unlikely(added_len != pg_len)) {
|
||||
ORE_DBGMSG("Failed to bio_add_pc_page bi_vcnt=%d\n",
|
||||
per_dev->bio->bi_vcnt);
|
||||
return -ENOMEM;
|
||||
}
|
||||
|
||||
per_dev->length += pg_len;
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* read the beginning of an unaligned first page */
|
||||
static int _add_to_r4w_first_page(struct ore_io_state *ios, struct page *page)
|
||||
{
|
||||
struct ore_striping_info si;
|
||||
unsigned pg_len;
|
||||
|
||||
ore_calc_stripe_info(ios->layout, ios->offset, 0, &si);
|
||||
|
||||
pg_len = si.obj_offset % PAGE_SIZE;
|
||||
si.obj_offset -= pg_len;
|
||||
|
||||
ORE_DBGMSG("offset=0x%llx len=0x%x index=0x%lx dev=%x\n",
|
||||
_LLU(si.obj_offset), pg_len, page->index, si.dev);
|
||||
|
||||
return _add_to_r4w(ios, &si, page, pg_len);
|
||||
}
|
||||
|
||||
/* read the end of an incomplete last page */
|
||||
static int _add_to_r4w_last_page(struct ore_io_state *ios, u64 *offset)
|
||||
{
|
||||
struct ore_striping_info si;
|
||||
struct page *page;
|
||||
unsigned pg_len, p, c;
|
||||
|
||||
ore_calc_stripe_info(ios->layout, *offset, 0, &si);
|
||||
|
||||
p = si.cur_pg;
|
||||
c = si.cur_comp;
|
||||
page = ios->sp2d->_1p_stripes[p].pages[c];
|
||||
|
||||
pg_len = PAGE_SIZE - (si.unit_off % PAGE_SIZE);
|
||||
*offset += pg_len;
|
||||
|
||||
ORE_DBGMSG("p=%d, c=%d next-offset=0x%llx len=0x%x dev=%x par_dev=%d\n",
|
||||
p, c, _LLU(*offset), pg_len, si.dev, si.par_dev);
|
||||
|
||||
BUG_ON(!page);
|
||||
|
||||
return _add_to_r4w(ios, &si, page, pg_len);
|
||||
}
|
||||
|
||||
static void _mark_read4write_pages_uptodate(struct ore_io_state *ios, int ret)
|
||||
{
|
||||
struct bio_vec *bv;
|
||||
unsigned i, d;
|
||||
|
||||
/* loop on all devices all pages */
|
||||
for (d = 0; d < ios->numdevs; d++) {
|
||||
struct bio *bio = ios->per_dev[d].bio;
|
||||
|
||||
if (!bio)
|
||||
continue;
|
||||
|
||||
bio_for_each_segment_all(bv, bio, i) {
|
||||
struct page *page = bv->bv_page;
|
||||
|
||||
SetPageUptodate(page);
|
||||
if (PageError(page))
|
||||
ClearPageError(page);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* read_4_write is hacked to read the start of the first stripe and/or
|
||||
* the end of the last stripe. If needed, with an sg-gap at each device/page.
|
||||
* It is assumed to be called after the to_be_written pages of the first stripe
|
||||
* are populating ios->sp2d[][]
|
||||
*
|
||||
* NOTE: We call ios->r4w->lock_fn for all pages needed for parity calculations
|
||||
* These pages are held at sp2d[p].pages[c] but with
|
||||
* sp2d[p].page_is_read[c] = true. At _sp2d_reset these pages are
|
||||
* ios->r4w->lock_fn(). The ios->r4w->lock_fn might signal that the page is
|
||||
* @uptodate=true, so we don't need to read it, only unlock, after IO.
|
||||
*
|
||||
* TODO: The read_4_write should calc a need_to_read_pages_count, if bigger then
|
||||
* to-be-written count, we should consider the xor-in-place mode.
|
||||
* need_to_read_pages_count is the actual number of pages not present in cache.
|
||||
* maybe "devs_in_group - ios->sp2d[p].write_count" is a good enough
|
||||
* approximation? In this mode the read pages are put in the empty places of
|
||||
* ios->sp2d[p][*], xor is calculated the same way. These pages are
|
||||
* allocated/freed and don't go through cache
|
||||
*/
|
||||
static int _read_4_write_first_stripe(struct ore_io_state *ios)
|
||||
{
|
||||
struct ore_striping_info read_si;
|
||||
struct __stripe_pages_2d *sp2d = ios->sp2d;
|
||||
u64 offset = ios->si.first_stripe_start;
|
||||
unsigned c, p, min_p = sp2d->pages_in_unit, max_p = -1;
|
||||
|
||||
if (offset == ios->offset) /* Go to start collect $200 */
|
||||
goto read_last_stripe;
|
||||
|
||||
min_p = _sp2d_min_pg(sp2d);
|
||||
max_p = _sp2d_max_pg(sp2d);
|
||||
|
||||
ORE_DBGMSG("stripe_start=0x%llx ios->offset=0x%llx min_p=%d max_p=%d\n",
|
||||
offset, ios->offset, min_p, max_p);
|
||||
|
||||
for (c = 0; ; c++) {
|
||||
ore_calc_stripe_info(ios->layout, offset, 0, &read_si);
|
||||
read_si.obj_offset += min_p * PAGE_SIZE;
|
||||
offset += min_p * PAGE_SIZE;
|
||||
for (p = min_p; p <= max_p; p++) {
|
||||
struct __1_page_stripe *_1ps = &sp2d->_1p_stripes[p];
|
||||
struct page **pp = &_1ps->pages[c];
|
||||
bool uptodate;
|
||||
|
||||
if (*pp) {
|
||||
if (ios->offset % PAGE_SIZE)
|
||||
/* Read the remainder of the page */
|
||||
_add_to_r4w_first_page(ios, *pp);
|
||||
/* to-be-written pages start here */
|
||||
goto read_last_stripe;
|
||||
}
|
||||
|
||||
*pp = ios->r4w->get_page(ios->private, offset,
|
||||
&uptodate);
|
||||
if (unlikely(!*pp))
|
||||
return -ENOMEM;
|
||||
|
||||
if (!uptodate)
|
||||
_add_to_r4w(ios, &read_si, *pp, PAGE_SIZE);
|
||||
|
||||
/* Mark read-pages to be cache_released */
|
||||
_1ps->page_is_read[c] = true;
|
||||
read_si.obj_offset += PAGE_SIZE;
|
||||
offset += PAGE_SIZE;
|
||||
}
|
||||
offset += (sp2d->pages_in_unit - p) * PAGE_SIZE;
|
||||
}
|
||||
|
||||
read_last_stripe:
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int _read_4_write_last_stripe(struct ore_io_state *ios)
|
||||
{
|
||||
struct ore_striping_info read_si;
|
||||
struct __stripe_pages_2d *sp2d = ios->sp2d;
|
||||
u64 offset;
|
||||
u64 last_stripe_end;
|
||||
unsigned bytes_in_stripe = ios->si.bytes_in_stripe;
|
||||
unsigned c, p, min_p = sp2d->pages_in_unit, max_p = -1;
|
||||
|
||||
offset = ios->offset + ios->length;
|
||||
if (offset % PAGE_SIZE)
|
||||
_add_to_r4w_last_page(ios, &offset);
|
||||
/* offset will be aligned to next page */
|
||||
|
||||
last_stripe_end = div_u64(offset + bytes_in_stripe - 1, bytes_in_stripe)
|
||||
* bytes_in_stripe;
|
||||
if (offset == last_stripe_end) /* Optimize for the aligned case */
|
||||
goto read_it;
|
||||
|
||||
ore_calc_stripe_info(ios->layout, offset, 0, &read_si);
|
||||
p = read_si.cur_pg;
|
||||
c = read_si.cur_comp;
|
||||
|
||||
if (min_p == sp2d->pages_in_unit) {
|
||||
/* Didn't do it yet */
|
||||
min_p = _sp2d_min_pg(sp2d);
|
||||
max_p = _sp2d_max_pg(sp2d);
|
||||
}
|
||||
|
||||
ORE_DBGMSG("offset=0x%llx stripe_end=0x%llx min_p=%d max_p=%d\n",
|
||||
offset, last_stripe_end, min_p, max_p);
|
||||
|
||||
while (offset < last_stripe_end) {
|
||||
struct __1_page_stripe *_1ps = &sp2d->_1p_stripes[p];
|
||||
|
||||
if ((min_p <= p) && (p <= max_p)) {
|
||||
struct page *page;
|
||||
bool uptodate;
|
||||
|
||||
BUG_ON(_1ps->pages[c]);
|
||||
page = ios->r4w->get_page(ios->private, offset,
|
||||
&uptodate);
|
||||
if (unlikely(!page))
|
||||
return -ENOMEM;
|
||||
|
||||
_1ps->pages[c] = page;
|
||||
/* Mark read-pages to be cache_released */
|
||||
_1ps->page_is_read[c] = true;
|
||||
if (!uptodate)
|
||||
_add_to_r4w(ios, &read_si, page, PAGE_SIZE);
|
||||
}
|
||||
|
||||
offset += PAGE_SIZE;
|
||||
if (p == (sp2d->pages_in_unit - 1)) {
|
||||
++c;
|
||||
p = 0;
|
||||
ore_calc_stripe_info(ios->layout, offset, 0, &read_si);
|
||||
} else {
|
||||
read_si.obj_offset += PAGE_SIZE;
|
||||
++p;
|
||||
}
|
||||
}
|
||||
|
||||
read_it:
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int _read_4_write_execute(struct ore_io_state *ios)
|
||||
{
|
||||
struct ore_io_state *ios_read;
|
||||
unsigned i;
|
||||
int ret;
|
||||
|
||||
ios_read = ios->ios_read_4_write;
|
||||
if (!ios_read)
|
||||
return 0;
|
||||
|
||||
/* FIXME: Ugly to signal _sbi_read_mirror that we have bio(s). Change
|
||||
* to check for per_dev->bio
|
||||
*/
|
||||
ios_read->pages = ios->pages;
|
||||
|
||||
/* Now read these devices */
|
||||
for (i = 0; i < ios_read->numdevs; i += ios_read->layout->mirrors_p1) {
|
||||
ret = _ore_read_mirror(ios_read, i);
|
||||
if (unlikely(ret))
|
||||
return ret;
|
||||
}
|
||||
|
||||
ret = ore_io_execute(ios_read); /* Synchronus execution */
|
||||
if (unlikely(ret)) {
|
||||
ORE_DBGMSG("!! ore_io_execute => %d\n", ret);
|
||||
return ret;
|
||||
}
|
||||
|
||||
_mark_read4write_pages_uptodate(ios_read, ret);
|
||||
ore_put_io_state(ios_read);
|
||||
ios->ios_read_4_write = NULL; /* Might need a reuse at last stripe */
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* In writes @cur_len means length left. .i.e cur_len==0 is the last parity U */
|
||||
int _ore_add_parity_unit(struct ore_io_state *ios,
|
||||
struct ore_striping_info *si,
|
||||
struct ore_per_dev_state *per_dev,
|
||||
unsigned cur_len, bool do_xor)
|
||||
{
|
||||
if (ios->reading) {
|
||||
if (per_dev->cur_sg >= ios->sgs_per_dev) {
|
||||
ORE_DBGMSG("cur_sg(%d) >= sgs_per_dev(%d)\n" ,
|
||||
per_dev->cur_sg, ios->sgs_per_dev);
|
||||
return -ENOMEM;
|
||||
}
|
||||
_ore_add_sg_seg(per_dev, cur_len, true);
|
||||
} else {
|
||||
struct __stripe_pages_2d *sp2d = ios->sp2d;
|
||||
struct page **pages = ios->parity_pages + ios->cur_par_page;
|
||||
unsigned num_pages;
|
||||
unsigned array_start = 0;
|
||||
unsigned i;
|
||||
int ret;
|
||||
|
||||
si->cur_pg = _sp2d_min_pg(sp2d);
|
||||
num_pages = _sp2d_max_pg(sp2d) + 1 - si->cur_pg;
|
||||
|
||||
if (!per_dev->length) {
|
||||
per_dev->offset += si->cur_pg * PAGE_SIZE;
|
||||
/* If first stripe, Read in all read4write pages
|
||||
* (if needed) before we calculate the first parity.
|
||||
*/
|
||||
if (do_xor)
|
||||
_read_4_write_first_stripe(ios);
|
||||
}
|
||||
if (!cur_len && do_xor)
|
||||
/* If last stripe r4w pages of last stripe */
|
||||
_read_4_write_last_stripe(ios);
|
||||
_read_4_write_execute(ios);
|
||||
|
||||
for (i = 0; i < num_pages; i++) {
|
||||
pages[i] = _raid_page_alloc();
|
||||
if (unlikely(!pages[i]))
|
||||
return -ENOMEM;
|
||||
|
||||
++(ios->cur_par_page);
|
||||
}
|
||||
|
||||
BUG_ON(si->cur_comp < sp2d->data_devs);
|
||||
BUG_ON(si->cur_pg + num_pages > sp2d->pages_in_unit);
|
||||
|
||||
ret = _ore_add_stripe_unit(ios, &array_start, 0, pages,
|
||||
per_dev, num_pages * PAGE_SIZE);
|
||||
if (unlikely(ret))
|
||||
return ret;
|
||||
|
||||
if (do_xor) {
|
||||
_gen_xor_unit(sp2d);
|
||||
_sp2d_reset(sp2d, ios->r4w, ios->private);
|
||||
}
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
int _ore_post_alloc_raid_stuff(struct ore_io_state *ios)
|
||||
{
|
||||
if (ios->parity_pages) {
|
||||
struct ore_layout *layout = ios->layout;
|
||||
unsigned pages_in_unit = layout->stripe_unit / PAGE_SIZE;
|
||||
|
||||
if (_sp2d_alloc(pages_in_unit, layout->group_width,
|
||||
layout->parity, &ios->sp2d)) {
|
||||
return -ENOMEM;
|
||||
}
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
void _ore_free_raid_stuff(struct ore_io_state *ios)
|
||||
{
|
||||
if (ios->sp2d) { /* writing and raid */
|
||||
unsigned i;
|
||||
|
||||
for (i = 0; i < ios->cur_par_page; i++) {
|
||||
struct page *page = ios->parity_pages[i];
|
||||
|
||||
if (page)
|
||||
_raid_page_free(page);
|
||||
}
|
||||
if (ios->extra_part_alloc)
|
||||
kfree(ios->parity_pages);
|
||||
/* If IO returned an error pages might need unlocking */
|
||||
_sp2d_reset(ios->sp2d, ios->r4w, ios->private);
|
||||
_sp2d_free(ios->sp2d);
|
||||
} else {
|
||||
/* Will only be set if raid reading && sglist is big */
|
||||
if (ios->extra_part_alloc)
|
||||
kfree(ios->per_dev[0].sglist);
|
||||
}
|
||||
if (ios->ios_read_4_write)
|
||||
ore_put_io_state(ios->ios_read_4_write);
|
||||
}
|
@ -1,62 +0,0 @@
|
||||
/*
|
||||
* Copyright (C) from 2011
|
||||
* Boaz Harrosh <ooo@electrozaur.com>
|
||||
*
|
||||
* This file is part of the objects raid engine (ore).
|
||||
*
|
||||
* It is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License version 2 as published
|
||||
* by the Free Software Foundation.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with "ore". If not, write to the Free Software Foundation, Inc:
|
||||
* "Free Software Foundation <info@fsf.org>"
|
||||
*/
|
||||
|
||||
#include <scsi/osd_ore.h>
|
||||
|
||||
/* Error message, always compiled in, prefixed with "ore: ". */
#define ORE_ERR(fmt, a...) printk(KERN_ERR "ore: " fmt, ##a)

/*
 * Debug message.  With CONFIG_EXOFS_DEBUG it is printed together with the
 * function name and line number; without it the call is compiled out but the
 * arguments are still type-checked against the format.
 */
#ifdef CONFIG_EXOFS_DEBUG
#define ORE_DBGMSG(fmt, a...) \
	printk(KERN_NOTICE "ore @%s:%d: " fmt, __func__, __LINE__, ##a)
#else
#define ORE_DBGMSG(fmt, a...) \
	do { if (0) printk(fmt, ##a); } while (0)
#endif

/*
 * u64 has problems with printk; this casts it to unsigned long long.
 * The expansion is fully parenthesized so that it behaves as one operand in
 * any expression (e.g. as the operand of sizeof).
 */
#define _LLU(x) ((unsigned long long)(x))

/* Second-level debug messages are disabled by default. */
#define ORE_DBGMSG2(M...) do {} while (0)
/* #define ORE_DBGMSG2 ORE_DBGMSG */
|
||||
|
||||
/* ios_raid.c stuff needed by ios.c */
|
||||
int _ore_post_alloc_raid_stuff(struct ore_io_state *ios);
|
||||
void _ore_free_raid_stuff(struct ore_io_state *ios);
|
||||
|
||||
void _ore_add_sg_seg(struct ore_per_dev_state *per_dev, unsigned cur_len,
|
||||
bool not_last);
|
||||
int _ore_add_parity_unit(struct ore_io_state *ios, struct ore_striping_info *si,
|
||||
struct ore_per_dev_state *per_dev, unsigned cur_len,
|
||||
bool do_xor);
|
||||
void _ore_add_stripe_page(struct __stripe_pages_2d *sp2d,
|
||||
struct ore_striping_info *si, struct page *page);
|
||||
/*
 * Inline fast path in front of _ore_add_stripe_page(): nothing is done when
 * there is no raid state (@sp2d is NULL).
 */
static inline void _add_stripe_page(struct __stripe_pages_2d *sp2d,
				    struct ore_striping_info *si,
				    struct page *page)
{
	if (sp2d)
		_ore_add_stripe_page(sp2d, si, page);
}
|
||||
|
||||
/*
 * Core ("ios.c") helpers that are needed by the RAID ("ios_raid.c") part.
 */
int _ore_get_io_state(struct ore_layout *layout,
		      struct ore_components *oc, unsigned numdevs,
		      unsigned sgs_per_dev, unsigned num_par_pages,
		      struct ore_io_state **pios);
int _ore_add_stripe_unit(struct ore_io_state *ios, unsigned *cur_pg,
			 unsigned pgbase, struct page **pages,
			 struct ore_per_dev_state *per_dev, int cur_len);
int _ore_read_mirror(struct ore_io_state *ios, unsigned cur_comp);
int ore_io_execute(struct ore_io_state *ios);
|
1071
fs/exofs/super.c
1071
fs/exofs/super.c
File diff suppressed because it is too large
Load Diff
205
fs/exofs/sys.c
205
fs/exofs/sys.c
@ -1,205 +0,0 @@
|
||||
/*
|
||||
* Copyright (C) 2012
|
||||
* Sachin Bhamare <sbhamare@panasas.com>
|
||||
* Boaz Harrosh <ooo@electrozaur.com>
|
||||
*
|
||||
* This file is part of exofs.
|
||||
*
|
||||
* exofs is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License 2 as published by
|
||||
* the Free Software Foundation.
|
||||
*
|
||||
* exofs is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with exofs; if not, write to the:
|
||||
* Free Software Foundation <licensing@fsf.org>
|
||||
*/
|
||||
|
||||
#include <linux/kobject.h>
|
||||
#include <linux/device.h>
|
||||
|
||||
#include "exofs.h"
|
||||
|
||||
struct odev_attr {
|
||||
struct attribute attr;
|
||||
ssize_t (*show)(struct exofs_dev *, char *);
|
||||
ssize_t (*store)(struct exofs_dev *, const char *, size_t);
|
||||
};
|
||||
|
||||
static ssize_t odev_attr_show(struct kobject *kobj, struct attribute *attr,
|
||||
char *buf)
|
||||
{
|
||||
struct exofs_dev *edp = container_of(kobj, struct exofs_dev, ed_kobj);
|
||||
struct odev_attr *a = container_of(attr, struct odev_attr, attr);
|
||||
|
||||
return a->show ? a->show(edp, buf) : 0;
|
||||
}
|
||||
|
||||
/*
 * sysfs ->store entry point: recover the exofs_dev and the odev_attr from
 * the generic arguments and call the attribute's store callback.  Returns
 * @len when the attribute has none.
 */
static ssize_t odev_attr_store(struct kobject *kobj, struct attribute *attr,
			       const char *buf, size_t len)
{
	struct odev_attr *oattr = container_of(attr, struct odev_attr, attr);
	struct exofs_dev *dev = container_of(kobj, struct exofs_dev, ed_kobj);

	if (!oattr->store)
		return len;

	return oattr->store(dev, buf, len);
}
|
||||
|
||||
static const struct sysfs_ops odev_attr_ops = {
|
||||
.show = odev_attr_show,
|
||||
.store = odev_attr_store,
|
||||
};
|
||||
|
||||
|
||||
/* The kset whose list holds the exofs sysfs kobjects. */
static struct kset *exofs_kset;
|
||||
|
||||
static ssize_t osdname_show(struct exofs_dev *edp, char *buf)
|
||||
{
|
||||
struct osd_dev *odev = edp->ored.od;
|
||||
const struct osd_dev_info *odi = osduld_device_info(odev);
|
||||
|
||||
return snprintf(buf, odi->osdname_len + 1, "%s", odi->osdname);
|
||||
}
|
||||
|
||||
static ssize_t systemid_show(struct exofs_dev *edp, char *buf)
|
||||
{
|
||||
struct osd_dev *odev = edp->ored.od;
|
||||
const struct osd_dev_info *odi = osduld_device_info(odev);
|
||||
|
||||
memcpy(buf, odi->systemid, odi->systemid_len);
|
||||
return odi->systemid_len;
|
||||
}
|
||||
|
||||
static ssize_t uri_show(struct exofs_dev *edp, char *buf)
|
||||
{
|
||||
return snprintf(buf, edp->urilen, "%s", edp->uri);
|
||||
}
|
||||
|
||||
/*
 * "uri" attribute: replace the device's stored URI with the string written
 * by user space.
 *
 * @edp: device whose uri/urilen fields are updated
 * @buf: NUL-terminated input string
 * @len: number of bytes user space wrote
 *
 * Returns @len on success so the whole write is reported as consumed, or
 * -ENOMEM if the buffer could not be resized. On failure the previous
 * uri/urilen pair is left untouched and stays consistent.
 */
static ssize_t uri_store(struct exofs_dev *edp, const char *buf, size_t len)
{
	size_t new_len = strlen(buf) + 1;	/* include the terminating NUL */
	uint8_t *new_uri;

	new_uri = krealloc(edp->uri, new_len, GFP_KERNEL);
	if (new_uri == NULL)
		return -ENOMEM;

	/* The length is already known, so copy the string and its NUL directly. */
	memcpy(new_uri, buf, new_len);

	/* Publish pointer and length together, only after the copy succeeded. */
	edp->uri = new_uri;
	edp->urilen = new_len;

	return len;
}
|
||||
|
||||
#define OSD_ATTR(name, mode, show, store) \
|
||||
static struct odev_attr odev_attr_##name = \
|
||||
__ATTR(name, mode, show, store)
|
||||
|
||||
OSD_ATTR(osdname, S_IRUGO, osdname_show, NULL);
|
||||
OSD_ATTR(systemid, S_IRUGO, systemid_show, NULL);
|
||||
OSD_ATTR(uri, S_IRWXU, uri_show, uri_store);
|
||||
|
||||
static struct attribute *odev_attrs[] = {
|
||||
&odev_attr_osdname.attr,
|
||||
&odev_attr_systemid.attr,
|
||||
&odev_attr_uri.attr,
|
||||
NULL,
|
||||
};
|
||||
|
||||
static struct kobj_type odev_ktype = {
|
||||
.default_attrs = odev_attrs,
|
||||
.sysfs_ops = &odev_attr_ops,
|
||||
};
|
||||
|
||||
/*
 * kobj_type of the per-superblock kobject (see exofs_sysfs_sb_add()).
 * Intentionally left empty: no attributes, sysfs_ops or callbacks are set.
 */
static struct kobj_type uuid_ktype = {
};
|
||||
|
||||
/*
 * Debug helper: when CONFIG_EXOFS_DEBUG is defined, log the name and current
 * reference count of every kobject on exofs_kset's list. Compiles to an
 * empty function otherwise.
 */
void exofs_sysfs_dbg_print(void)
{
#ifdef CONFIG_EXOFS_DEBUG
	struct kobject *cur, *next;

	list_for_each_entry_safe(cur, next, &exofs_kset->list, entry) {
		int refs = (int)kref_read(&cur->kref);

		printk(KERN_INFO "%s: name %s ref %d\n",
		       __func__, kobject_name(cur), refs);
	}
#endif
}
|
||||
/*
|
||||
* This function removes all kobjects under exofs_kset
|
||||
* At the end of it, exofs_kset kobject will have a refcount
|
||||
* of 1 which gets decremented only on exofs module unload
|
||||
*/
|
||||
void exofs_sysfs_sb_del(struct exofs_sb_info *sbi)
|
||||
{
|
||||
struct kobject *k_name, *k_tmp;
|
||||
struct kobject *s_kobj = &sbi->s_kobj;
|
||||
|
||||
list_for_each_entry_safe(k_name, k_tmp, &exofs_kset->list, entry) {
|
||||
/* Remove all that are children of this SBI */
|
||||
if (k_name->parent == s_kobj)
|
||||
kobject_put(k_name);
|
||||
}
|
||||
kobject_put(s_kobj);
|
||||
}
|
||||
|
||||
/*
|
||||
* This function creates sysfs entries to hold the current exofs cluster
|
||||
* instance (uniquely identified by osdname,pid tuple).
|
||||
* This function gets called once per exofs mount instance.
|
||||
*/
|
||||
int exofs_sysfs_sb_add(struct exofs_sb_info *sbi,
|
||||
struct exofs_dt_device_info *dt_dev)
|
||||
{
|
||||
struct kobject *s_kobj;
|
||||
int retval = 0;
|
||||
uint64_t pid = sbi->one_comp.obj.partition;
|
||||
|
||||
/* allocate new uuid dirent */
|
||||
s_kobj = &sbi->s_kobj;
|
||||
s_kobj->kset = exofs_kset;
|
||||
retval = kobject_init_and_add(s_kobj, &uuid_ktype,
|
||||
&exofs_kset->kobj, "%s_%llx", dt_dev->osdname, pid);
|
||||
if (retval) {
|
||||
EXOFS_ERR("ERROR: Failed to create sysfs entry for "
|
||||
"uuid-%s_%llx => %d\n", dt_dev->osdname, pid, retval);
|
||||
return -ENOMEM;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
int exofs_sysfs_odev_add(struct exofs_dev *edev, struct exofs_sb_info *sbi)
|
||||
{
|
||||
struct kobject *d_kobj;
|
||||
int retval = 0;
|
||||
|
||||
/* create osd device group which contains following attributes
|
||||
* osdname, systemid & uri
|
||||
*/
|
||||
d_kobj = &edev->ed_kobj;
|
||||
d_kobj->kset = exofs_kset;
|
||||
retval = kobject_init_and_add(d_kobj, &odev_ktype,
|
||||
&sbi->s_kobj, "dev%u", edev->did);
|
||||
if (retval) {
|
||||
EXOFS_ERR("ERROR: Failed to create sysfs entry for "
|
||||
"device dev%u\n", edev->did);
|
||||
return retval;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
int exofs_sysfs_init(void)
|
||||
{
|
||||
exofs_kset = kset_create_and_add("exofs", NULL, fs_kobj);
|
||||
if (!exofs_kset) {
|
||||
EXOFS_ERR("ERROR: kset_create_and_add exofs failed\n");
|
||||
return -ENOMEM;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
void exofs_sysfs_uninit(void)
|
||||
{
|
||||
kset_unregister(exofs_kset);
|
||||
}
|
Loading…
Reference in New Issue
Block a user