diff --git a/.gitignore b/.gitignore index 66fd13c..d9592b4 100644 --- a/.gitignore +++ b/.gitignore @@ -13,3 +13,4 @@ # Dependency directories (remove the comment below to include it) # vendor/ +.idea/ diff --git a/.travis.yml b/.travis.yml index d6a4d97..86de2cb 100644 --- a/.travis.yml +++ b/.travis.yml @@ -22,9 +22,6 @@ addons: - attr - lsof - gcc-mingw-w64 # for windows -before_install: - - source ./check-changed.sh - - if [ $SKIP_TEST == true ]; then exit 0; fi install: true before_script: - export GO111MODULE=on diff --git a/ADOPTERS.md b/ADOPTERS.md new file mode 100644 index 0000000..8f4d2bc --- /dev/null +++ b/ADOPTERS.md @@ -0,0 +1,29 @@ +# JuiceFS Adopters + +For users of JuiceFS: + +| Company/Team | Environment | Use Cases | +| :--- | :--- | :--- | +| [Xiaomi](https://www.mi.com) | Production | AI | +| [Li Auto Inc.](https://www.lixiang.com) | Production | Big Data, AI | +| [Shopee](https://shopee.com) | Production | Big Data | +| [Zhihu](https://www.zhihu.com) | Production | Big Data | +| [Yaoxin Financing Re-Guarantee](https://www.yaoxinhd.com) | Production | Big Data, File Sharing | +| [Megvii](https://megvii.com) | Production | AI | +| [Piesat Information Technology Co., Ltd.](https://www.piesat.cn) | Production | File Sharing | +| [Gene Way](http://www.geneway.cn) | Production | File Sharing | +| [Dingdong Fresh](https://www.100.me) | Testing | Big Data | +| [UniSound](https://www.unisound.com) | Testing | AI | +| [SF-Express](https://www.sf-express.com) | Testing | Big Data, File Sharing | +| [BIGO](https://bigo.tv) | Testing | AI | +| [Inner Mongolia MENGSHANG Consumer Finance Co., Ltd.](https://www.mengshangxiaofei.com) | Testing | File Sharing | + +For the JuiceFS community ecosystem: + +- [Megvii](https://en.megvii.com) team contributed [Python SDK](https://github.com/megvii-research/juicefs-python). +- [PaddlePaddle](https://github.com/paddlepaddle/paddle) team has integrated JuiceFS into [Paddle Operator](https://github.com/PaddleFlow/paddle-operator), please refer to [the document](https://github.com/PaddleFlow/paddle-operator/blob/sampleset/docs/en/ext-overview.md). +- Build a distributed [Milvus](https://milvus.io) cluster based on JuiceFS, the Milvus team wrote a [case sharing](https://zilliz.com/blog/building-a-milvus-cluster-based-on-juicefs) and [tutorial](https://tutorials.milvus.io/en-juicefs/index.html?index=..%2F..index#0). +- [Apache Kylin 4.0](http://kylin.apache.org) that is a OLAP engine could deploy with the JuiceFS in dissaggregated storage and compute architecture on every public cloud platform, there is [the video sharing](https://www.bilibili.com/video/BV1c54y1W72S) (in Chinese) and [the post](https://juicefs.com/blog/en/posts/optimize-kylin-on-juicefs/) for this use case. +- [UniSound](https://www.unisound.com) team participated in the development of [Fluid](https://github.com/fluid-cloudnative/fluid) JuiceFSRuntime cache engine, please refer to [this document](https://github.com/fluid-cloudnative/fluid/blob/master/docs/en/samples/juicefs_runtime.md). + +You are welcome to share your experience after using JuiceFS, either by submitting a Pull Request directly to this list, or by contacting us at hello@juicedata.io. 
diff --git a/ADOPTERS_CN.md b/ADOPTERS_CN.md new file mode 100644 index 0000000..230285b --- /dev/null +++ b/ADOPTERS_CN.md @@ -0,0 +1,29 @@ +# JuiceFS 使用者 + +使用 JuiceFS 的用户: + +| 公司/团队 | 使用环境 | 用例 | +| :--- | :--- | :--- | +| [小米](https://www.mi.com) | Production | AI | +| [理想汽车](https://www.lixiang.com) | Production | 大数据,AI | +| [Shopee](https://shopee.com) | Production | 大数据 | +| [知乎](https://www.zhihu.com) | Production | 大数据 | +| [尧信](https://www.yaoxinhd.com) | Production | 大数据,共享文件存储 | +| [旷视](https://megvii.com) | Production | AI | +| [航天宏图](https://www.piesat.cn) | Production | 共享文件存储 | +| [溯源精微](http://www.geneway.cn) | Production | 共享文件存储 | +| [叮咚买菜](https://www.100.me) | Testing | 大数据 | +| [云知声](https://www.unisound.com) | Testing | AI | +| [顺丰速运](https://www.sf-express.com) | Testing | 大数据,共享文件存储 | +| [BIGO](https://bigo.tv) | Testing | AI | +| [蒙商消费金融](https://www.mengshangxiaofei.com) | Testing | 共享文件存储 | + +此外在社区生态方面, + +- [旷视科技](https://megvii.com)团队贡献了 [Python SDK](https://github.com/megvii-research/juicefs-python)。 +- [PaddlePaddle](https://github.com/paddlepaddle/paddle) 团队已将 JuiceFS 缓存加速特性集成到 [Paddle Operator](https://github.com/PaddleFlow/paddle-operator) 中,具体请参考[文档](https://github.com/PaddleFlow/paddle-operator/blob/sampleset/docs/zh_CN/ext-overview.md)。 +- 通过 JuiceFS 可以轻松搭建一个 [Milvus](https://milvus.io) 向量搜索引擎,Milvus 团队已经撰写了官方 [案例](https://zilliz.com/blog/building-a-milvus-cluster-based-on-juicefs) 与 [教程](https://tutorials.milvus.io/en-juicefs/index.html?index=..%2F..index#0)。 +- 大数据 OLAP 分析引擎 [Apache Kylin 4.0](http://kylin.apache.org) 可以使用 JuiceFS 在所有公有云上轻松部署存储计算分离架构的集群,请看 [视频分享](https://www.bilibili.com/video/BV1c54y1W72S) 和 [案例文章](https://juicefs.com/blog/cn/posts/optimize-kylin-on-juicefs/)。 +- [云知声](https://www.unisound.com) 团队参与开发 [Fluid](https://github.com/fluid-cloudnative/fluid) JuiceFSRuntime 缓存引擎,具体请参考[文档](https://github.com/fluid-cloudnative/fluid/blob/master/docs/zh/samples/juicefs_runtime.md) 。 + +欢迎你在使用 JuiceFS 后,向大家分享你的使用经验,可以直接向这个列表提交 Pull Request,或者联系我们 hello@juicedata.io。 diff --git a/CODE_OF_CONDUCT.md b/CODE_OF_CONDUCT.md new file mode 100644 index 0000000..e69de29 diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md new file mode 100644 index 0000000..c958590 --- /dev/null +++ b/CONTRIBUTING.md @@ -0,0 +1,40 @@ +# Contributing to JuiceFS + +## Guidelines + +- Before starting work on a feature or bug fix, please search GitHub or reach out to us via GitHub, Slack etc. The purpose of this step is make sure no one else is already working on it and we'll ask you to open a GitHub issue if necessary. +- We will use the GitHub issue to discuss the feature and come to agreement. This is to prevent your time being wasted, as well as ours. +- If it is a major feature update, we highly recommend you also write a design document to help the community understand your motivation and solution. +- A good way to find a project properly sized for a first time contributor is to search for open issues with the label ["kind/good-first-issue"](https://github.com/juicedata/juicefs/labels/kind%2Fgood-first-issue) or ["kind/help-wanted"](https://github.com/juicedata/juicefs/labels/kind%2Fhelp-wanted). + +## Coding Style + +- We're following ["Effective Go"](https://golang.org/doc/effective_go.html) and ["Go Code Review Comments"](https://github.com/golang/go/wiki/CodeReviewComments). +- Use `go fmt` to format your code before committing. 
You can find information in editor support for Go tools in ["IDEs and Plugins for Go"](https://github.com/golang/go/wiki/IDEsAndTextEditorPlugins). +- If you see any code which clearly violates the style guide, please fix it and send a pull request. +- Every new source file must begin with a license header. +- Install [pre-commit](https://pre-commit.com/) and use it to set up a pre-commit hook for static analysis. Just run `pre-commit install` in the root of the repo. + +## Sign the CLA + +Before you can contribute to JuiceFS, you will need to sign the [Contributor License Agreement](https://cla-assistant.io/juicedata/juicefs). There're a CLA assistant to guide you when you first time submit a pull request. + +## What is a Good PR + +- Presence of unit tests +- Adherence to the coding style +- Adequate in-line comments +- Explanatory commit message + +## Contribution Flow + +This is a rough outline of what a contributor's workflow looks like: + +- Create a topic branch from where to base the contribution. This is usually `main`. +- Make commits of logical units. +- Make sure commit messages are in the proper format. +- Push changes in a topic branch to a personal fork of the repository. +- Submit a pull request to [juicedata/juicefs](https://github.com/juicedata/juicefs/compare). The PR should link to one issue which either created by you or others. +- The PR must receive approval from at least one maintainer before it be merged. + +Happy hacking! diff --git a/LICENSE b/LICENSE new file mode 100644 index 0000000..d645695 --- /dev/null +++ b/LICENSE @@ -0,0 +1,202 @@ + + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. 
For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. 
You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. 
In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + + Copyright [yyyy] [name of copyright owner] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
diff --git a/Makefile b/Makefile new file mode 100644 index 0000000..db46fac --- /dev/null +++ b/Makefile @@ -0,0 +1,63 @@ +export GO111MODULE=on + +all: juicefs + +REVISION := $(shell git rev-parse --short HEAD 2>/dev/null) +REVISIONDATE := $(shell git log -1 --pretty=format:'%ad' --date short 2>/dev/null) +PKG := github.com/juicedata/juicefs/pkg/version +LDFLAGS = -s -w +ifneq ($(strip $(REVISION)),) # Use git clone + LDFLAGS += -X $(PKG).revision=$(REVISION) \ + -X $(PKG).revisionDate=$(REVISIONDATE) +endif + +SHELL = /bin/sh + +ifdef STATIC + LDFLAGS += -linkmode external -extldflags '-static' + CC = /usr/bin/musl-gcc + export CC +endif + +juicefs: Makefile cmd/*.go pkg/*/*.go + go build -ldflags="$(LDFLAGS)" -o juicefs ./cmd + +juicefs.lite: Makefile cmd/*.go pkg/*/*.go + go build -tags nogateway,nocos,nobos,nohdfs,noibmcos,noobs,nooss,noqingstor,noscs,nosftp,noswift,noupyun,noazure,nogs,noufile,nob2,nosqlite,nomysql,nopg,notikv,nobadger \ + -ldflags="$(LDFLAGS)" -o juicefs.lite ./cmd + +juicefs.ceph: Makefile cmd/*.go pkg/*/*.go + go build -tags ceph -ldflags="$(LDFLAGS)" -o juicefs.ceph ./cmd + +/usr/local/include/winfsp: + sudo mkdir -p /usr/local/include/winfsp + sudo cp hack/winfsp_headers/* /usr/local/include/winfsp + +juicefs.exe: /usr/local/include/winfsp cmd/*.go pkg/*/*.go + GOOS=windows CGO_ENABLED=1 CC=x86_64-w64-mingw32-gcc \ + go build -ldflags="$(LDFLAGS)" -buildmode exe -o juicefs.exe ./cmd + +.PHONY: snapshot release test +snapshot: + docker run --rm --privileged \ + -e PRIVATE_KEY=${PRIVATE_KEY} \ + -v ~/go/pkg/mod:/go/pkg/mod \ + -v `pwd`:/go/src/github.com/juicedata/juicefs \ + -v /var/run/docker.sock:/var/run/docker.sock \ + -w /go/src/github.com/juicedata/juicefs \ + juicedata/golang-cross:latest release --snapshot --rm-dist --skip-publish + +release: + docker run --rm --privileged \ + -e PRIVATE_KEY=${PRIVATE_KEY} \ + --env-file .release-env \ + -v ~/go/pkg/mod:/go/pkg/mod \ + -v `pwd`:/go/src/github.com/juicedata/juicefs \ + -v /var/run/docker.sock:/var/run/docker.sock \ + -w /go/src/github.com/juicedata/juicefs \ + juicedata/golang-cross:latest release --rm-dist + +test: + JFS_PAGE_STACK=1 go test -v -cover ./pkg/... -coverprofile=cov1.out + sudo JFS_PAGE_STACK=1 JFS_GC_SKIPPEDTIME=1 `which go` test -v -cover ./cmd/... -coverprofile=cov2.out + sudo `which go` test ./integration/... -cover -coverprofile=cov3.out -coverpkg=./pkg/...,./cmd/... diff --git a/README.md b/README.md index ab7dac6..82ce9f2 100644 --- a/README.md +++ b/README.md @@ -1,2 +1,227 @@ -# test-ci -test some ci problem that not easy to resolve +

+[JuiceFS logo and badges: Build Status | Join Slack | Go Report | 中文手册]
+
+**JuiceFS** is a high-performance [POSIX](https://en.wikipedia.org/wiki/POSIX) file system released under Apache License 2.0. It is specially optimized for cloud-native environments. When you store data with JuiceFS, the data itself is persisted in object storage (e.g. Amazon S3), while the corresponding metadata can be persisted in various database engines such as Redis, MySQL, and SQLite, depending on your scenario.
+
+JuiceFS conveniently connects massive cloud storage directly to big data, machine learning, AI, and other application platforms that are already running in production. Without any code changes, you can use massive cloud storage as efficiently as local storage.
+
+📺 **Video**: [What is JuiceFS?](https://www.youtube.com/watch?v=8RdZoBG-D6Y)
+
+## Highlighted Features
+
+1. **Fully POSIX-compatible**: Use it like a local file system and integrate seamlessly with existing applications, with no changes to your business code.
+2. **Fully Hadoop-compatible**: The JuiceFS [Hadoop Java SDK](docs/en/deployment/hadoop_java_sdk.md) is compatible with Hadoop 2.x, Hadoop 3.x, and a variety of components in the Hadoop ecosystem.
+3. **S3-compatible**: The JuiceFS [S3 Gateway](docs/en/deployment/s3_gateway.md) provides an S3-compatible interface.
+4. **Cloud Native**: JuiceFS provides a [Kubernetes CSI driver](docs/en/deployment/how_to_use_on_kubernetes.md) for anyone who wants to use JuiceFS in Kubernetes.
+5. **Sharing**: JuiceFS is a shared file store that can be read and written by thousands of clients.
+6. **Strong Consistency**: Confirmed modifications are immediately visible on all servers that mount the same file system.
+7. **Outstanding Performance**: Latency can be as low as a few milliseconds, and throughput scales nearly without limit. [Test results](docs/en/benchmark/benchmark.md)
+8. **Data Encryption**: Supports data encryption in transit and at rest; read [the guide](docs/en/security/encrypt.md) for more information.
+9. **Global File Locks**: JuiceFS supports both BSD locks (flock) and POSIX record locks (fcntl).
+10. **Data Compression**: JuiceFS supports using [LZ4](https://lz4.github.io/lz4) or [Zstandard](https://facebook.github.io/zstd) to compress all your data.
+
+---
+
+[Architecture](#architecture) | [Getting Started](#getting-started) | [Advanced Topics](#advanced-topics) | [POSIX Compatibility](#posix-compatibility) | [Performance Benchmark](#performance-benchmark) | [Supported Object Storage](#supported-object-storage) | [Who is using](#who-is-using) | [Roadmap](#roadmap) | [Reporting Issues](#reporting-issues) | [Contributing](#contributing) | [Community](#community) | [Usage Tracking](#usage-tracking) | [License](#license) | [Credits](#credits) | [FAQ](#faq)
+
+---
+
+## Architecture
+
+JuiceFS consists of three parts:
+
+1. **JuiceFS Client**: Coordinates the object storage and the metadata engine, and implements file system interfaces such as POSIX, Hadoop, Kubernetes, and the S3 gateway.
+2. **Data Storage**: Stores the file data; local disk and object storage are both supported.
+3. **Metadata Engine**: Stores the metadata corresponding to the data; multiple engines such as Redis, MySQL, and SQLite are supported.
+
+![JuiceFS Architecture](docs/en/images/juicefs-arch-new.png)
+
+JuiceFS relies on Redis to store file system metadata. Redis is a fast, open-source, in-memory key-value store that is well suited for this purpose.
All the file data is stored in object storage through the JuiceFS client. [Learn more](docs/en/introduction/architecture.md)
+
+![JuiceFS Storage Format](docs/en/images/juicefs-storage-format-new.png)
+
+Any file stored in JuiceFS is split into fixed-size **"Chunks"**, with a default upper limit of 64 MiB. Each Chunk is composed of one or more **"Slices"**; the length of a slice is not fixed and depends on how the file was written. Each slice is further split into fixed-size **"Blocks"**, 4 MiB by default. Finally, these blocks are stored in the object storage. At the same time, JuiceFS stores each file together with its Chunks, Slices, Blocks and other metadata in the metadata engine. [Learn more](docs/en/reference/how_juicefs_store_files.md)
+
+![How JuiceFS stores your files](docs/en/images/how-juicefs-stores-files-new.png)
+
+Because files are ultimately split into Chunks, Slices and Blocks before being stored, you will not find the original files in the file browser of the object storage platform; the bucket only contains a `chunks` directory and a set of numbered directories and files. Don't panic: this is exactly how JuiceFS achieves its high performance!
+
+## Getting Started
+
+To create a JuiceFS file system, you need three things:
+
+1. A Redis database for metadata storage
+2. An object storage service to store the data blocks
+3. The JuiceFS client
+
+Please refer to the [Quick Start Guide](docs/en/getting-started/for_local.md) to start using JuiceFS immediately!
+
+### Command Reference
+
+See the [command reference](docs/en/reference/command_reference.md) for all subcommands and their options.
+
+### Kubernetes
+
+[Using JuiceFS on Kubernetes](docs/en/deployment/how_to_use_on_kubernetes.md) is straightforward; give it a try.
+
+### Hadoop Java SDK
+
+If you want to use JuiceFS in Hadoop, check the [Hadoop Java SDK](docs/en/deployment/hadoop_java_sdk.md).
+
+## Advanced Topics
+
+- [Redis Best Practices](docs/en/administration/metadata/redis_best_practices.md)
+- [How to Setup Object Storage](docs/en/reference/how_to_setup_object_storage.md)
+- [Cache Management](docs/en/administration/cache_management.md)
+- [Fault Diagnosis and Analysis](docs/en/administration/fault_diagnosis_and_analysis.md)
+- [FUSE Mount Options](docs/en/reference/fuse_mount_options.md)
+- [Using JuiceFS on Windows](docs/en/juicefs_on_windows.md)
+- [S3 Gateway](docs/en/deployment/s3_gateway.md)
+
+Please refer to the [JuiceFS User Manual](docs/en/README.md) for more information.
+
+## POSIX Compatibility
+
+JuiceFS passed all of the 8813 tests in the latest [pjdfstest](https://github.com/pjd/pjdfstest).
+
+```
+All tests successful.
+
+Test Summary Report
+-------------------
+/root/soft/pjdfstest/tests/chown/00.t (Wstat: 0 Tests: 1323 Failed: 0)
+  TODO passed: 693, 697, 708-709, 714-715, 729, 733
+Files=235, Tests=8813, 233 wallclock secs ( 2.77 usr 0.38 sys + 2.57 cusr 3.93 csys = 9.65 CPU)
+Result: PASS
+```
+
+Besides the features covered by pjdfstest, JuiceFS provides:
+
+- Close-to-open consistency. Once a file is closed, subsequent opens and reads are guaranteed to see the data written before the close. Within the same mount point, a read can immediately see all data written before it.
+- Rename and all other metadata operations are atomic, guaranteed by Redis transactions.
+- Open files remain accessible after unlink from the same mount point.
+- Mmap is supported (tested with FSx).
+- Fallocate with punch hole support.
+- Extended attributes (xattr). +- BSD locks (flock). +- POSIX record locks (fcntl). + +## Performance Benchmark + +### Basic benchmark + +JuiceFS provides a subcommand to run a few basic benchmarks to understand how it works in your environment: + +![JuiceFS Bench](docs/en/images/juicefs-bench.png) + +### Throughput + +Performed a sequential read/write benchmark on JuiceFS, [EFS](https://aws.amazon.com/efs) and [S3FS](https://github.com/s3fs-fuse/s3fs-fuse) by [fio](https://github.com/axboe/fio), here is the result: + +![Sequential Read Write Benchmark](docs/en/images/sequential-read-write-benchmark.svg) + +It shows JuiceFS can provide 10X more throughput than the other two, read [more details](docs/en/benchmark/fio.md). + +### Metadata IOPS + +Performed a simple mdtest benchmark on JuiceFS, [EFS](https://aws.amazon.com/efs) and [S3FS](https://github.com/s3fs-fuse/s3fs-fuse) by [mdtest](https://github.com/hpc/ior), here is the result: + +![Metadata Benchmark](docs/en/images/metadata-benchmark.svg) + +It shows JuiceFS can provide significantly more metadata IOPS than the other two, read [more details](docs/en/benchmark/mdtest.md). + +### Analyze performance + +There is a virtual file called `.accesslog` in the root of JuiceFS to show all the operations and the time they takes, for example: + +```bash +$ cat /jfs/.accesslog +2021.01.15 08:26:11.003330 [uid:0,gid:0,pid:4403] write (17669,8666,4993160): OK <0.000010> +2021.01.15 08:26:11.003473 [uid:0,gid:0,pid:4403] write (17675,198,997439): OK <0.000014> +2021.01.15 08:26:11.003616 [uid:0,gid:0,pid:4403] write (17666,390,951582): OK <0.000006> +``` + +The last number on each line is the time (in seconds) current operation takes. You can use this directly to debug and analyze performance issues, or try `./juicefs profile /jfs` to monitor real time statistics. Please run `./juicefs profile -h` or refer to [here](docs/en/benchmark/operations_profiling.md) to learn more about this subcommand. + +## Supported Object Storage + +- Amazon S3 +- Google Cloud Storage +- Azure Blob Storage +- Alibaba Cloud Object Storage Service (OSS) +- Tencent Cloud Object Storage (COS) +- QingStor Object Storage +- Ceph RGW +- MinIO +- Local disk +- Redis + +JuiceFS supports almost all object storage services. [Learn more](docs/en/reference/how_to_setup_object_storage.md#supported-object-storage). + +## Who is using + +It's considered as beta quality, the storage format is not stabilized yet. If you want to use it in a production environment, please do a careful and serious evaluation first. If you are interested in it, please test it as soon as possible and give us [feedback](https://github.com/juicedata/juicefs/discussions). + +You are welcome to tell us after using JuiceFS and share your experience with everyone. We have also collected a summary list in [ADOPTERS.md](ADOPTERS.md), which also includes other open source projects used with JuiceFS. + +## Roadmap + +- Stabilize storage format +- Support FoundationDB as meta engine +- User and group quotas +- Directory quotas +- Snapshot +- Write once read many (WORM) + +## Reporting Issues + +We use [GitHub Issues](https://github.com/juicedata/juicefs/issues) to track community reported issues. You can also [contact](#community) the community for getting answers. + +## Contributing + +Thank you for your contribution! Please refer to the [CONTRIBUTING.md](CONTRIBUTING.md) for more information. 
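As a companion to the "Analyze performance" section above, a time-bounded sample of `.accesslog` can also be summarized with standard shell tools. The following is only a rough sketch, not part of this repository; it assumes the mount point `/jfs` and the trailing `<seconds>` field shown in the example above, and it only reports operations that happen during the sampling window:

```bash
# Sample the access log for 10 seconds, then print per-operation counts and
# average latency, parsed from the "<seconds>" suffix that ends each line.
$ timeout 10 cat /jfs/.accesslog | awk '{
    op = $4                          # operation name, e.g. "write"
    gsub(/[<>]/, "", $NF)            # strip the angle brackets around the time
    count[op]++; total[op] += $NF
  } END {
    for (op in count)
      printf "%-12s %8d ops  %10.6f s avg\n", op, count[op], total[op] / count[op]
  }'
```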
+
+## Community
+
+Welcome to join the [Discussions](https://github.com/juicedata/juicefs/discussions) and the [Slack channel](https://join.slack.com/t/juicefs/shared_invite/zt-n9h5qdxh-0bJojPaql8cfFgwerDQJgA) to connect with JuiceFS team members and other users.
+
+## Usage Tracking
+
+JuiceFS collects **anonymous** usage data by default. It only collects core metrics (e.g. version number); no user data or other sensitive information is collected. You can review the related code [here](pkg/usage/usage.go).
+
+These data help us understand how the community is using this project. You can easily disable reporting with the command line option `--no-usage-report`:
+
+```bash
+$ ./juicefs mount --no-usage-report
+```
+
+## License
+
+JuiceFS is open-sourced under Apache License 2.0, see [LICENSE](LICENSE).
+
+## Credits
+
+The design of JuiceFS was inspired by [Google File System](https://research.google/pubs/pub51), [HDFS](https://hadoop.apache.org) and [MooseFS](https://moosefs.com); thanks for their great work.
+
+## FAQ
+
+### Why doesn't JuiceFS support XXX object storage?
+
+JuiceFS already supports many object storage services; please check [the list](docs/en/reference/how_to_setup_object_storage.md#supported-object-storage) first. If your object storage is compatible with S3, you can treat it as S3. Otherwise, please open an issue to request support.
+
+### Can I use a Redis cluster?
+
+The short answer is no. JuiceFS uses Redis [transactions](https://redis.io/topics/transactions) to guarantee the atomicity of metadata operations, which are not well supported in cluster mode. Sentinel or another HA solution for Redis is needed instead.
+
+See ["Redis Best Practices"](docs/en/administration/metadata/redis_best_practices.md) for more information.
+
+### What's the difference between JuiceFS and XXX?
+
+See ["Comparison with Others"](docs/en/comparison) for more information.
+
+For more FAQs, please see the [full list](docs/en/faq.md).
diff --git a/README_CN.md b/README_CN.md new file mode 100644 index 0000000..2d79e46 --- /dev/null +++ b/README_CN.md @@ -0,0 +1,226 @@ +

+[JuiceFS logo and badges: Build Status | Join Slack | Go Report | English Docs]
+ + +JuiceFS 是一款高性能 [POSIX](https://en.wikipedia.org/wiki/POSIX) 文件系统,针对云原生环境特别优化设计,在 Apache 2.0 开源协议下发布。使用 JuiceFS 存储数据,数据本身会被持久化在对象存储(例如,Amazon S3),而数据所对应的元数据可以根据场景需求被持久化在 Redis、MySQL、SQLite 等多种数据库引擎中。JuiceFS 可以简单便捷的将海量云存储直接接入已投入生产环境的大数据、机器学习、人工智能以及各种应用平台,无需修改代码即可像使用本地存储一样高效使用海量云端存储。 + +📺 **视频**: [什么是 JuiceFS?](https://www.bilibili.com/video/BV1HK4y197va/) + +## 核心特性 + +1. **POSIX 兼容**:像本地文件系统一样使用,无缝对接已有应用,无业务侵入性; +2. **HDFS 兼容**:完整兼容 [HDFS API](docs/zh_cn/deployment/hadoop_java_sdk.md),提供更强的元数据性能; +3. **S3 兼容**:提供 [S3 Gateway](docs/zh_cn/deployment/s3_gateway.md) 实现 S3 协议兼容的访问接口; +4. **云原生**:通过 [Kubernetes CSI driver](docs/zh_cn/deployment/how_to_use_on_kubernetes.md) 可以很便捷地在 Kubernetes 中使用 JuiceFS; +5. **多端共享**:同一文件系统可在上千台服务器同时挂载,高性能并发读写,共享数据; +6. **强一致性**:确认的修改会在所有挂载了同一文件系统的服务器上立即可见,保证强一致性; +7. **强悍性能**:毫秒级的延迟,近乎无限的吞吐量(取决于对象存储规模),查看[性能测试结果](docs/zh_cn/benchmark/benchmark.md); +8. **数据安全**:支持传输中加密(encryption in transit)以及静态加密(encryption at rest),[查看详情](docs/zh_cn/security/encrypt.md); +9. **文件锁**:支持 BSD 锁(flock)及 POSIX 锁(fcntl); +10. **数据压缩**:支持使用 [LZ4](https://lz4.github.io/lz4) 或 [Zstandard](https://facebook.github.io/zstd) 压缩数据,节省存储空间; + +--- + +[架构](#架构) | [开始使用](#开始使用) | [进阶主题](#进阶主题) | [POSIX 兼容性](#posix-兼容性测试) | [性能测试](#性能测试) | [支持的对象存储](#支持的对象存储) | [谁在使用](#谁在使用) | [产品路线图](#产品路线图) | [反馈问题](#反馈问题) | [贡献](#贡献) | [社区](#社区) | [使用量收集](#使用量收集) | [开源协议](#开源协议) | [致谢](#致谢) | [FAQ](#faq) + +--- + +## 架构 + +JuiceFS 由三个部分组成: + +1. **JuiceFS 客户端**:协调对象存储和元数据存储引擎,以及 POSIX、Hadoop、Kubernetes、S3 Gateway 等文件系统接口的实现; +2. **数据存储**:存储数据本身,支持本地磁盘、对象存储; +3. **元数据引擎**:存储数据对应的元数据,支持 Redis、MySQL、SQLite 等多种引擎; + +![JuiceFS Architecture](docs/zh_cn/images/juicefs-arch-new.png) + +JuiceFS 依靠 Redis 来存储文件的元数据。Redis 是基于内存的高性能的键值数据存储,非常适合存储元数据。与此同时,所有数据将通过 JuiceFS 客户端存储到对象存储中。[了解详情](docs/zh_cn/introduction/architecture.md) + +![JuiceFS Storage Format](docs/zh_cn/images/juicefs-storage-format-new.png) + +任何存入 JuiceFS 的文件都会被拆分成固定大小的 **"Chunk"**,默认的容量上限是 64 MiB。每个 Chunk 由一个或多个 **"Slice"** 组成,Slice 的长度不固定,取决于文件写入的方式。每个 Slice 又会被进一步拆分成固定大小的 **"Block"**,默认为 4 MiB。最后,这些 Block 会被存储到对象存储。与此同时,JuiceFS 会将每个文件以及它的 Chunks、Slices、Blocks 等元数据信息存储在元数据引擎中。[了解详情](docs/zh_cn/reference/how_juicefs_store_files.md) + +![How JuiceFS stores your files](docs/zh_cn/images/how-juicefs-stores-files-new.png) + +使用 JuiceFS,文件最终会被拆分成 Chunks、Slices 和 Blocks 存储在对象存储。因此,你会发现在对象存储平台的文件浏览器中找不到存入 JuiceFS 的源文件,存储桶中只有一个 chunks 目录和一堆数字编号的目录和文件。不要惊慌,这正是 JuiceFS 高性能运作的秘诀! + +## 开始使用 + +创建 JuiceFS ,需要以下 3 个方面的准备: + +1. 准备 Redis 数据库 +2. 准备对象存储 +3. 下载安装 JuiceFS 客户端 + +请参照 [快速上手指南](docs/zh_cn/getting-started/for_local.md) 立即开始使用 JuiceFS! 
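下面是一个极简的示意(仅为演示思路:元数据地址、存储桶路径 `/tmp/testBucket` 与文件系统名 `test` 均为示例值,取自本仓库测试用例;实际用法请以快速上手指南为准):

```bash
# 创建名为 test 的文件系统:元数据存入本地 Redis,数据块存入本地目录
$ ./juicefs format --bucket /tmp/testBucket redis://127.0.0.1:6379/1 test
# 将该文件系统挂载到 /tmp/jfs
$ ./juicefs mount redis://127.0.0.1:6379/1 /tmp/jfs
```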
+ +### 命令索引 + +请点击 [这里](docs/zh_cn/reference/command_reference.md) 查看所有子命令以及命令行参数。 + +### Kubernetes + +在 Kubernetes 中使用 JuiceFS 非常便捷,请查看 [这个文档](docs/zh_cn/deployment/how_to_use_on_kubernetes.md) 了解更多信息。 + +### Hadoop Java SDK + +JuiceFS 使用 [Hadoop Java SDK](docs/zh_cn/deployment/hadoop_java_sdk.md) 与 Hadoop 生态结合。 + +## 进阶主题 + +- [Redis 最佳实践](docs/zh_cn/administration/metadata/redis_best_practices.md) +- [如何设置对象存储](docs/zh_cn/reference/how_to_setup_object_storage.md) +- [缓存管理](docs/zh_cn/administration/cache_management.md) +- [故障诊断和分析](docs/zh_cn/administration/fault_diagnosis_and_analysis.md) +- [FUSE 挂载选项](docs/zh_cn/reference/fuse_mount_options.md) +- [在 Windows 中使用 JuiceFS](docs/zh_cn/juicefs_on_windows.md) +- [S3 网关](docs/zh_cn/deployment/s3_gateway.md) + +请查阅 [JuiceFS 用户手册](docs/zh_cn/README.md) 了解更多信息。 + +## POSIX 兼容性测试 + +JuiceFS 通过了 [pjdfstest](https://github.com/pjd/pjdfstest) 最新版所有 8813 项兼容性测试。 + +``` +All tests successful. + +Test Summary Report +------------------- +/root/soft/pjdfstest/tests/chown/00.t (Wstat: 0 Tests: 1323 Failed: 0) + TODO passed: 693, 697, 708-709, 714-715, 729, 733 +Files=235, Tests=8813, 233 wallclock secs ( 2.77 usr 0.38 sys + 2.57 cusr 3.93 csys = 9.65 CPU) +Result: PASS +``` + +除了 pjdfstests 覆盖的那些 POSIX 特性外,JuiceFS 还支持: + +- 关闭再打开(close-to-open)一致性。一旦一个文件写入完成并关闭,之后的打开和读操作保证可以访问之前写入的数据。如果是在同一个挂载点,所有写入的数据都可以立即读。 +- 重命名以及所有其他元数据操作都是原子的,由 Redis 的事务机制保证。 +- 当文件被删除后,同一个挂载点上如果已经打开了,文件还可以继续访问。 +- 支持 mmap +- 支持 fallocate 以及空洞 +- 支持扩展属性 +- 支持 BSD 锁(flock) +- 支持 POSIX 记录锁(fcntl) + +## 性能测试 + +### 基础性能测试 + +JuiceFS 提供一个性能测试的子命令来帮助你了解它在你的环境中的性能表现: + +![JuiceFS Bench](docs/zh_cn/images/juicefs-bench.png) + +### 顺序读写性能 + +使用 [fio](https://github.com/axboe/fio) 测试了 JuiceFS、[EFS](https://aws.amazon.com/efs) 和 [S3FS](https://github.com/s3fs-fuse/s3fs-fuse) 的顺序读写性能,结果如下: + +![Sequential Read Write Benchmark](docs/zh_cn/images/sequential-read-write-benchmark.svg) + +上图显示 JuiceFS 可以比其他两者提供 10 倍以上的吞吐,详细结果请看[这里](docs/zh_cn/benchmark/fio.md)。 + +### 元数据性能 + +使用 [mdtest](https://github.com/hpc/ior) 测试了 JuiceFS、[EFS](https://aws.amazon.com/efs) 和 [S3FS](https://github.com/s3fs-fuse/s3fs-fuse) 的元数据性能,结果如下: + +![Metadata Benchmark](docs/zh_cn/images/metadata-benchmark.svg) + +上图显示 JuiceFS 的元数据性能显著优于其他两个,详细的测试报告请看[这里](docs/zh_cn/benchmark/mdtest.md)。 + +### 性能分析 + +在文件系统的根目录有一个叫做 `.accesslog` 的虚拟文件,它提供了所有文件系统操作的细节,以及所消耗的时间,比如: + +```bash +$ cat /jfs/.accesslog +2021.01.15 08:26:11.003330 [uid:0,gid:0,pid:4403] write (17669,8666,4993160): OK <0.000010> +2021.01.15 08:26:11.003473 [uid:0,gid:0,pid:4403] write (17675,198,997439): OK <0.000014> +2021.01.15 08:26:11.003616 [uid:0,gid:0,pid:4403] write (17666,390,951582): OK <0.000006> +``` + +每一行的最后一个数字是该操作所消耗的时间,单位是秒。你可以直接利用它来分析各种性能问题,或者尝试 `./juicefs profile /jfs` 命令实时监控统计信息。欲进一步了解此子命令请运行 `./juicefs profile -h` 或参阅[这里](docs/zh_cn/benchmark/operations_profiling.md)。 + +## 支持的对象存储 + +- 亚马逊 S3 +- 谷歌云存储 +- 微软云存储 +- 阿里云 OSS +- 腾讯云 COS +- 青云 QingStor 对象存储 +- Ceph RGW +- MinIO +- 本地目录 +- Redis + +JuiceFS 支持几乎所有主流的对象存储服务,[查看详情](docs/zh_cn/reference/how_to_setup_object_storage.md)。 + +## 谁在使用 + +JuiceFS 目前是 beta 状态,核心的存储格式还没有完全确定,如果要使用在生产环境中,请先进行细致认真的评估。如果你对它有兴趣,请尽早测试,并给我们[反馈](https://github.com/juicedata/juicefs/discussions)。 + +欢迎你在使用 JuiceFS 后告诉我们,向大家分享你的使用经验。我们也收集汇总了一份名单在 [ADOPTERS_CN.md](ADOPTERS_CN.md) 中,也包括了其他开源项目与 JuiceFS 搭配使用的情况。 + +## 产品路线图 + +- 稳定存储格式 +- 支持使用 FoundationDB 做元数据引擎 +- 基于用户和组的配额 +- 基于目录的配额 +- 快照 +- 一次写入多次读取(WORM) + +## 反馈问题 + +我们使用 [GitHub Issues](https://github.com/juicedata/juicefs/issues) 
来管理社区反馈的问题,你也可以通过其他[渠道](#社区)跟社区联系。 + +## 贡献 + +感谢你的兴趣,请参考 [CONTRIBUTING.md](CONTRIBUTING.md)。 + +## 社区 + +欢迎加入 [Discussions](https://github.com/juicedata/juicefs/discussions) 和 [Slack 频道](https://join.slack.com/t/juicefs/shared_invite/zt-n9h5qdxh-0bJojPaql8cfFgwerDQJgA) 跟我们的团队和其他社区成员交流。 + +## 使用量收集 + +JuiceFS 的客户端会收集 **匿名** 使用数据来帮助我们更好地了解大家如何使用它,它只上报诸如版本号等使用量数据,不包含任何用户信息,完整的代码在 [这里](pkg/usage/usage.go)。 + +你也可以通过下面的方式禁用它: + +```bash +$ ./juicefs mount --no-usage-report +``` + +## 开源协议 + +使用 Apache License 2.0 开源,详见 [LICENSE](LICENSE)。 + +## 致谢 + +JuiceFS 的设计参考了 [Google File System](https://research.google/pubs/pub51)、[HDFS](https://hadoop.apache.org) 以及 [MooseFS](https://moosefs.com),感谢他们的杰出工作。 + +## FAQ + +### 为什么不支持某个对象存储? + +已经支持了绝大部分对象存储,参考这个[列表](docs/zh_cn/reference/how_to_setup_object_storage.md#支持的存储服务)。如果它跟 S3 兼容的话,也可以当成 S3 来使用。否则,请创建一个 issue 来增加支持。 + +### 是否可以使用 Redis 集群版? + +不可以。JuiceFS 使用了 Redis 的[事务功能](https://redis.io/topics/transactions)来保证元数据操作的原子性,而分布式版还不支持分布式事务。哨兵节点或者其他的 Redis 高可用方法是需要的。 + +请查看[「Redis 最佳实践」](docs/zh_cn/administration/metadata/redis_best_practices.md)了解更多信息。 + +### JuiceFS 与 XXX 的区别是什么? + +请查看[「同类技术对比」](docs/zh_cn/comparison)文档了解更多信息。 + +更多 FAQ 请查看[完整列表](docs/zh_cn/faq.md)。 diff --git a/check-changed.sh b/check-changed.sh new file mode 100644 index 0000000..c546cb5 --- /dev/null +++ b/check-changed.sh @@ -0,0 +1,19 @@ +#!/bin/bash + +set -e + +if [ x"${TRAVIS_COMMIT_RANGE}" == x ] ; then + CHANGED_FILES=`git diff --name-only HEAD~1` +else + CHANGED_FILES=`git diff --name-only $TRAVIS_COMMIT_RANGE` +fi +echo $CHANGED_FILES +DOCS_DIR="docs/" +SKIP_TEST=true + +for CHANGED_FILE in $CHANGED_FILES; do + if ! [[ $CHANGED_FILE =~ $DOCS_DIR ]] ; then + SKIP_TEST=false + break + fi +done \ No newline at end of file diff --git a/cmd/bench.go b/cmd/bench.go new file mode 100644 index 0000000..62e47b0 --- /dev/null +++ b/cmd/bench.go @@ -0,0 +1,452 @@ +/* + * JuiceFS, Copyright 2021 Juicedata, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package main + +import ( + "fmt" + "math/rand" + "os" + "os/exec" + "path/filepath" + "runtime" + "strconv" + "strings" + "sync" + "time" + + "github.com/juicedata/juicefs/pkg/utils" + "github.com/mattn/go-isatty" + "github.com/urfave/cli/v2" +) + +var resultRange = map[string][4]float64{ + "bigwr": {100, 200, 10, 50}, + "bigrd": {100, 200, 10, 50}, + "smallwr": {12.5, 20, 50, 80}, + "smallrd": {50, 100, 10, 20}, + "stat": {20, 1000, 1, 5}, + "fuse": {0, 0, 0.5, 2}, + "meta": {0, 0, 2, 5}, + "put": {0, 0, 100, 200}, + "get": {0, 0, 100, 200}, + "delete": {0, 0, 30, 100}, + "cachewr": {0, 0, 10, 20}, + "cacherd": {0, 0, 1, 5}, +} + +type benchCase struct { + bm *benchmark + name string + fsize, bsize int // file/block size in Bytes + fcount, bcount int // file/block count + wbar, rbar, sbar *utils.Bar // progress bar for write/read/stat +} + +type benchmark struct { + tty bool + big, small *benchCase + threads int + tmpdir string +} + +func (bc *benchCase) writeFiles(index int) { + for i := 0; i < bc.fcount; i++ { + fname := fmt.Sprintf("%s/%s.%d.%d", bc.bm.tmpdir, bc.name, index, i) + fp, err := os.OpenFile(fname, os.O_CREATE|os.O_WRONLY|os.O_TRUNC, 0644) + if err != nil { + logger.Fatalf("Failed to open file %s: %s", fname, err) + } + buf := make([]byte, bc.bsize) + _, _ = rand.Read(buf) + for j := 0; j < bc.bcount; j++ { + if _, err = fp.Write(buf); err != nil { + logger.Fatalf("Failed to write file %s: %s", fname, err) + } + bc.wbar.Increment() + } + _ = fp.Close() + } +} + +func (bc *benchCase) readFiles(index int) { + for i := 0; i < bc.fcount; i++ { + fname := fmt.Sprintf("%s/%s.%d.%d", bc.bm.tmpdir, bc.name, index, i) + fp, err := os.Open(fname) + if err != nil { + logger.Fatalf("Failed to open file %s: %s", fname, err) + } + buf := make([]byte, bc.bsize) + for j := 0; j < bc.bcount; j++ { + if n, err := fp.Read(buf); err != nil || n != bc.bsize { + logger.Fatalf("Failed to read file %s: %d %s", fname, n, err) + } + bc.rbar.Increment() + } + _ = fp.Close() + } +} + +func (bc *benchCase) statFiles(index int) { + for i := 0; i < bc.fcount; i++ { + fname := fmt.Sprintf("%s/%s.%d.%d", bc.bm.tmpdir, bc.name, index, i) + if _, err := os.Stat(fname); err != nil { + logger.Fatalf("Failed to stat file %s: %s", fname, err) + } + bc.sbar.Increment() + } +} + +func (bc *benchCase) run(test string) float64 { + var fn func(int) + switch test { + case "write": + fn = bc.writeFiles + case "read": + fn = bc.readFiles + case "stat": + fn = bc.statFiles + } // default: fatal + var wg sync.WaitGroup + start := time.Now() + for i := 0; i < bc.bm.threads; i++ { + index := i + wg.Add(1) + go func() { + fn(index) + wg.Done() + }() + } + wg.Wait() + return time.Since(start).Seconds() +} + +// blockSize, bigSize in MiB; smallSize in KiB +func newBenchmark(tmpdir string, blockSize, bigSize, smallSize, smallCount, threads int) *benchmark { + bm := &benchmark{threads: threads, tmpdir: tmpdir} + if bigSize > 0 { + bm.big = bm.newCase("bigfile", bigSize<<20, 1, blockSize<<20) + } + if smallSize > 0 && smallCount > 0 { + bm.small = bm.newCase("smallfile", smallSize<<10, smallCount, blockSize<<20) + } + return bm +} + +func (bm *benchmark) newCase(name string, fsize, fcount, bsize int) *benchCase { + bc := &benchCase{ + bm: bm, + name: name, + fsize: fsize, + fcount: fcount, + bsize: bsize, + } + if fsize <= bsize { + bc.bcount = 1 + bc.bsize = fsize + } else { + bc.bcount = (fsize-1)/bsize + 1 + bc.fsize = bc.bcount * bsize + } + return bc +} + +func (bm *benchmark) colorize(item string, value, cost 
float64, prec int) (string, string) { + svalue := strconv.FormatFloat(value, 'f', prec, 64) + scost := strconv.FormatFloat(cost, 'f', 2, 64) + if bm.tty { + r, ok := resultRange[item] + if !ok { + logger.Fatalf("Invalid item: %s", item) + } + if item == "smallwr" || item == "smallrd" || item == "stat" { + r[0] *= float64(bm.threads) + r[1] *= float64(bm.threads) + } + var color int + if value > r[1] { // max + color = GREEN + } else if value > r[0] { // min + color = YELLOW + } else { + color = RED + } + svalue = fmt.Sprintf("%s%dm%s%s", COLOR_SEQ, color, svalue, RESET_SEQ) + if cost < r[2] { // min + color = GREEN + } else if cost < r[3] { // max + color = YELLOW + } else { + color = RED + } + scost = fmt.Sprintf("%s%dm%s%s", COLOR_SEQ, color, scost, RESET_SEQ) + } + return svalue, scost +} + +func (bm *benchmark) printResult(result [][3]string) { + var rawmax, max [3]int + for _, l := range result { + for i := 0; i < 3; i++ { + if len(l[i]) > rawmax[i] { + rawmax[i] = len(l[i]) + } + } + } + max = rawmax + if bm.tty { + max[1] -= 11 // no color chars + max[2] -= 11 + } + + var b strings.Builder + for i := 0; i < 3; i++ { + b.WriteByte('+') + b.WriteString(strings.Repeat("-", max[i]+2)) + } + b.WriteByte('+') + divider := b.String() + fmt.Println(divider) + + b.Reset() + header := []string{"ITEM", "VALUE", "COST"} + for i := 0; i < 3; i++ { + b.WriteString(" | ") + b.WriteString(padding(header[i], max[i], ' ')) + } + b.WriteString(" |") + fmt.Println(b.String()[1:]) + fmt.Println(divider) + + for _, l := range result { + b.Reset() + for i := 0; i < 3; i++ { + b.WriteString(" | ") + if spaces := rawmax[i] - len(l[i]); spaces > 0 { + b.WriteString(strings.Repeat(" ", spaces)) + } + b.WriteString(l[i]) + } + b.WriteString(" |") + fmt.Println(b.String()[1:]) + } + fmt.Println(divider) +} + +func bench(ctx *cli.Context) error { + setLoggerLevel(ctx) + + /* --- Pre-check --- */ + if ctx.Uint("block-size") == 0 || ctx.Uint("threads") == 0 { + return os.ErrInvalid + } + if ctx.NArg() < 1 { + logger.Fatalln("PATH must be provided") + } + tmpdir, err := filepath.Abs(ctx.Args().First()) + if err != nil { + logger.Fatalf("Failed to get absolute path of %s: %s", ctx.Args().First(), err) + } + tmpdir = filepath.Join(tmpdir, fmt.Sprintf("__juicefs_benchmark_%d__", time.Now().UnixNano())) + bm := newBenchmark(tmpdir, int(ctx.Uint("block-size")), int(ctx.Uint("big-file-size")), + int(ctx.Uint("small-file-size")), int(ctx.Uint("small-file-count")), int(ctx.Uint("threads"))) + if bm.big == nil && bm.small == nil { + return os.ErrInvalid + } + var purgeArgs []string + if os.Getuid() != 0 { + purgeArgs = append(purgeArgs, "sudo") + } + switch runtime.GOOS { + case "darwin": + purgeArgs = append(purgeArgs, "purge") + case "linux": + purgeArgs = append(purgeArgs, "/bin/sh", "-c", "echo 3 > /proc/sys/vm/drop_caches") + default: + logger.Fatal("Currently only support Linux/macOS") + } + + /* --- Prepare --- */ + if _, err := os.Stat(bm.tmpdir); os.IsNotExist(err) { + if err = os.MkdirAll(bm.tmpdir, 0755); err != nil { + logger.Fatalf("Failed to create %s: %s", bm.tmpdir, err) + } + } + var statsPath string + for mp := filepath.Dir(bm.tmpdir); mp != "/"; mp = filepath.Dir(mp) { + if _, err := os.Stat(filepath.Join(mp, ".stats")); err == nil { + statsPath = filepath.Join(mp, ".stats") + break + } + } + dropCaches := func() { + if os.Getenv("SKIP_DROP_CACHES") != "true" { + if err := exec.Command(purgeArgs[0], purgeArgs[1:]...).Run(); err != nil { + logger.Warnf("Failed to clean kernel caches: %s", err) + } + } else 
{ + logger.Warnf("Clear cache operation has been skipped") + } + } + if os.Getuid() != 0 { + fmt.Println("Cleaning kernel cache, may ask for root privilege...") + } + dropCaches() + bm.tty = isatty.IsTerminal(os.Stdout.Fd()) + progress := utils.NewProgress(!bm.tty, false) + if b := bm.big; b != nil { + total := int64(bm.threads * b.fcount * b.bcount) + b.wbar = progress.AddCountBar("Write big", total) + b.rbar = progress.AddCountBar("Read big", total) + } + if s := bm.small; s != nil { + total := int64(bm.threads * s.fcount * s.bcount) + s.wbar = progress.AddCountBar("Write small", total) + s.rbar = progress.AddCountBar("Read small", total) + s.sbar = progress.AddCountBar("Stat file", int64(bm.threads*s.fcount)) + } + + /* --- Run Benchmark --- */ + var stats map[string]float64 + if statsPath != "" { + stats = readStats(statsPath) + } + var result [][3]string + if b := bm.big; b != nil { + cost := b.run("write") + line := [3]string{"Write big file"} + line[1], line[2] = bm.colorize("bigwr", float64((b.fsize>>20)*b.fcount*bm.threads)/cost, cost/float64(b.fcount), 2) + line[1] += " MiB/s" + line[2] += " s/file" + result = append(result, line) + dropCaches() + + cost = b.run("read") + line[0] = "Read big file" + line[1], line[2] = bm.colorize("bigrd", float64((b.fsize>>20)*b.fcount*bm.threads)/cost, cost/float64(b.fcount), 2) + line[1] += " MiB/s" + line[2] += " s/file" + result = append(result, line) + } + if s := bm.small; s != nil { + cost := s.run("write") + line := [3]string{"Write small file"} + line[1], line[2] = bm.colorize("smallwr", float64(s.fcount*bm.threads)/cost, cost*1000/float64(s.fcount), 1) + line[1] += " files/s" + line[2] += " ms/file" + result = append(result, line) + dropCaches() + + cost = s.run("read") + line[0] = "Read small file" + line[1], line[2] = bm.colorize("smallrd", float64(s.fcount*bm.threads)/cost, cost*1000/float64(s.fcount), 1) + line[1] += " files/s" + line[2] += " ms/file" + result = append(result, line) + dropCaches() + + cost = s.run("stat") + line[0] = "Stat file" + line[1], line[2] = bm.colorize("stat", float64(s.fcount*bm.threads)/cost, cost*1000/float64(s.fcount), 1) + line[1] += " files/s" + line[2] += " ms/file" + result = append(result, line) + } + progress.Done() + + /* --- Clean-up --- */ + if err := exec.Command("rm", "-rf", bm.tmpdir).Run(); err != nil { + logger.Warnf("Failed to cleanup %s: %s", bm.tmpdir, err) + } + + /* --- Report --- */ + fmt.Println("Benchmark finished!") + fmt.Printf("BlockSize: %d MiB, BigFileSize: %d MiB, SmallFileSize: %d KiB, SmallFileCount: %d, NumThreads: %d\n", + ctx.Uint("block-size"), ctx.Uint("big-file-size"), ctx.Uint("small-file-size"), ctx.Uint("small-file-count"), ctx.Uint("threads")) + if stats != nil { + stats2 := readStats(statsPath) + diff := func(item string) float64 { + return stats2["juicefs_"+item] - stats["juicefs_"+item] + } + show := func(title, nick, item string) { + count := diff(item + "_total") + var cost float64 + if count > 0 { + cost = diff(item+"_sum") * 1000 / count + } + line := [3]string{title} + line[1], line[2] = bm.colorize(nick, count, cost, 0) + line[1] += " operations" + line[2] += " ms/op" + result = append(result, line) + } + show("FUSE operation", "fuse", "fuse_ops_durations_histogram_seconds") + show("Update meta", "meta", "transaction_durations_histogram_seconds") + show("Put object", "put", "object_request_durations_histogram_seconds_PUT") + show("Get object", "get", "object_request_durations_histogram_seconds_GET") + show("Delete object", "delete", 
"object_request_durations_histogram_seconds_DELETE") + show("Write into cache", "cachewr", "blockcache_write_hist_seconds") + show("Read from cache", "cacherd", "blockcache_read_hist_seconds") + var fmtString string + if bm.tty { + greenSeq := fmt.Sprintf("%s%dm", COLOR_SEQ, GREEN) + fmtString = fmt.Sprintf("Time used: %s%%.1f%s s, CPU: %s%%.1f%s%%%%, Memory: %s%%.1f%s MiB\n", + greenSeq, RESET_SEQ, greenSeq, RESET_SEQ, greenSeq, RESET_SEQ) + } else { + fmtString = "Time used: %.1f s, CPU: %.1f%%, Memory: %.1f MiB\n" + } + fmt.Printf(fmtString, diff("uptime"), diff("cpu_usage")*100/diff("uptime"), stats2["juicefs_memory"]/1024/1024) + } + bm.printResult(result) + return nil +} + +func benchFlags() *cli.Command { + return &cli.Command{ + Name: "bench", + Usage: "run benchmark to read/write/stat big/small files", + Action: bench, + ArgsUsage: "PATH", + Flags: []cli.Flag{ + &cli.UintFlag{ + Name: "block-size", + Value: 1, + Usage: "block size in MiB", + }, + &cli.UintFlag{ + Name: "big-file-size", + Value: 1024, + Usage: "size of big file in MiB", + }, + &cli.UintFlag{ + Name: "small-file-size", + Value: 128, + Usage: "size of small file in KiB", + }, + &cli.UintFlag{ + Name: "small-file-count", + Value: 100, + Usage: "number of small files", + }, + &cli.UintFlag{ + Name: "threads", + Aliases: []string{"p"}, + Value: 1, + Usage: "number of concurrent threads", + }, + }, + } +} diff --git a/cmd/bench_test.go b/cmd/bench_test.go new file mode 100644 index 0000000..4d76144 --- /dev/null +++ b/cmd/bench_test.go @@ -0,0 +1,44 @@ +/* + * JuiceFS, Copyright 2021 Juicedata, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package main + +import ( + "os" + "testing" +) + +func TestBench(t *testing.T) { + metaUrl := "redis://127.0.0.1:6379/10" + mountpoint := "/tmp/testDir" + defer ResetRedis(metaUrl) + if err := MountTmp(metaUrl, mountpoint); err != nil { + t.Fatalf("mount failed: %v", err) + } + defer func(mountpoint string) { + err := UmountTmp(mountpoint) + if err != nil { + t.Fatalf("umount failed: %v", err) + } + }(mountpoint) + benchArgs := []string{"", "bench", mountpoint} + os.Setenv("SKIP_DROP_CACHES", "true") + defer os.Unsetenv("SKIP_DROP_CACHES") + err := Main(benchArgs) + if err != nil { + t.Fatalf("test bench failed: %v", err) + } +} diff --git a/cmd/config.go b/cmd/config.go new file mode 100644 index 0000000..8e83c67 --- /dev/null +++ b/cmd/config.go @@ -0,0 +1,187 @@ +/* + * JuiceFS, Copyright 2021 Juicedata, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package main + +import ( + "bufio" + "fmt" + "os" + "strings" + + "github.com/juicedata/juicefs/pkg/meta" + "github.com/urfave/cli/v2" +) + +func warn(format string, a ...interface{}) { + fmt.Printf("\033[1;33mWARNING\033[0m: "+format+"\n", a...) +} + +func userConfirmed() bool { + fmt.Print("Proceed anyway? [y/N]: ") + scanner := bufio.NewScanner(os.Stdin) + for scanner.Scan() { + if text := strings.ToLower(scanner.Text()); text == "y" || text == "yes" { + return true + } else if text == "" || text == "n" || text == "no" { + return false + } else { + fmt.Print("Please input y(yes) or n(no): ") + } + } + return false +} + +func config(ctx *cli.Context) error { + setLoggerLevel(ctx) + if ctx.Args().Len() < 1 { + return fmt.Errorf("META-URL is needed") + } + removePassword(ctx.Args().Get(0)) + m := meta.NewClient(ctx.Args().Get(0), &meta.Config{Retries: 10, Strict: true}) + + format, err := m.Load() + if err != nil { + return err + } + if len(ctx.LocalFlagNames()) == 0 { + format.RemoveSecret() + printJson(format) + return nil + } + + var quota, storage, trash bool + var msg strings.Builder + for _, flag := range ctx.LocalFlagNames() { + switch flag { + case "capacity": + if new := ctx.Uint64(flag); new != format.Capacity>>30 { + msg.WriteString(fmt.Sprintf("%10s: %d GiB -> %d GiB\n", flag, format.Capacity>>30, new)) + format.Capacity = new << 30 + quota = true + } + case "inodes": + if new := ctx.Uint64(flag); new != format.Inodes { + msg.WriteString(fmt.Sprintf("%10s: %d -> %d\n", flag, format.Inodes, new)) + format.Inodes = new + quota = true + } + case "bucket": + if new := ctx.String(flag); new != format.Bucket { + msg.WriteString(fmt.Sprintf("%10s: %s -> %s\n", flag, format.Bucket, new)) + format.Bucket = new + storage = true + } + case "access-key": + if new := ctx.String(flag); new != format.AccessKey { + msg.WriteString(fmt.Sprintf("%10s: %s -> %s\n", flag, format.AccessKey, new)) + format.AccessKey = new + storage = true + } + case "secret-key": + if new := ctx.String(flag); new != format.SecretKey { + msg.WriteString(fmt.Sprintf("%10s: updated\n", flag)) + format.SecretKey = new + storage = true + } + case "trash-days": + if new := ctx.Int(flag); new != format.TrashDays { + msg.WriteString(fmt.Sprintf("%10s: %d -> %d\n", flag, format.TrashDays, new)) + format.TrashDays = new + trash = true + } + } + } + if msg.Len() == 0 { + fmt.Println("Nothing changed.") + return nil + } + + if !ctx.Bool("force") { + if storage { + blob, err := createStorage(format) + if err != nil { + return err + } + if err = test(blob); err != nil { + return err + } + } + if quota { + var totalSpace, availSpace, iused, iavail uint64 + _ = m.StatFS(meta.Background, &totalSpace, &availSpace, &iused, &iavail) + usedSpace := totalSpace - availSpace + if format.Capacity > 0 && usedSpace >= format.Capacity || + format.Inodes > 0 && iused >= format.Inodes { + warn("New quota is too small (used / quota): %d / %d bytes, %d / %d inodes.", + usedSpace, format.Capacity, iused, format.Inodes) + if !userConfirmed() { + return fmt.Errorf("Aborted.") + } + } + } + if trash && format.TrashDays == 0 { + warn("The current trash will be emptied and future removed files will purged immediately.") + if !userConfirmed() { + return fmt.Errorf("Aborted.") + } + } + } + + if err = m.Init(*format, false); err == nil { + fmt.Println(msg.String()[:msg.Len()-1]) + } + return err +} + +func configFlags() *cli.Command { + 
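	// Note: configFlags wires up the "config" subcommand handled by config() above.
	// With no flags it prints the current volume format (secrets removed); with
	// flags it updates the capacity/inodes quota, the object storage settings
	// (bucket, access-key, secret-key) or trash-days, running the sanity checks
	// in config() unless --force is given.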
return &cli.Command{ + Name: "config", + Usage: "change config of a volume", + ArgsUsage: "META-URL", + Action: config, + Flags: []cli.Flag{ + &cli.Uint64Flag{ + Name: "capacity", + Usage: "the limit for space in GiB", + }, + &cli.Uint64Flag{ + Name: "inodes", + Usage: "the limit for number of inodes", + }, + &cli.StringFlag{ + Name: "bucket", + Usage: "A bucket URL to store data", + }, + &cli.StringFlag{ + Name: "access-key", + Usage: "Access key for object storage", + }, + &cli.StringFlag{ + Name: "secret-key", + Usage: "Secret key for object storage", + }, + &cli.IntFlag{ + Name: "trash-days", + Usage: "number of days after which removed files will be permanently deleted", + }, + &cli.BoolFlag{ + Name: "force", + Usage: "skip sanity check and force update the configurations", + }, + }, + } +} diff --git a/cmd/config_test.go b/cmd/config_test.go new file mode 100644 index 0000000..606d390 --- /dev/null +++ b/cmd/config_test.go @@ -0,0 +1,82 @@ +/* + * JuiceFS, Copyright 2021 Juicedata, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package main + +import ( + "encoding/json" + "os" + "testing" + + "github.com/agiledragon/gomonkey/v2" + "github.com/juicedata/juicefs/pkg/meta" +) + +func getStdout(args []string) ([]byte, error) { + tmp, err := os.CreateTemp("/tmp", "jfstest-*") + if err != nil { + return nil, err + } + defer tmp.Close() + defer os.Remove(tmp.Name()) + patch := gomonkey.ApplyGlobalVar(os.Stdout, *tmp) + defer patch.Reset() + + if err = Main(args); err != nil { + return nil, err + } + return os.ReadFile(tmp.Name()) +} + +func TestConfig(t *testing.T) { + metaUrl := "redis://localhost:6379/10" + ResetRedis(metaUrl) + if err := Main([]string{"", "format", metaUrl, "--bucket", "/tmp/testBucket", "test"}); err != nil { + t.Fatalf("format: %s", err) + } + + if err := Main([]string{"", "config", metaUrl, "--trash-days", "2"}); err != nil { + t.Fatalf("config: %s", err) + } + data, err := getStdout([]string{"", "config", metaUrl}) + if err != nil { + t.Fatalf("getStdout: %s", err) + } + var format meta.Format + if err = json.Unmarshal(data, &format); err != nil { + t.Fatalf("json unmarshal: %s", err) + } + if format.TrashDays != 2 { + t.Fatalf("trash-days %d != expect 2", format.TrashDays) + } + + if err = Main([]string{"", "config", metaUrl, "--capacity", "10", "--inodes", "1000000"}); err != nil { + t.Fatalf("config: %s", err) + } + if err = Main([]string{"", "config", metaUrl, "--bucket", "/tmp/newBucket", "--access-key", "testAK", "--secret-key", "testSK"}); err != nil { + t.Fatalf("config: %s", err) + } + if data, err = getStdout([]string{"", "config", metaUrl}); err != nil { + t.Fatalf("getStdout: %s", err) + } + if err = json.Unmarshal(data, &format); err != nil { + t.Fatalf("json unmarshal: %s", err) + } + if format.Capacity != 10<<30 || format.Inodes != 1000000 || + format.Bucket != "/tmp/newBucket" || format.AccessKey != "testAK" || format.SecretKey != "removed" { + t.Fatalf("unexpect format: %+v", format) + } +} diff --git a/cmd/destroy.go 
b/cmd/destroy.go new file mode 100644 index 0000000..3be0789 --- /dev/null +++ b/cmd/destroy.go @@ -0,0 +1,147 @@ +/* + * JuiceFS, Copyright 2021 Juicedata, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package main + +import ( + "fmt" + "sort" + "sync" + + "github.com/juicedata/juicefs/pkg/meta" + osync "github.com/juicedata/juicefs/pkg/sync" + "github.com/juicedata/juicefs/pkg/utils" + "github.com/urfave/cli/v2" +) + +func destroy(ctx *cli.Context) error { + setLoggerLevel(ctx) + if ctx.Args().Len() < 2 { + return fmt.Errorf("META-URL and UUID are required") + } + removePassword(ctx.Args().Get(0)) + m := meta.NewClient(ctx.Args().Get(0), &meta.Config{Retries: 10, Strict: true}) + + format, err := m.Load() + if err != nil { + logger.Fatalf("load setting: %s", err) + } + if uuid := ctx.Args().Get(1); uuid != format.UUID { + logger.Fatalf("UUID %s != expected %s", uuid, format.UUID) + } + + if !ctx.Bool("force") { + m.CleanStaleSessions() + sessions, err := m.ListSessions() + if err != nil { + logger.Fatalf("list sessions: %s", err) + } + if num := len(sessions); num > 0 { + logger.Fatalf("%d sessions are active, please disconnect them first", num) + } + var totalSpace, availSpace, iused, iavail uint64 + _ = m.StatFS(meta.Background, &totalSpace, &availSpace, &iused, &iavail) + + fmt.Printf(" volume name: %s\n", format.Name) + fmt.Printf(" volume UUID: %s\n", format.UUID) + fmt.Printf("data storage: %s://%s\n", format.Storage, format.Bucket) + fmt.Printf(" used bytes: %d\n", totalSpace-availSpace) + fmt.Printf(" used inodes: %d\n", iused) + warn("The target volume will be destroyed permanently, including:") + warn("1. objects in the data storage") + warn("2. 
entries in the metadata engine") + if !userConfirmed() { + logger.Fatalln("Aborted.") + } + } + + blob, err := createStorage(format) + if err != nil { + logger.Fatalf("create object storage: %s", err) + } + objs, err := osync.ListAll(blob, "", "") + if err != nil { + logger.Fatalf("list all objects: %s", err) + } + progress := utils.NewProgress(false, false) + spin := progress.AddCountSpinner("Deleted objects") + var failed int + var dirs []string + var mu sync.Mutex + var wg sync.WaitGroup + for i := 0; i < 8; i++ { + wg.Add(1) + go func() { + defer wg.Done() + for obj := range objs { + if obj == nil { + break // failed listing + } + if obj.IsDir() { + mu.Lock() + dirs = append(dirs, obj.Key()) + mu.Unlock() + continue + } + if err := blob.Delete(obj.Key()); err == nil { + spin.Increment() + } else { + failed++ + logger.Warnf("delete %s: %s", obj.Key(), err) + } + } + }() + } + wg.Wait() + sort.Strings(dirs) + for i := len(dirs) - 1; i >= 0; i-- { + if err := blob.Delete(dirs[i]); err == nil { + spin.Increment() + } else { + failed++ + logger.Warnf("delete %s: %s", dirs[i], err) + } + } + progress.Done() + if progress.Quiet { + logger.Infof("Deleted %d objects", spin.Current()) + } + if failed > 0 { + logger.Errorf("%d objects failed to be deleted, please delete them manually.", failed) + } + + if err = m.Reset(); err != nil { + logger.Fatalf("reset meta: %s", err) + } + + logger.Infof("The volume has been destroyed! You may need to delete the cache directory manually.") + return nil +} + +func destroyFlags() *cli.Command { + return &cli.Command{ + Name: "destroy", + Usage: "destroy an existing volume", + ArgsUsage: "META-URL UUID", + Action: destroy, + Flags: []cli.Flag{ + &cli.BoolFlag{ + Name: "force", + Usage: "skip sanity check and force destroy the volume", + }, + }, + } +} diff --git a/cmd/dump.go b/cmd/dump.go new file mode 100644 index 0000000..87a9f74 --- /dev/null +++ b/cmd/dump.go @@ -0,0 +1,66 @@ +/* + * JuiceFS, Copyright 2021 Juicedata, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package main + +import ( + "fmt" + "io" + "os" + + "github.com/juicedata/juicefs/pkg/meta" + "github.com/urfave/cli/v2" +) + +func dump(ctx *cli.Context) error { + setLoggerLevel(ctx) + if ctx.Args().Len() < 1 { + return fmt.Errorf("META-URL is needed") + } + var fp io.WriteCloser + if ctx.Args().Len() == 1 { + fp = os.Stdout + } else { + var err error + fp, err = os.OpenFile(ctx.Args().Get(1), os.O_CREATE|os.O_WRONLY|os.O_TRUNC, 0644) + if err != nil { + return err + } + defer fp.Close() + } + removePassword(ctx.Args().Get(0)) + m := meta.NewClient(ctx.Args().Get(0), &meta.Config{Retries: 10, Strict: true, Subdir: ctx.String("subdir")}) + if err := m.DumpMeta(fp, 0); err != nil { + return err + } + logger.Infof("Dump metadata into %s succeed", ctx.Args().Get(1)) + return nil +} + +func dumpFlags() *cli.Command { + return &cli.Command{ + Name: "dump", + Usage: "dump metadata into a JSON file", + ArgsUsage: "META-URL [FILE]", + Action: dump, + Flags: []cli.Flag{ + &cli.StringFlag{ + Name: "subdir", + Usage: "only dump a sub-directory.", + }, + }, + } +} diff --git a/cmd/dump_test.go b/cmd/dump_test.go new file mode 100644 index 0000000..12e19da --- /dev/null +++ b/cmd/dump_test.go @@ -0,0 +1,71 @@ +/* + * JuiceFS, Copyright 2021 Juicedata, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package main + +import ( + "context" + "os" + "testing" + + "github.com/go-redis/redis/v8" +) + +func TestDumpAndLoad(t *testing.T) { + metaUrl := "redis://127.0.0.1:6379/10" + opt, err := redis.ParseURL(metaUrl) + if err != nil { + t.Fatalf("ParseURL: %v", err) + } + rdb := redis.NewClient(opt) + rdb.FlushDB(context.Background()) + + t.Run("Test Load", func(t *testing.T) { + loadArgs := []string{"", "load", metaUrl, "./../pkg/meta/metadata.sample"} + err = Main(loadArgs) + if err != nil { + t.Fatalf("load failed: %v", err) + } + if rdb.DBSize(context.Background()).Val() == 0 { + t.Fatalf("load error: %v", err) + } + + }) + t.Run("Test dump", func(t *testing.T) { + dumpArgs := []string{"", "dump", metaUrl, "/tmp/dump_test.json"} + err := Main(dumpArgs) + if err != nil { + t.Fatalf("dump error: %v", err) + } + _, err = os.Stat("/tmp/dump_test.json") + if err != nil { + t.Fatalf("dump error: %v", err) + } + }) + + t.Run("Test dump with subdir", func(t *testing.T) { + dumpArgs := []string{"", "dump", metaUrl, "/tmp/dump_subdir_test.json", "--subdir", "d1"} + err := Main(dumpArgs) + if err != nil { + t.Fatalf("dump error: %v", err) + } + _, err = os.Stat("/tmp/dump_subdir_test.json") + if err != nil { + t.Fatalf("dump error: %v", err) + } + }) + rdb.FlushDB(context.Background()) +} diff --git a/cmd/format.go b/cmd/format.go new file mode 100644 index 0000000..26c9b01 --- /dev/null +++ b/cmd/format.go @@ -0,0 +1,318 @@ +/* + * JuiceFS, Copyright 2020 Juicedata, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package main + +import ( + "bytes" + "fmt" + "io/ioutil" + "math/rand" + _ "net/http/pprof" + "os" + "path" + "regexp" + "runtime" + "strings" + "time" + + "github.com/google/uuid" + "github.com/juicedata/juicefs/pkg/compress" + "github.com/juicedata/juicefs/pkg/meta" + "github.com/juicedata/juicefs/pkg/object" + "github.com/juicedata/juicefs/pkg/version" + "github.com/urfave/cli/v2" +) + +func fixObjectSize(s int) int { + const min, max = 64, 16 << 10 + var bits uint + for s > 1 { + bits++ + s >>= 1 + } + s = s << bits + if s < min { + s = min + } else if s > max { + s = max + } + return s +} + +func createStorage(format *meta.Format) (object.ObjectStorage, error) { + object.UserAgent = "JuiceFS-" + version.Version() + var blob object.ObjectStorage + var err error + if format.Shards > 1 { + blob, err = object.NewSharded(strings.ToLower(format.Storage), format.Bucket, format.AccessKey, format.SecretKey, format.Shards) + } else { + blob, err = object.CreateStorage(strings.ToLower(format.Storage), format.Bucket, format.AccessKey, format.SecretKey) + } + if err != nil { + return nil, err + } + blob = object.WithPrefix(blob, format.Name+"/") + + if format.EncryptKey != "" { + passphrase := os.Getenv("JFS_RSA_PASSPHRASE") + privKey, err := object.ParseRsaPrivateKeyFromPem(format.EncryptKey, passphrase) + if err != nil { + return nil, fmt.Errorf("load private key: %s", err) + } + encryptor := object.NewAESEncryptor(object.NewRSAEncryptor(privKey)) + blob = object.NewEncrypted(blob, encryptor) + } + return blob, nil +} + +var letters = []rune("abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789") + +func randSeq(n int) string { + b := make([]rune, n) + for i := range b { + b[i] = letters[rand.Intn(len(letters))] + } + return string(b) +} + +func doTesting(store object.ObjectStorage, key string, data []byte) error { + if err := store.Put(key, bytes.NewReader(data)); err != nil { + if strings.Contains(err.Error(), "Access Denied") { + return fmt.Errorf("Failed to put: %s", err) + } + if err2 := store.Create(); err2 != nil { + if strings.Contains(err.Error(), "NoSuchBucket") { + return fmt.Errorf("Failed to create bucket %s: %s, previous error: %s\nPlease create bucket %s manually, then format again.", + store, err2, err, store) + } else { + return fmt.Errorf("Failed to create bucket %s: %s, previous error: %s", + store, err2, err) + } + } + if err := store.Put(key, bytes.NewReader(data)); err != nil { + return fmt.Errorf("Failed to put: %s", err) + } + } + p, err := store.Get(key, 0, -1) + if err != nil { + return fmt.Errorf("Failed to get: %s", err) + } + data2, err := ioutil.ReadAll(p) + _ = p.Close() + if err != nil { + return err + } + if !bytes.Equal(data, data2) { + return fmt.Errorf("Read wrong data") + } + err = store.Delete(key) + if err != nil { + // it's OK to don't have delete permission + fmt.Printf("Failed to delete: %s", err) + } + return nil +} + +func test(store object.ObjectStorage) error { + rand.Seed(time.Now().UnixNano()) + key := "testing/" + randSeq(10) + data := make([]byte, 100) + _, _ = rand.Read(data) + nRetry := 3 + var err error + for 
i := 0; i < nRetry; i++ { + err = doTesting(store, key, data) + if err == nil { + return nil + } + time.Sleep(time.Second * time.Duration(i*3+1)) + } + return err +} + +func format(c *cli.Context) error { + setLoggerLevel(c) + if c.Args().Len() < 1 { + logger.Fatalf("Meta URL and name are required") + } + removePassword(c.Args().Get(0)) + m := meta.NewClient(c.Args().Get(0), &meta.Config{Retries: 2}) + + if c.Args().Len() < 2 { + logger.Fatalf("Please give it a name") + } + name := c.Args().Get(1) + validName := regexp.MustCompile(`^[a-z0-9][a-z0-9\-]{1,61}[a-z0-9]$`) + if !validName.MatchString(name) { + logger.Fatalf("invalid name: %s, only alphabet, number and - are allowed, and the length should be 3 to 63 characters.", name) + } + + compressor := compress.NewCompressor(c.String("compress")) + if compressor == nil { + logger.Fatalf("Unsupported compress algorithm: %s", c.String("compress")) + } + if c.Bool("no-update") { + if _, err := m.Load(); err == nil { + return nil + } + } + + format := meta.Format{ + Name: name, + UUID: uuid.New().String(), + Storage: c.String("storage"), + Bucket: c.String("bucket"), + AccessKey: c.String("access-key"), + SecretKey: c.String("secret-key"), + Shards: c.Int("shards"), + Capacity: c.Uint64("capacity") << 30, + Inodes: c.Uint64("inodes"), + BlockSize: fixObjectSize(c.Int("block-size")), + Compression: c.String("compress"), + TrashDays: c.Int("trash-days"), + } + if format.AccessKey == "" && os.Getenv("ACCESS_KEY") != "" { + format.AccessKey = os.Getenv("ACCESS_KEY") + _ = os.Unsetenv("ACCESS_KEY") + } + if format.SecretKey == "" && os.Getenv("SECRET_KEY") != "" { + format.SecretKey = os.Getenv("SECRET_KEY") + _ = os.Unsetenv("SECRET_KEY") + } + + if format.Storage == "file" && !strings.HasSuffix(format.Bucket, "/") { + format.Bucket += "/" + } + + keyPath := c.String("encrypt-rsa-key") + if keyPath != "" { + pem, err := ioutil.ReadFile(keyPath) + if err != nil { + logger.Fatalf("load RSA key from %s: %s", keyPath, err) + } + format.EncryptKey = string(pem) + } + + blob, err := createStorage(&format) + if err != nil { + logger.Fatalf("object storage: %s", err) + } + logger.Infof("Data use %s", blob) + if os.Getenv("JFS_NO_CHECK_OBJECT_STORAGE") == "" { + if err := test(blob); err != nil { + logger.Fatalf("Storage %s is not configured correctly: %s", blob, err) + } + } + + if !c.Bool("force") && format.Compression == "none" { // default + if old, err := m.Load(); err == nil && old.Compression == "lz4" { // lz4 is the previous default algr + format.Compression = old.Compression // keep the existing default compress algr + } + } + err = m.Init(format, c.Bool("force")) + if err != nil { + logger.Fatalf("format: %s", err) + } + format.RemoveSecret() + logger.Infof("Volume is formatted as %+v", format) + return nil +} + +func formatFlags() *cli.Command { + var defaultBucket string + switch runtime.GOOS { + case "darwin": + homeDir, err := os.UserHomeDir() + if err != nil { + logger.Fatalf("%v", err) + } + defaultBucket = path.Join(homeDir, ".juicefs", "local") + case "windows": + defaultBucket = path.Join("C:/jfs/local") + default: + defaultBucket = "/var/jfs" + } + return &cli.Command{ + Name: "format", + Usage: "format a volume", + ArgsUsage: "META-URL NAME", + Flags: []cli.Flag{ + &cli.IntFlag{ + Name: "block-size", + Value: 4096, + Usage: "size of block in KiB", + }, + &cli.Uint64Flag{ + Name: "capacity", + Value: 0, + Usage: "the limit for space in GiB", + }, + &cli.Uint64Flag{ + Name: "inodes", + Value: 0, + Usage: "the limit for number of inodes", 
+ }, + &cli.StringFlag{ + Name: "compress", + Value: "none", + Usage: "compression algorithm (lz4, zstd, none)", + }, + &cli.IntFlag{ + Name: "shards", + Value: 0, + Usage: "store the blocks into N buckets by hash of key", + }, + &cli.StringFlag{ + Name: "storage", + Value: "file", + Usage: "Object storage type (e.g. s3, gcs, oss, cos)", + }, + &cli.StringFlag{ + Name: "bucket", + Value: defaultBucket, + Usage: "A bucket URL to store data", + }, + &cli.StringFlag{ + Name: "access-key", + Usage: "Access key for object storage (env ACCESS_KEY)", + }, + &cli.StringFlag{ + Name: "secret-key", + Usage: "Secret key for object storage (env SECRET_KEY)", + }, + &cli.StringFlag{ + Name: "encrypt-rsa-key", + Usage: "A path to RSA private key (PEM)", + }, + &cli.IntFlag{ + Name: "trash-days", + Value: 1, + Usage: "number of days after which removed files will be permanently deleted", + }, + + &cli.BoolFlag{ + Name: "force", + Usage: "overwrite existing format", + }, + &cli.BoolFlag{ + Name: "no-update", + Usage: "don't update existing volume", + }, + }, + Action: format, + } +} diff --git a/cmd/format_test.go b/cmd/format_test.go new file mode 100644 index 0000000..c3159c4 --- /dev/null +++ b/cmd/format_test.go @@ -0,0 +1,94 @@ +/* + * JuiceFS, Copyright 2020 Juicedata, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package main + +import ( + "context" + "encoding/json" + "testing" + + "github.com/juicedata/juicefs/pkg/meta" + + "github.com/go-redis/redis/v8" +) + +func TestFixObjectSize(t *testing.T) { + t.Run("Should make sure the size is in range", func(t *testing.T) { + cases := []struct { + input, expected int + }{ + {30, 64}, + {0, 64}, + {2 << 30, 16 << 10}, + {16 << 11, 16 << 10}, + } + for _, c := range cases { + if size := fixObjectSize(c.input); size != c.expected { + t.Fatalf("Expected %d, got %d", c.expected, size) + } + } + }) + t.Run("Should use powers of two", func(t *testing.T) { + cases := []struct { + input, expected int + }{ + {150, 128}, + {99, 64}, + {1077, 1024}, + } + for _, c := range cases { + if size := fixObjectSize(c.input); size != c.expected { + t.Fatalf("Expected %d, got %d", c.expected, size) + } + } + }) +} + +func TestFormat(t *testing.T) { + metaUrl := "redis://127.0.0.1:6379/10" + opt, err := redis.ParseURL(metaUrl) + if err != nil { + t.Fatalf("ParseURL: %v", err) + } + rdb := redis.NewClient(opt) + ctx := context.Background() + rdb.FlushDB(ctx) + defer rdb.FlushDB(ctx) + name := "test" + formatArgs := []string{"", "format", "--storage", "file", "--bucket", "/tmp/testMountDir", metaUrl, name} + err = Main(formatArgs) + if err != nil { + t.Fatalf("format error: %v", err) + } + body, err := rdb.Get(ctx, "setting").Bytes() + if err == redis.Nil { + t.Fatalf("database is not formatted") + } + if err != nil { + t.Fatalf("database is not formatted") + } + f := meta.Format{} + err = json.Unmarshal(body, &f) + if err != nil { + t.Fatalf("database formatted error: %v", err) + } + + if f.Name != name { + t.Fatalf("database formatted error: %v", err) + } + +} diff --git a/cmd/fsck.go b/cmd/fsck.go new file mode 100644 index 0000000..3216514 --- /dev/null +++ b/cmd/fsck.go @@ -0,0 +1,164 @@ +/* + * JuiceFS, Copyright 2021 Juicedata, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package main + +import ( + "fmt" + "sort" + "strings" + "time" + + "github.com/juicedata/juicefs/pkg/chunk" + "github.com/juicedata/juicefs/pkg/meta" + "github.com/juicedata/juicefs/pkg/object" + osync "github.com/juicedata/juicefs/pkg/sync" + "github.com/juicedata/juicefs/pkg/utils" + + "github.com/urfave/cli/v2" +) + +func checkFlags() *cli.Command { + return &cli.Command{ + Name: "fsck", + Usage: "Check consistency of file system", + ArgsUsage: "META-URL", + Action: fsck, + } +} + +func fsck(ctx *cli.Context) error { + setLoggerLevel(ctx) + if ctx.Args().Len() < 1 { + return fmt.Errorf("META-URL is needed") + } + removePassword(ctx.Args().Get(0)) + m := meta.NewClient(ctx.Args().Get(0), &meta.Config{Retries: 10, Strict: true}) + format, err := m.Load() + if err != nil { + logger.Fatalf("load setting: %s", err) + } + + chunkConf := chunk.Config{ + BlockSize: format.BlockSize * 1024, + Compress: format.Compression, + + GetTimeout: time.Second * 60, + PutTimeout: time.Second * 60, + MaxUpload: 20, + BufferSize: 300 << 20, + CacheDir: "memory", + } + + blob, err := createStorage(format) + if err != nil { + logger.Fatalf("object storage: %s", err) + } + logger.Infof("Data use %s", blob) + blob = object.WithPrefix(blob, "chunks/") + objs, err := osync.ListAll(blob, "", "") + if err != nil { + logger.Fatalf("list all blocks: %s", err) + } + + // Find all blocks in object storage + progress := utils.NewProgress(false, false) + blockDSpin := progress.AddDoubleSpinner("Found blocks") + var blocks = make(map[string]int64) + for obj := range objs { + if obj == nil { + break // failed listing + } + if obj.IsDir() { + continue + } + + logger.Debugf("found block %s", obj.Key()) + parts := strings.Split(obj.Key(), "/") + if len(parts) != 3 { + continue + } + name := parts[2] + blocks[name] = obj.Size() + blockDSpin.IncrInt64(obj.Size()) + } + blockDSpin.Done() + if progress.Quiet { + c, b := blockDSpin.Current() + logger.Infof("Found %d blocks (%d bytes)", c, b) + } + + // List all slices in metadata engine + sliceCSpin := progress.AddCountSpinner("Listed slices") + var c = meta.NewContext(0, 0, []uint32{0}) + slices := make(map[meta.Ino][]meta.Slice) + r := m.ListSlices(c, slices, false, sliceCSpin.Increment) + if r != 0 { + logger.Fatalf("list all slices: %s", r) + } + sliceCSpin.Done() + + // Scan all slices to find lost blocks + sliceCBar := progress.AddCountBar("Scanned slices", sliceCSpin.Current()) + sliceBSpin := progress.AddByteSpinner("Scanned slices") + lostDSpin := progress.AddDoubleSpinner("Lost blocks") + brokens := make(map[meta.Ino]string) + for inode, ss := range slices { + for _, s := range ss { + n := (s.Size - 1) / uint32(chunkConf.BlockSize) + for i := uint32(0); i <= n; i++ { + sz := chunkConf.BlockSize + if i == n { + sz = int(s.Size) - int(i)*chunkConf.BlockSize + } + key := fmt.Sprintf("%d_%d_%d", s.Chunkid, i, sz) + if _, ok := blocks[key]; !ok { + if _, err := blob.Head(key); err != nil { + if _, ok := brokens[inode]; !ok { + if p, st := meta.GetPath(m, meta.Background, inode); st == 0 { + brokens[inode] = p + } else { + logger.Warnf("getpath of inode %d: %s", inode, st) + brokens[inode] = st.Error() + } + } + logger.Errorf("can't find block %s for file %s: %s", key, brokens[inode], err) + lostDSpin.IncrInt64(int64(sz)) + } + } + } + sliceCBar.Increment() + sliceBSpin.IncrInt64(int64(s.Size)) + } + } + progress.Done() + if progress.Quiet { + logger.Infof("Used by %d slices (%d bytes)", sliceCBar.Current(), sliceBSpin.Current()) + } + if lc, lb := lostDSpin.Current(); 
lc > 0 { + msg := fmt.Sprintf("%d objects are lost (%d bytes), %d broken files:\n", lc, lb, len(brokens)) + msg += fmt.Sprintf("%13s: PATH\n", "INODE") + var fileList []string + for i, p := range brokens { + fileList = append(fileList, fmt.Sprintf("%13d: %s", i, p)) + } + sort.Strings(fileList) + msg += strings.Join(fileList, "\n") + logger.Fatal(msg) + } + + return nil +} diff --git a/cmd/fsck_test.go b/cmd/fsck_test.go new file mode 100644 index 0000000..114aa58 --- /dev/null +++ b/cmd/fsck_test.go @@ -0,0 +1,51 @@ +/* + * JuiceFS, Copyright 2021 Juicedata, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package main + +import ( + "fmt" + "io/ioutil" + "testing" +) + +func TestFsck(t *testing.T) { + metaUrl := "redis://127.0.0.1:6379/10" + mountpoint := "/tmp/testDir" + defer ResetRedis(metaUrl) + if err := MountTmp(metaUrl, mountpoint); err != nil { + t.Fatalf("mount failed: %v", err) + } + defer func(mountpoint string) { + err := UmountTmp(mountpoint) + if err != nil { + t.Fatalf("umount failed: %v", err) + } + }(mountpoint) + for i := 0; i < 10; i++ { + filename := fmt.Sprintf("%s/f%d.txt", mountpoint, i) + err := ioutil.WriteFile(filename, []byte("test"), 0644) + if err != nil { + t.Fatalf("mount failed: %v", err) + } + } + + fsckArgs := []string{"", "fsck", metaUrl} + err := Main(fsckArgs) + if err != nil { + t.Fatalf("fsck failed: %v", err) + } +} diff --git a/cmd/gateway.go b/cmd/gateway.go new file mode 100644 index 0000000..df2d1fb --- /dev/null +++ b/cmd/gateway.go @@ -0,0 +1,269 @@ +//go:build !nogateway +// +build !nogateway + +/* + * JuiceFS, Copyright 2020 Juicedata, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package main + +import ( + "path/filepath" + + "github.com/juicedata/juicefs/pkg/metric" + + _ "net/http/pprof" + "os" + "strings" + "time" + + "github.com/juicedata/juicefs/pkg/chunk" + jfsgateway "github.com/juicedata/juicefs/pkg/gateway" + "github.com/juicedata/juicefs/pkg/meta" + "github.com/juicedata/juicefs/pkg/usage" + "github.com/juicedata/juicefs/pkg/utils" + "github.com/juicedata/juicefs/pkg/version" + "github.com/juicedata/juicefs/pkg/vfs" + "github.com/urfave/cli/v2" + + mcli "github.com/minio/cli" + minio "github.com/minio/minio/cmd" + "github.com/minio/minio/pkg/auth" +) + +func gatewayFlags() *cli.Command { + flags := append(clientFlags(), + &cli.Float64Flag{ + Name: "attr-cache", + Value: 1.0, + Usage: "attributes cache timeout in seconds", + }, + &cli.Float64Flag{ + Name: "entry-cache", + Value: 0, + Usage: "file entry cache timeout in seconds", + }, + &cli.Float64Flag{ + Name: "dir-entry-cache", + Value: 1.0, + Usage: "dir entry cache timeout in seconds", + }, + &cli.StringFlag{ + Name: "access-log", + Usage: "path for JuiceFS access log", + }, + &cli.StringFlag{ + Name: "metrics", + Value: "127.0.0.1:9567", + Usage: "address to export metrics", + }, + &cli.StringFlag{ + Name: "consul", + Value: "127.0.0.1:8500", + Usage: "consul address to register", + }, + &cli.BoolFlag{ + Name: "no-usage-report", + Usage: "do not send usage report", + }, + &cli.BoolFlag{ + Name: "no-banner", + Usage: "disable MinIO startup information", + }, + &cli.BoolFlag{ + Name: "multi-buckets", + Usage: "use top level of directories as buckets", + }, + &cli.BoolFlag{ + Name: "keep-etag", + Usage: "keep the ETag for uploaded objects", + }) + return &cli.Command{ + Name: "gateway", + Usage: "S3-compatible gateway", + ArgsUsage: "META-URL ADDRESS", + Flags: flags, + Action: gateway, + } +} + +func gateway(c *cli.Context) error { + setLoggerLevel(c) + + if c.Args().Len() < 2 { + logger.Fatalf("Meta URL and listen address are required") + } + + ak := os.Getenv("MINIO_ROOT_USER") + if ak == "" { + ak = os.Getenv("MINIO_ACCESS_KEY") + } + if len(ak) < 3 { + logger.Fatalf("MINIO_ROOT_USER should be specified as an environment variable with at least 3 characters") + } + sk := os.Getenv("MINIO_ROOT_PASSWORD") + if sk == "" { + sk = os.Getenv("MINIO_SECRET_KEY") + } + if len(sk) < 8 { + logger.Fatalf("MINIO_ROOT_PASSWORD should be specified as an environment variable with at least 8 characters") + } + + address := c.Args().Get(1) + gw = &GateWay{c} + + args := []string{"gateway", "--address", address, "--anonymous"} + if c.Bool("no-banner") { + args = append(args, "--quiet") + } + app := &mcli.App{ + Action: gateway2, + Flags: []mcli.Flag{ + mcli.StringFlag{ + Name: "address", + Value: ":9000", + Usage: "bind to a specific ADDRESS:PORT, ADDRESS can be an IP or hostname", + }, + mcli.BoolFlag{ + Name: "anonymous", + Usage: "hide sensitive information from logging", + }, + mcli.BoolFlag{ + Name: "json", + Usage: "output server logs and startup information in json format", + }, + mcli.BoolFlag{ + Name: "quiet", + Usage: "disable MinIO startup information", + }, + }, + } + return app.Run(args) +} + +var gw *GateWay + +func gateway2(ctx *mcli.Context) error { + minio.StartGateway(ctx, gw) + return nil +} + +type GateWay struct { + ctx *cli.Context +} + +func (g *GateWay) Name() string { + return "JuiceFS" +} + +func (g *GateWay) Production() bool { + return true +} + +func (g *GateWay) NewGatewayLayer(creds auth.Credentials) (minio.ObjectLayer, error) { + c := g.ctx + addr := c.Args().Get(0) + 
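// Strip any password embedded in the metadata URL from os.Args so it does not show up in the process title (see removePassword in cmd/main.go).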
removePassword(addr) + m := meta.NewClient(addr, &meta.Config{ + Retries: 10, + Strict: true, + ReadOnly: c.Bool("read-only"), + OpenCache: time.Duration(c.Float64("open-cache") * 1e9), + MountPoint: "s3gateway", + Subdir: c.String("subdir"), + MaxDeletes: c.Int("max-deletes"), + }) + format, err := m.Load() + if err != nil { + logger.Fatalf("load setting: %s", err) + } + wrapRegister("s3gateway", format.Name) + + chunkConf := chunk.Config{ + BlockSize: format.BlockSize * 1024, + Compress: format.Compression, + + GetTimeout: time.Second * time.Duration(c.Int("get-timeout")), + PutTimeout: time.Second * time.Duration(c.Int("put-timeout")), + MaxUpload: c.Int("max-uploads"), + Writeback: c.Bool("writeback"), + Prefetch: c.Int("prefetch"), + BufferSize: c.Int("buffer-size") << 20, + UploadLimit: c.Int64("upload-limit") * 1e6 / 8, + DownloadLimit: c.Int64("download-limit") * 1e6 / 8, + + CacheDir: c.String("cache-dir"), + CacheSize: int64(c.Int("cache-size")), + FreeSpace: float32(c.Float64("free-space-ratio")), + CacheMode: os.FileMode(0600), + CacheFullBlock: !c.Bool("cache-partial-only"), + AutoCreate: true, + } + if chunkConf.CacheDir != "memory" { + ds := utils.SplitDir(chunkConf.CacheDir) + for i := range ds { + ds[i] = filepath.Join(ds[i], format.UUID) + } + chunkConf.CacheDir = strings.Join(ds, string(os.PathListSeparator)) + } + if c.IsSet("bucket") { + format.Bucket = c.String("bucket") + } + blob, err := createStorage(format) + if err != nil { + logger.Fatalf("object storage: %s", err) + } + logger.Infof("Data use %s", blob) + + store := chunk.NewCachedStore(blob, chunkConf) + m.OnMsg(meta.DeleteChunk, func(args ...interface{}) error { + chunkid := args[0].(uint64) + length := args[1].(uint32) + return store.Remove(chunkid, int(length)) + }) + m.OnMsg(meta.CompactChunk, func(args ...interface{}) error { + slices := args[0].([]meta.Slice) + chunkid := args[1].(uint64) + return vfs.Compact(chunkConf, store, slices, chunkid) + }) + err = m.NewSession() + if err != nil { + logger.Fatalf("new session: %s", err) + } + + conf := &vfs.Config{ + Meta: &meta.Config{ + Retries: 10, + }, + Format: format, + Version: version.Version(), + AttrTimeout: time.Millisecond * time.Duration(c.Float64("attr-cache")*1000), + EntryTimeout: time.Millisecond * time.Duration(c.Float64("entry-cache")*1000), + DirEntryTimeout: time.Millisecond * time.Duration(c.Float64("dir-entry-cache")*1000), + AccessLog: c.String("access-log"), + Chunk: &chunkConf, + } + + metricsAddr := exposeMetrics(m, c) + if c.IsSet("consul") { + metric.RegisterToConsul(c.String("consul"), metricsAddr, "s3gateway") + } + if d := c.Duration("backup-meta"); d > 0 { + go vfs.Backup(m, blob, d) + } + if !c.Bool("no-usage-report") { + go usage.ReportUsage(m, "gateway "+version.Version()) + } + return jfsgateway.NewJFSGateway(conf, m, store, c.Bool("multi-buckets"), c.Bool("keep-etag")) +} diff --git a/cmd/gateway_noop.go b/cmd/gateway_noop.go new file mode 100644 index 0000000..da14666 --- /dev/null +++ b/cmd/gateway_noop.go @@ -0,0 +1,35 @@ +//go:build nogateway +// +build nogateway + +/* + * JuiceFS, Copyright 2020 Juicedata, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package main + +import ( + "errors" + + "github.com/urfave/cli/v2" +) + +func gatewayFlags() *cli.Command { + return &cli.Command{ + Name: "gateway", + Usage: "S3-compatible gateway (not included)", + Action: func(*cli.Context) error { + return errors.New("not supported") + }, + } +} diff --git a/cmd/gc.go b/cmd/gc.go new file mode 100644 index 0000000..04f0076 --- /dev/null +++ b/cmd/gc.go @@ -0,0 +1,297 @@ +/* + * JuiceFS, Copyright 2021 Juicedata, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package main + +import ( + "fmt" + "os" + "strconv" + "strings" + "sync" + "time" + + "github.com/juicedata/juicefs/pkg/chunk" + "github.com/juicedata/juicefs/pkg/meta" + "github.com/juicedata/juicefs/pkg/object" + osync "github.com/juicedata/juicefs/pkg/sync" + "github.com/juicedata/juicefs/pkg/utils" + "github.com/juicedata/juicefs/pkg/vfs" + + "github.com/urfave/cli/v2" +) + +func gcFlags() *cli.Command { + return &cli.Command{ + Name: "gc", + Usage: "collect any leaked objects", + ArgsUsage: "META-URL", + Action: gc, + Flags: []cli.Flag{ + &cli.BoolFlag{ + Name: "delete", + Usage: "delete leaked objects", + }, + &cli.BoolFlag{ + Name: "compact", + Usage: "compact small slices into bigger ones", + }, + &cli.IntFlag{ + Name: "threads", + Value: 10, + Usage: "number of threads to delete leaked objects", + }, + }, + } +} + +type dChunk struct { + chunkid uint64 + length uint32 +} + +func gc(ctx *cli.Context) error { + setLoggerLevel(ctx) + if ctx.Args().Len() < 1 { + return fmt.Errorf("META-URL is needed") + } + removePassword(ctx.Args().Get(0)) + m := meta.NewClient(ctx.Args().Get(0), &meta.Config{ + Retries: 10, + Strict: true, + MaxDeletes: ctx.Int("threads"), + }) + format, err := m.Load() + if err != nil { + logger.Fatalf("load setting: %s", err) + } + + chunkConf := chunk.Config{ + BlockSize: format.BlockSize * 1024, + Compress: format.Compression, + + GetTimeout: time.Second * 60, + PutTimeout: time.Second * 60, + MaxUpload: 20, + BufferSize: 300 << 20, + CacheDir: "memory", + } + + blob, err := createStorage(format) + if err != nil { + logger.Fatalf("object storage: %s", err) + } + logger.Infof("Data use %s", blob) + store := chunk.NewCachedStore(blob, chunkConf) + + // Scan all chunks first and do compaction if necessary + progress := utils.NewProgress(false, false) + if ctx.Bool("compact") { + bar := progress.AddCountBar("Scanned chunks", 0) + spin := progress.AddDoubleSpinner("Compacted slices") + m.OnMsg(meta.DeleteChunk, func(args ...interface{}) error { + return store.Remove(args[0].(uint64), int(args[1].(uint32))) + }) + 
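// With --compact, the CompactChunk handler below performs the actual rewrite of slices via vfs.Compact for each chunk visited by CompactAll, while the DeleteChunk handler above removes blocks the metadata engine asks to delete along the way.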
m.OnMsg(meta.CompactChunk, func(args ...interface{}) error { + slices := args[0].([]meta.Slice) + err := vfs.Compact(chunkConf, store, slices, args[1].(uint64)) + for _, s := range slices { + spin.IncrInt64(int64(s.Len)) + } + return err + }) + if st := m.CompactAll(meta.Background, bar); st == 0 { + bar.Done() + spin.Done() + if progress.Quiet { + c, b := spin.Current() + logger.Infof("Compacted %d chunks (%d slices, %d bytes).", bar.Current(), c, b) + } + } else { + logger.Errorf("compact all chunks: %s", st) + } + } else { + m.OnMsg(meta.CompactChunk, func(args ...interface{}) error { + return nil // ignore compaction + }) + } + + // put it above delete count spinner + sliceCSpin := progress.AddCountSpinner("Listed slices") + + // Delete pending chunks while listing slices + delete := ctx.Bool("delete") + var delSpin *utils.Bar + var chunkChan chan *dChunk // pending delete chunks + var wg sync.WaitGroup + if delete { + delSpin = progress.AddCountSpinner("Deleted pending") + chunkChan = make(chan *dChunk, 10240) + m.OnMsg(meta.DeleteChunk, func(args ...interface{}) error { + delSpin.Increment() + chunkChan <- &dChunk{args[0].(uint64), args[1].(uint32)} + return nil + }) + for i := 0; i < ctx.Int("threads"); i++ { + wg.Add(1) + go func() { + defer wg.Done() + for c := range chunkChan { + if err := store.Remove(c.chunkid, int(c.length)); err != nil { + logger.Warnf("remove %d_%d: %s", c.chunkid, c.length, err) + } + } + }() + } + } + + // List all slices in metadata engine + var c = meta.NewContext(0, 0, []uint32{0}) + slices := make(map[meta.Ino][]meta.Slice) + r := m.ListSlices(c, slices, delete, sliceCSpin.Increment) + if r != 0 { + logger.Fatalf("list all slices: %s", r) + } + if delete { + close(chunkChan) + wg.Wait() + delSpin.Done() + if progress.Quiet { + logger.Infof("Deleted %d pending chunks", delSpin.Current()) + } + } + sliceCSpin.Done() + + // Scan all objects to find leaked ones + blob = object.WithPrefix(blob, "chunks/") + objs, err := osync.ListAll(blob, "", "") + if err != nil { + logger.Fatalf("list all blocks: %s", err) + } + keys := make(map[uint64]uint32) + var total int64 + var totalBytes uint64 + for _, ss := range slices { + for _, s := range ss { + keys[s.Chunkid] = s.Size + total += int64(int(s.Size-1)/chunkConf.BlockSize) + 1 // s.Size should be > 0 + totalBytes += uint64(s.Size) + } + } + if progress.Quiet { + logger.Infof("using %d slices (%d bytes)", len(keys), totalBytes) + } + + bar := progress.AddCountBar("Scanned objects", total) + valid := progress.AddDoubleSpinner("Valid objects") + leaked := progress.AddDoubleSpinner("Leaked objects") + skipped := progress.AddDoubleSpinner("Skipped objects") + maxMtime := time.Now().Add(time.Hour * -1) + strDuration := os.Getenv("JFS_GC_SKIPPEDTIME") + if strDuration != "" { + iDuration, err := strconv.Atoi(strDuration) + if err == nil { + maxMtime = time.Now().Add(time.Second * -1 * time.Duration(iDuration)) + } else { + logger.Errorf("parse JFS_GC_SKIPPEDTIME=%s: %s", strDuration, err) + } + } + + var leakedObj = make(chan string, 10240) + for i := 0; i < ctx.Int("threads"); i++ { + wg.Add(1) + go func() { + defer wg.Done() + for key := range leakedObj { + if err := blob.Delete(key); err != nil { + logger.Warnf("delete %s: %s", key, err) + } + } + }() + } + + foundLeaked := func(obj object.Object) { + bar.IncrTotal(1) + leaked.IncrInt64(obj.Size()) + if delete { + leakedObj <- obj.Key() + } + } + + for obj := range objs { + if obj == nil { + break // failed listing + } + if obj.IsDir() { + continue + } + if 
obj.Mtime().After(maxMtime) || obj.Mtime().Unix() == 0 { + logger.Debugf("ignore new block: %s %s", obj.Key(), obj.Mtime()) + bar.Increment() + skipped.IncrInt64(obj.Size()) + continue + } + + logger.Debugf("found block %s", obj.Key()) + parts := strings.Split(obj.Key(), "/") + if len(parts) != 3 { + continue + } + name := parts[2] + parts = strings.Split(name, "_") + if len(parts) != 3 { + continue + } + bar.Increment() + cid, _ := strconv.Atoi(parts[0]) + size := keys[uint64(cid)] + if size == 0 { + logger.Debugf("find leaked object: %s, size: %d", obj.Key(), obj.Size()) + foundLeaked(obj) + continue + } + indx, _ := strconv.Atoi(parts[1]) + csize, _ := strconv.Atoi(parts[2]) + if csize == chunkConf.BlockSize { + if (indx+1)*csize > int(size) { + logger.Warnf("size of slice %d is larger than expected: %d > %d", cid, indx*chunkConf.BlockSize+csize, size) + foundLeaked(obj) + } else { + valid.IncrInt64(obj.Size()) + } + } else { + if indx*chunkConf.BlockSize+csize != int(size) { + logger.Warnf("size of slice %d is %d, but expect %d", cid, indx*chunkConf.BlockSize+csize, size) + foundLeaked(obj) + } else { + valid.IncrInt64(obj.Size()) + } + } + } + close(leakedObj) + wg.Wait() + progress.Done() + + vc, _ := valid.Current() + lc, lb := leaked.Current() + sc, sb := skipped.Current() + logger.Infof("scanned %d objects, %d valid, %d leaked (%d bytes), %d skipped (%d bytes)", + bar.Current(), vc, lc, lb, sc, sb) + if lc > 0 && !delete { + logger.Infof("Please add `--delete` to clean leaked objects") + } + return nil +} diff --git a/cmd/gc_test.go b/cmd/gc_test.go new file mode 100644 index 0000000..10fd9f8 --- /dev/null +++ b/cmd/gc_test.go @@ -0,0 +1,112 @@ +/* + * JuiceFS, Copyright 2021 Juicedata, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package main + +import ( + "fmt" + "io/ioutil" + "os" + "path" + "strings" + "testing" + "time" +) + +func WriteLeakedData(dataDir string) { + var templateContent = "aaaaaaaabbbbbbbb" + var writeContent strings.Builder + for i := 0; i < 64*1024; i++ { + writeContent.Write([]byte(templateContent)) + } + ioutil.WriteFile(dataDir+"chunks/0/0/"+"123456789_0_1048576", []byte(writeContent.String()), 0644) +} + +func CheckLeakedData(dataDir string) bool { + _,err := os.Stat(dataDir+"chunks/0/0/"+"123456789_0_1048576") + if err != nil { + return true + } + return false +} + +func RemoveAllFiles(dataDir string) { + _, err := os.Stat(dataDir) + if err == nil { + files, err := ioutil.ReadDir(dataDir) + if err == nil { + for _, f := range files { + os.RemoveAll(path.Join([]string{dataDir, f.Name()}...)) + } + } + } +} + +func TestGc(t *testing.T) { + metaUrl := "redis://127.0.0.1:6379/10" + mountpoint := "/tmp/testDir" + dataDir := "/tmp/testMountDir/test/" + RemoveAllFiles(dataDir + "chunks/") + defer ResetRedis(metaUrl) + if err := MountTmp(metaUrl, mountpoint); err != nil { + t.Fatalf("mount failed: %v", err) + } + defer func(mountpoint string) { + err := UmountTmp(mountpoint) + if err != nil { + t.Fatalf("umount failed: %v", err) + } + }(mountpoint) + for i := 0; i < 10; i++ { + filename := fmt.Sprintf("%s/f%d.txt", mountpoint, i) + err := ioutil.WriteFile(filename, []byte("test"), 0644) + if err != nil { + t.Fatalf("mount failed: %v", err) + } + } + + strEnvSkippedTime := os.Getenv("JFS_GC_SKIPPEDTIME") + t.Logf("JFS_GC_SKIPPEDTIME is %s", strEnvSkippedTime) + + WriteLeakedData(dataDir) + time.Sleep(time.Duration(3) * time.Second) + + gcArgs := []string{ + "", + "gc", + "--delete", + metaUrl, + } + err := Main(gcArgs) + if err != nil { + t.Fatalf("gc failed: %v", err) + } + + bNotExist := CheckLeakedData(dataDir) + if bNotExist == false { + t.Fatalf("gc delete failed,leaked data was not deleted") + } + + gcArgs = []string{ + "", + "gc", + metaUrl, + } + err = Main(gcArgs) + if err != nil { + t.Fatalf("gc failed: %v", err) + } +} diff --git a/cmd/info.go b/cmd/info.go new file mode 100644 index 0000000..d0df663 --- /dev/null +++ b/cmd/info.go @@ -0,0 +1,123 @@ +/* + * JuiceFS, Copyright 2020 Juicedata, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package main + +import ( + "fmt" + "os" + "path/filepath" + "runtime" + "strconv" + "syscall" + + "github.com/juicedata/juicefs/pkg/meta" + "github.com/juicedata/juicefs/pkg/utils" + "github.com/urfave/cli/v2" +) + +func infoFlags() *cli.Command { + return &cli.Command{ + Name: "info", + Usage: "show internal information for paths or inodes", + ArgsUsage: "PATH or INODE", + Action: info, + Flags: []cli.Flag{ + &cli.BoolFlag{ + Name: "inode", + Aliases: []string{"i"}, + Usage: "use inode instead of path (current dir should be inside JuiceFS)", + }, + &cli.BoolFlag{ + Name: "recursive", + Aliases: []string{"r"}, + Usage: "get summary of directories recursively (NOTE: it may take a long time for huge trees)", + }, + }, + } +} + +func info(ctx *cli.Context) error { + if runtime.GOOS == "windows" { + logger.Infof("Windows is not supported") + return nil + } + if ctx.Args().Len() < 1 { + logger.Infof("DIR or FILE is needed") + return nil + } + var recursive uint8 + if ctx.Bool("recursive") { + recursive = 1 + } + for i := 0; i < ctx.Args().Len(); i++ { + path := ctx.Args().Get(i) + var d string + var inode uint64 + var err error + if ctx.Bool("inode") { + inode, err = strconv.ParseUint(path, 10, 64) + d, _ = os.Getwd() + } else { + d, err = filepath.Abs(path) + if err != nil { + logger.Fatalf("abs of %s: %s", path, err) + } + inode, err = utils.GetFileInode(d) + } + if err != nil { + logger.Errorf("lookup inode for %s: %s", path, err) + continue + } + + f := openController(d) + if f == nil { + logger.Errorf("%s is not inside JuiceFS", path) + continue + } + + wb := utils.NewBuffer(8 + 9) + wb.Put32(meta.Info) + wb.Put32(9) + wb.Put64(inode) + wb.Put8(recursive) + _, err = f.Write(wb.Bytes()) + if err != nil { + logger.Fatalf("write message: %s", err) + } + + data := make([]byte, 4) + n, err := f.Read(data) + if err != nil { + logger.Fatalf("read size: %d %s", n, err) + } + if n == 1 && data[0] == byte(syscall.EINVAL&0xff) { + logger.Fatalf("info is not supported, please upgrade and mount again") + } + r := utils.ReadBuffer(data) + size := r.Get32() + data = make([]byte, size) + n, err = f.Read(data) + if err != nil { + logger.Fatalf("read info: %s", err) + } + fmt.Println(path, ":") + fmt.Println(string(data[:n])) + _ = f.Close() + } + + return nil +} diff --git a/cmd/info_test.go b/cmd/info_test.go new file mode 100644 index 0000000..9862d31 --- /dev/null +++ b/cmd/info_test.go @@ -0,0 +1,89 @@ +/* + * JuiceFS, Copyright 2021 Juicedata, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package main + +import ( + "fmt" + "io/ioutil" + "os" + "strings" + "testing" + + "github.com/agiledragon/gomonkey/v2" + . 
"github.com/smartystreets/goconvey/convey" +) + +func TestInfo(t *testing.T) { + Convey("TestInfo", t, func() { + Convey("TestInfo", func() { + + var res string + tmpFile, err := os.CreateTemp("/tmp", "") + if err != nil { + t.Fatalf("creat tmp file failed: %v", err) + } + defer os.Remove(tmpFile.Name()) + if err != nil { + t.Fatalf("create temporary file: %v", err) + } + // mock os.Stdout + patches := gomonkey.ApplyGlobalVar(os.Stdout, *tmpFile) + defer patches.Reset() + + metaUrl := "redis://127.0.0.1:6379/10" + mountpoint := "/tmp/testDir" + defer ResetRedis(metaUrl) + if err := MountTmp(metaUrl, mountpoint); err != nil { + t.Fatalf("mount failed: %v", err) + } + defer func(mountpoint string) { + err := UmountTmp(mountpoint) + if err != nil { + t.Fatalf("umount failed: %v", err) + } + }(mountpoint) + + err = os.MkdirAll(fmt.Sprintf("%s/dir1", mountpoint), 0777) + if err != nil { + t.Fatalf("mount failed: %v", err) + } + for i := 0; i < 10; i++ { + filename := fmt.Sprintf("%s/dir1/f%d.txt", mountpoint, i) + err := ioutil.WriteFile(filename, []byte("test"), 0644) + if err != nil { + t.Fatalf("mount failed: %v", err) + } + } + + infoArgs := []string{"", "info", fmt.Sprintf("%s/dir1", mountpoint)} + err = Main(infoArgs) + if err != nil { + t.Fatalf("info failed: %v", err) + } + content, err := ioutil.ReadFile(tmpFile.Name()) + if err != nil { + t.Fatalf("readFile failed: %v", err) + } + res = string(content) + var answer = `/tmp/testDir/dir1: inode: 2 files: 10 dirs: 1 length: 40 size: 45056` + replacer := strings.NewReplacer("\n", "", " ", "") + res = replacer.Replace(res) + answer = replacer.Replace(answer) + So(res, ShouldEqual, answer) + }) + }) +} diff --git a/cmd/load.go b/cmd/load.go new file mode 100644 index 0000000..2be44a3 --- /dev/null +++ b/cmd/load.go @@ -0,0 +1,60 @@ +/* + * JuiceFS, Copyright 2021 Juicedata, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package main + +import ( + "fmt" + "io" + "os" + + "github.com/juicedata/juicefs/pkg/meta" + "github.com/urfave/cli/v2" +) + +func load(ctx *cli.Context) error { + setLoggerLevel(ctx) + if ctx.Args().Len() < 1 { + return fmt.Errorf("META-URL is needed") + } + var fp io.ReadCloser + if ctx.Args().Len() == 1 { + fp = os.Stdin + } else { + var err error + fp, err = os.Open(ctx.Args().Get(1)) + if err != nil { + return err + } + defer fp.Close() + } + removePassword(ctx.Args().Get(0)) + m := meta.NewClient(ctx.Args().Get(0), &meta.Config{Retries: 10, Strict: true}) + if err := m.LoadMeta(fp); err != nil { + return err + } + logger.Infof("Load metadata from %s succeed", ctx.Args().Get(1)) + return nil +} + +func loadFlags() *cli.Command { + return &cli.Command{ + Name: "load", + Usage: "load metadata from a previously dumped JSON file", + ArgsUsage: "META-URL [FILE]", + Action: load, + } +} diff --git a/cmd/main.go b/cmd/main.go new file mode 100644 index 0000000..4f680bd --- /dev/null +++ b/cmd/main.go @@ -0,0 +1,311 @@ +/* + * JuiceFS, Copyright 2020 Juicedata, Inc. 
+ * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package main + +import ( + "fmt" + "log" + "net/http" + _ "net/http/pprof" + "os" + "strings" + + "github.com/erikdubbelboer/gspt" + "github.com/google/gops/agent" + "github.com/sirupsen/logrus" + + "github.com/juicedata/juicefs/pkg/utils" + "github.com/juicedata/juicefs/pkg/version" + "github.com/urfave/cli/v2" +) + +var logger = utils.GetLogger("juicefs") + +func globalFlags() []cli.Flag { + return []cli.Flag{ + &cli.BoolFlag{ + Name: "verbose", + Aliases: []string{"debug", "v"}, + Usage: "enable debug log", + }, + &cli.BoolFlag{ + Name: "quiet", + Aliases: []string{"q"}, + Usage: "only warning and errors", + }, + &cli.BoolFlag{ + Name: "trace", + Usage: "enable trace log", + }, + &cli.BoolFlag{ + Name: "no-agent", + Usage: "Disable pprof (:6060) and gops (:6070) agent", + }, + &cli.BoolFlag{ + Name: "no-color", + Usage: "disable colors", + }, + } +} + +func Main(args []string) error { + cli.VersionFlag = &cli.BoolFlag{ + Name: "version", Aliases: []string{"V"}, + Usage: "print only the version", + } + app := &cli.App{ + Name: "juicefs", + Usage: "A POSIX file system built on Redis and object storage.", + Version: version.Version(), + Copyright: "Apache License 2.0", + EnableBashCompletion: true, + Flags: globalFlags(), + Commands: []*cli.Command{ + formatFlags(), + mountFlags(), + umountFlags(), + gatewayFlags(), + syncFlags(), + rmrFlags(), + infoFlags(), + benchFlags(), + gcFlags(), + checkFlags(), + profileFlags(), + statsFlags(), + statusFlags(), + warmupFlags(), + dumpFlags(), + loadFlags(), + configFlags(), + destroyFlags(), + }, + } + + // Called via mount or fstab. + if strings.HasSuffix(args[0], "/mount.juicefs") { + if newArgs, err := handleSysMountArgs(args); err != nil { + log.Fatal(err) + } else { + args = newArgs + } + } + + return app.Run(reorderOptions(app, args)) + +} + +func main() { + err := Main(os.Args) + if err != nil { + log.Fatal(err) + } +} + +func handleSysMountArgs(args []string) ([]string, error) { + optionToCmdFlag := map[string]string{ + "attrcacheto": "attr-cache", + "entrycacheto": "entry-cache", + "direntrycacheto": "dir-entry-cache", + } + newArgs := []string{"juicefs", "mount", "-d"} + mountOptions := args[3:] + sysOptions := []string{"_netdev", "rw", "defaults", "remount"} + fuseOptions := make([]string, 0, 20) + cmdFlagsLookup := make(map[string]bool, 20) + for _, f := range append(mountFlags().Flags, globalFlags()...) 
{ + if names := f.Names(); len(names) > 0 && len(names[0]) > 1 { + _, cmdFlagsLookup[names[0]] = f.(*cli.BoolFlag) + } + } + + parseFlag := false + for _, option := range mountOptions { + if option == "-o" { + parseFlag = true + continue + } + if !parseFlag { + continue + } + + opts := strings.Split(option, ",") + for _, opt := range opts { + opt = strings.TrimSpace(opt) + if opt == "" || stringContains(sysOptions, opt) { + continue + } + // Lower case option name is preferred, but if it's the same as flag name, we also accept it + if strings.Contains(opt, "=") { + fields := strings.SplitN(opt, "=", 2) + if flagName, ok := optionToCmdFlag[fields[0]]; ok { + newArgs = append(newArgs, fmt.Sprintf("--%s=%s", flagName, fields[1])) + } else if isBool, ok := cmdFlagsLookup[fields[0]]; ok && !isBool { + newArgs = append(newArgs, fmt.Sprintf("--%s=%s", fields[0], fields[1])) + } else { + fuseOptions = append(fuseOptions, opt) + } + } else if flagName, ok := optionToCmdFlag[opt]; ok { + newArgs = append(newArgs, fmt.Sprintf("--%s", flagName)) + } else if isBool, ok := cmdFlagsLookup[opt]; ok && isBool { + newArgs = append(newArgs, fmt.Sprintf("--%s", opt)) + if opt == "debug" { + fuseOptions = append(fuseOptions, opt) + } + } else { + fuseOptions = append(fuseOptions, opt) + } + } + + parseFlag = false + } + if len(fuseOptions) > 0 { + newArgs = append(newArgs, "-o", strings.Join(fuseOptions, ",")) + } + newArgs = append(newArgs, args[1], args[2]) + logger.Debug("Parsed mount args: ", strings.Join(newArgs, " ")) + return newArgs, nil +} + +func stringContains(s []string, e string) bool { + for _, item := range s { + if item == e { + return true + } + } + return false +} + +func isFlag(flags []cli.Flag, option string) (bool, bool) { + if !strings.HasPrefix(option, "-") { + return false, false + } + // --V or -v work the same + option = strings.TrimLeft(option, "-") + for _, flag := range flags { + _, isBool := flag.(*cli.BoolFlag) + for _, name := range flag.Names() { + if option == name || strings.HasPrefix(option, name+"=") { + return true, !isBool && !strings.Contains(option, "=") + } + } + } + return false, false +} + +func reorderOptions(app *cli.App, args []string) []string { + var newArgs = []string{args[0]} + var others []string + globalFlags := append(app.Flags, cli.VersionFlag) + for i := 1; i < len(args); i++ { + option := args[i] + if ok, hasValue := isFlag(globalFlags, option); ok { + newArgs = append(newArgs, option) + if hasValue { + i++ + newArgs = append(newArgs, args[i]) + } + } else { + others = append(others, option) + } + } + // no command + if len(others) == 0 { + return newArgs + } + cmdName := others[0] + var cmd *cli.Command + for _, c := range app.Commands { + if c.Name == cmdName { + cmd = c + } + } + if cmd == nil { + // can't recognize the command, skip it + return append(newArgs, others...) + } + + newArgs = append(newArgs, cmdName) + args, others = others[1:], nil + // -h is valid for all the commands + cmdFlags := append(cmd.Flags, cli.HelpFlag) + for i := 0; i < len(args); i++ { + option := args[i] + if ok, hasValue := isFlag(cmdFlags, option); ok { + newArgs = append(newArgs, option) + if hasValue { + i++ + newArgs = append(newArgs, args[i]) + } + } else { + if strings.HasPrefix(option, "-") && !stringContains(args, "--generate-bash-completion") { + logger.Fatalf("unknown option: %s", option) + } + others = append(others, option) + } + } + return append(newArgs, others...) 
+} + +func setupAgent(c *cli.Context) { + if !c.Bool("no-agent") { + go func() { + for port := 6060; port < 6100; port++ { + _ = http.ListenAndServe(fmt.Sprintf("127.0.0.1:%d", port), nil) + } + }() + go func() { + for port := 6070; port < 6100; port++ { + _ = agent.Listen(agent.Options{Addr: fmt.Sprintf("127.0.0.1:%d", port)}) + } + }() + } +} + +func setLoggerLevel(c *cli.Context) { + if c.Bool("trace") { + utils.SetLogLevel(logrus.TraceLevel) + } else if c.Bool("verbose") { + utils.SetLogLevel(logrus.DebugLevel) + } else if c.Bool("quiet") { + utils.SetLogLevel(logrus.WarnLevel) + } else { + utils.SetLogLevel(logrus.InfoLevel) + } + if c.Bool("no-color") { + utils.DisableLogColor() + } + setupAgent(c) +} + +func removePassword(uri string) { + var uri2 string + if strings.Contains(uri, "://") { + uri2 = utils.RemovePassword(uri) + } else { + uri2 = utils.RemovePassword("redis://" + uri) + } + if uri2 != uri { + for i, a := range os.Args { + if a == uri { + os.Args[i] = uri2 + break + } + } + } + gspt.SetProcTitle(strings.Join(os.Args, " ")) +} diff --git a/cmd/main_test.go b/cmd/main_test.go new file mode 100644 index 0000000..60cdbc3 --- /dev/null +++ b/cmd/main_test.go @@ -0,0 +1,62 @@ +/* + * JuiceFS, Copyright 2020 Juicedata, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package main + +import ( + "reflect" + "testing" + + "github.com/urfave/cli/v2" +) + +func TestArgsOrder(t *testing.T) { + var app = &cli.App{ + Flags: []cli.Flag{ + &cli.BoolFlag{ + Name: "verbose", + Aliases: []string{"v"}, + }, + &cli.Int64Flag{ + Name: "key", + Aliases: []string{"k"}, + }, + }, + Commands: []*cli.Command{ + { + Name: "cmd", + Flags: []cli.Flag{ + &cli.Int64Flag{ + Name: "k2", + }, + }, + }, + }, + } + + var cases = [][]string{ + {"test", "cmd", "a", "-k2", "v2", "b", "--v"}, + {"test", "--v", "cmd", "-k2", "v2", "a", "b"}, + {"test", "cmd", "a", "-k2=v", "--h"}, + {"test", "cmd", "-k2=v", "--h", "a"}, + } + for i := 0; i < len(cases); i += 2 { + oreded := reorderOptions(app, cases[i]) + if !reflect.DeepEqual(cases[i+1], oreded) { + t.Fatalf("expecte %v, but got %v", cases[i+1], oreded) + } + } +} diff --git a/cmd/mount.go b/cmd/mount.go new file mode 100644 index 0000000..4993c9e --- /dev/null +++ b/cmd/mount.go @@ -0,0 +1,441 @@ +/* + * JuiceFS, Copyright 2020 Juicedata, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package main + +import ( + "net" + "net/http" + _ "net/http/pprof" + "os" + "os/signal" + "path" + "path/filepath" + "runtime" + "strings" + "syscall" + "time" + + "github.com/prometheus/client_golang/prometheus" + "github.com/prometheus/client_golang/prometheus/promhttp" + "github.com/urfave/cli/v2" + + "github.com/juicedata/juicefs/pkg/chunk" + "github.com/juicedata/juicefs/pkg/meta" + "github.com/juicedata/juicefs/pkg/metric" + "github.com/juicedata/juicefs/pkg/usage" + "github.com/juicedata/juicefs/pkg/utils" + "github.com/juicedata/juicefs/pkg/version" + "github.com/juicedata/juicefs/pkg/vfs" +) + +func installHandler(mp string) { + // Go will catch all the signals + signal.Ignore(syscall.SIGPIPE) + signalChan := make(chan os.Signal, 10) + signal.Notify(signalChan, syscall.SIGTERM, syscall.SIGINT, syscall.SIGHUP) + go func() { + for { + <-signalChan + go func() { _ = doUmount(mp, true) }() + go func() { + time.Sleep(time.Second * 3) + os.Exit(1) + }() + } + }() +} + +func exposeMetrics(m meta.Meta, c *cli.Context) string { + var ip, port string + //default set + ip, port, err := net.SplitHostPort(c.String("metrics")) + if err != nil { + logger.Fatalf("metrics format error: %v", err) + } + + meta.InitMetrics() + vfs.InitMetrics() + go metric.UpdateMetrics(m) + http.Handle("/metrics", promhttp.HandlerFor( + prometheus.DefaultGatherer, + promhttp.HandlerOpts{ + // Opt into OpenMetrics to support exemplars. + EnableOpenMetrics: true, + }, + )) + prometheus.MustRegister(prometheus.NewBuildInfoCollector()) + + // If not set metrics addr,the port will be auto set + if !c.IsSet("metrics") { + // If only set consul, ip will auto set + if c.IsSet("consul") { + ip, err = utils.GetLocalIp(c.String("consul")) + if err != nil { + logger.Errorf("Get local ip failed: %v", err) + return "" + } + } + } + + ln, err := net.Listen("tcp", net.JoinHostPort(ip, port)) + if err != nil { + // Don't try other ports on metrics set but listen failed + if c.IsSet("metrics") { + logger.Errorf("listen on %s:%s failed: %v", ip, port, err) + return "" + } + // Listen port on 0 will auto listen on a free port + ln, err = net.Listen("tcp", net.JoinHostPort(ip, "0")) + if err != nil { + logger.Errorf("Listen failed: %v", err) + return "" + } + } + + go func() { + if err := http.Serve(ln, nil); err != nil { + logger.Errorf("Serve for metrics: %s", err) + } + }() + + metricsAddr := ln.Addr().String() + logger.Infof("Prometheus metrics listening on %s", metricsAddr) + return metricsAddr +} + +func wrapRegister(mp, name string) { + registry := prometheus.NewRegistry() // replace default so only JuiceFS metrics are exposed + prometheus.DefaultGatherer = registry + metricLabels := prometheus.Labels{"mp": mp, "vol_name": name} + prometheus.DefaultRegisterer = prometheus.WrapRegistererWithPrefix("juicefs_", + prometheus.WrapRegistererWith(metricLabels, registry)) + prometheus.MustRegister(prometheus.NewProcessCollector(prometheus.ProcessCollectorOpts{})) + prometheus.MustRegister(prometheus.NewGoCollector()) +} + +func mount(c *cli.Context) error { + setLoggerLevel(c) + if c.Args().Len() < 1 { + logger.Fatalf("Meta URL and mountpoint are required") + } + addr := c.Args().Get(0) + if c.Args().Len() < 2 { + logger.Fatalf("MOUNTPOINT is required") + } + mp := c.Args().Get(1) + fi, err := os.Stat(mp) + if !strings.Contains(mp, ":") && err != nil { + if err := os.MkdirAll(mp, 0777); err != nil { + if os.IsExist(err) { + // a broken mount point, umount it + if err = doUmount(mp, true); err != nil { + logger.Fatalf("umount %s: 
%s", mp, err) + } + } else { + logger.Fatalf("create %s: %s", mp, err) + } + } + } else if err == nil && fi.Size() == 0 { + // a broken mount point, umount it + if err = doUmount(mp, true); err != nil { + logger.Fatalf("umount %s: %s", mp, err) + } + } + var readOnly = c.Bool("read-only") + for _, o := range strings.Split(c.String("o"), ",") { + if o == "ro" { + readOnly = true + } + } + metaConf := &meta.Config{ + Retries: 10, + Strict: true, + CaseInsensi: strings.HasSuffix(mp, ":") && runtime.GOOS == "windows", + ReadOnly: readOnly, + OpenCache: time.Duration(c.Float64("open-cache") * 1e9), + MountPoint: mp, + Subdir: c.String("subdir"), + MaxDeletes: c.Int("max-deletes"), + } + m := meta.NewClient(addr, metaConf) + format, err := m.Load() + if err != nil { + logger.Fatalf("load setting: %s", err) + } + + // Wrap the default registry, all prometheus.MustRegister() calls should be afterwards + wrapRegister(mp, format.Name) + + if !c.Bool("writeback") && c.IsSet("upload-delay") { + logger.Warnf("delayed upload only work in writeback mode") + } + + chunkConf := chunk.Config{ + BlockSize: format.BlockSize * 1024, + Compress: format.Compression, + + GetTimeout: time.Second * time.Duration(c.Int("get-timeout")), + PutTimeout: time.Second * time.Duration(c.Int("put-timeout")), + MaxUpload: c.Int("max-uploads"), + Writeback: c.Bool("writeback"), + UploadDelay: c.Duration("upload-delay"), + Prefetch: c.Int("prefetch"), + BufferSize: c.Int("buffer-size") << 20, + UploadLimit: c.Int64("upload-limit") * 1e6 / 8, + DownloadLimit: c.Int64("download-limit") * 1e6 / 8, + + CacheDir: c.String("cache-dir"), + CacheSize: int64(c.Int("cache-size")), + FreeSpace: float32(c.Float64("free-space-ratio")), + CacheMode: os.FileMode(0600), + CacheFullBlock: !c.Bool("cache-partial-only"), + AutoCreate: true, + } + + if chunkConf.CacheDir != "memory" { + ds := utils.SplitDir(chunkConf.CacheDir) + for i := range ds { + ds[i] = filepath.Join(ds[i], format.UUID) + } + chunkConf.CacheDir = strings.Join(ds, string(os.PathListSeparator)) + } + if c.IsSet("bucket") { + format.Bucket = c.String("bucket") + } + blob, err := createStorage(format) + if err != nil { + logger.Fatalf("object storage: %s", err) + } + logger.Infof("Data use %s", blob) + store := chunk.NewCachedStore(blob, chunkConf) + m.OnMsg(meta.DeleteChunk, func(args ...interface{}) error { + chunkid := args[0].(uint64) + length := args[1].(uint32) + return store.Remove(chunkid, int(length)) + }) + m.OnMsg(meta.CompactChunk, func(args ...interface{}) error { + slices := args[0].([]meta.Slice) + chunkid := args[1].(uint64) + return vfs.Compact(chunkConf, store, slices, chunkid) + }) + conf := &vfs.Config{ + Meta: metaConf, + Format: format, + Version: version.Version(), + Mountpoint: mp, + Chunk: &chunkConf, + } + + if c.Bool("background") && os.Getenv("JFS_FOREGROUND") == "" { + if runtime.GOOS != "windows" { + d := c.String("cache-dir") + if d != "memory" && !strings.HasPrefix(d, "/") { + ad, err := filepath.Abs(d) + if err != nil { + logger.Fatalf("cache-dir should be absolute path in daemon mode") + } else { + for i, a := range os.Args { + if a == d || a == "--cache-dir="+d { + os.Args[i] = a[:len(a)-len(d)] + ad + } + } + } + } + } + sqliteScheme := "sqlite3://" + if strings.HasPrefix(addr, sqliteScheme) { + path := addr[len(sqliteScheme):] + path2, err := filepath.Abs(path) + if err == nil && path2 != path { + for i, a := range os.Args { + if a == addr { + os.Args[i] = sqliteScheme + path2 + } + } + } + } + // The default log to syslog is only in daemon 
mode. + utils.InitLoggers(!c.Bool("no-syslog")) + err := makeDaemon(c, conf.Format.Name, conf.Mountpoint, m) + if err != nil { + logger.Fatalf("Failed to make daemon: %s", err) + } + } else { + go checkMountpoint(conf.Format.Name, mp) + } + + removePassword(addr) + err = m.NewSession() + if err != nil { + logger.Fatalf("new session: %s", err) + } + installHandler(mp) + v := vfs.NewVFS(conf, m, store) + metricsAddr := exposeMetrics(m, c) + if c.IsSet("consul") { + metric.RegisterToConsul(c.String("consul"), metricsAddr, mp) + } + if d := c.Duration("backup-meta"); d > 0 { + go vfs.Backup(m, blob, d) + } + if !c.Bool("no-usage-report") { + go usage.ReportUsage(m, version.Version()) + } + mount_main(v, c) + return m.CloseSession() +} + +func clientFlags() []cli.Flag { + var defaultCacheDir = "/var/jfsCache" + switch runtime.GOOS { + case "darwin": + fallthrough + case "windows": + homeDir, err := os.UserHomeDir() + if err != nil { + logger.Fatalf("%v", err) + return nil + } + defaultCacheDir = path.Join(homeDir, ".juicefs", "cache") + } + return []cli.Flag{ + &cli.StringFlag{ + Name: "bucket", + Usage: "customized endpoint to access object store", + }, + &cli.IntFlag{ + Name: "get-timeout", + Value: 60, + Usage: "the max number of seconds to download an object", + }, + &cli.IntFlag{ + Name: "put-timeout", + Value: 60, + Usage: "the max number of seconds to upload an object", + }, + &cli.IntFlag{ + Name: "io-retries", + Value: 30, + Usage: "number of retries after network failure", + }, + &cli.IntFlag{ + Name: "max-uploads", + Value: 20, + Usage: "number of connections to upload", + }, + &cli.IntFlag{ + Name: "max-deletes", + Value: 2, + Usage: "number of threads to delete objects", + }, + &cli.IntFlag{ + Name: "buffer-size", + Value: 300, + Usage: "total read/write buffering in MB", + }, + &cli.Int64Flag{ + Name: "upload-limit", + Value: 0, + Usage: "bandwidth limit for upload in Mbps", + }, + &cli.Int64Flag{ + Name: "download-limit", + Value: 0, + Usage: "bandwidth limit for download in Mbps", + }, + + &cli.IntFlag{ + Name: "prefetch", + Value: 1, + Usage: "prefetch N blocks in parallel", + }, + &cli.BoolFlag{ + Name: "writeback", + Usage: "upload objects in background", + }, + &cli.DurationFlag{ + Name: "upload-delay", + Usage: "delayed duration for uploading objects (\"s\", \"m\", \"h\")", + }, + &cli.StringFlag{ + Name: "cache-dir", + Value: defaultCacheDir, + Usage: "directory paths of local cache, use colon to separate multiple paths", + }, + &cli.IntFlag{ + Name: "cache-size", + Value: 100 << 10, + Usage: "size of cached objects in MiB", + }, + &cli.Float64Flag{ + Name: "free-space-ratio", + Value: 0.1, + Usage: "min free space (ratio)", + }, + &cli.BoolFlag{ + Name: "cache-partial-only", + Usage: "cache only random/small read", + }, + &cli.DurationFlag{ + Name: "backup-meta", + Value: time.Hour, + Usage: "interval to automatically backup metadata in the object storage (0 means disable backup)", + }, + + &cli.BoolFlag{ + Name: "read-only", + Usage: "allow lookup/read operations only", + }, + &cli.Float64Flag{ + Name: "open-cache", + Value: 0.0, + Usage: "open files cache timeout in seconds (0 means disable this feature)", + }, + &cli.StringFlag{ + Name: "subdir", + Usage: "mount a sub-directory as root", + }, + } +} + +func mountFlags() *cli.Command { + cmd := &cli.Command{ + Name: "mount", + Usage: "mount a volume", + ArgsUsage: "META-URL MOUNTPOINT", + Action: mount, + Flags: []cli.Flag{ + &cli.StringFlag{ + Name: "metrics", + Value: "127.0.0.1:9567", + Usage: "address to export 
metrics", + }, + &cli.StringFlag{ + Name: "consul", + Value: "127.0.0.1:8500", + Usage: "consul address to register", + }, + &cli.BoolFlag{ + Name: "no-usage-report", + Usage: "do not send usage report", + }, + }, + } + cmd.Flags = append(cmd.Flags, mount_flags()...) + cmd.Flags = append(cmd.Flags, clientFlags()...) + return cmd +} diff --git a/cmd/mount_test.go b/cmd/mount_test.go new file mode 100644 index 0000000..02df976 --- /dev/null +++ b/cmd/mount_test.go @@ -0,0 +1,127 @@ +/* + * JuiceFS, Copyright 2021 Juicedata, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package main + +import ( + "context" + "fmt" + "io/ioutil" + "net/http" + "net/url" + "reflect" + "testing" + "time" + + "github.com/prometheus/client_golang/prometheus" + + "github.com/go-redis/redis/v8" + + "github.com/juicedata/juicefs/pkg/meta" + + "github.com/agiledragon/gomonkey/v2" + + . "github.com/smartystreets/goconvey/convey" + "github.com/urfave/cli/v2" +) + +func Test_exposeMetrics(t *testing.T) { + Convey("Test_exposeMetrics", t, func() { + Convey("Test_exposeMetrics", func() { + addr := "redis://127.0.0.1:6379/10" + var conf = meta.Config{MaxDeletes: 1} + client := meta.NewClient(addr, &conf) + var appCtx *cli.Context + stringPatches := gomonkey.ApplyMethod(reflect.TypeOf(appCtx), "String", func(_ *cli.Context, arg string) string { + switch arg { + case "metrics": + return "127.0.0.1:9567" + case "consul": + return "127.0.0.1:8500" + default: + return "" + } + }) + isSetPatches := gomonkey.ApplyMethod(reflect.TypeOf(appCtx), "IsSet", func(_ *cli.Context, _ string) bool { + return false + }) + defer stringPatches.Reset() + defer isSetPatches.Reset() + ResetPrometheus() + metricsAddr := exposeMetrics(client, appCtx) + + u := url.URL{Scheme: "http", Host: metricsAddr, Path: "/metrics"} + resp, err := http.Get(u.String()) + So(err, ShouldBeNil) + all, err := ioutil.ReadAll(resp.Body) + So(err, ShouldBeNil) + So(string(all), ShouldNotBeBlank) + }) + }) +} + +func ResetPrometheus() { + http.DefaultServeMux = http.NewServeMux() + prometheus.DefaultRegisterer = prometheus.NewRegistry() +} + +func MountTmp(metaUrl, mountpoint string) error { + ResetRedis(metaUrl) + formatArgs := []string{"", "format", "--storage", "file", "--bucket", "/tmp/testMountDir", metaUrl, "test"} + err := Main(formatArgs) + if err != nil { + return err + } + mountArgs := []string{"", "mount", metaUrl, mountpoint} + + //Must be reset, otherwise panic will appear + ResetPrometheus() + + go func() { + err := Main(mountArgs) + if err != nil { + fmt.Printf("mount failed: %v", err) + } + }() + time.Sleep(2 * time.Second) + return nil +} +func ResetRedis(metaUrl string) { + opt, _ := redis.ParseURL(metaUrl) + rdb := redis.NewClient(opt) + rdb.FlushDB(context.Background()) +} + +func TestMount(t *testing.T) { + metaUrl := "redis://127.0.0.1:6379/10" + mountpoint := "/tmp/testDir" + defer ResetRedis(metaUrl) + if err := MountTmp(metaUrl, mountpoint); err != nil { + t.Fatalf("mount failed: %v", err) + } + defer func(mountpoint 
string) { + err := UmountTmp(mountpoint) + if err != nil { + t.Fatalf("umount failed: %v", err) + } + }(mountpoint) + + err := ioutil.WriteFile(fmt.Sprintf("%s/f1.txt", mountpoint), []byte("test"), 0644) + if err != nil { + t.Fatalf("Test mount failed: %v", err) + } + +} diff --git a/cmd/mount_unix.go b/cmd/mount_unix.go new file mode 100644 index 0000000..0370731 --- /dev/null +++ b/cmd/mount_unix.go @@ -0,0 +1,191 @@ +//go:build !windows +// +build !windows + +/* + * JuiceFS, Copyright 2020 Juicedata, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package main + +import ( + "bytes" + "io/ioutil" + "os" + "path" + "path/filepath" + "runtime" + "syscall" + "time" + + "github.com/juicedata/godaemon" + "github.com/juicedata/juicefs/pkg/fuse" + "github.com/juicedata/juicefs/pkg/meta" + "github.com/juicedata/juicefs/pkg/vfs" + "github.com/urfave/cli/v2" +) + +func checkMountpoint(name, mp string) { + for i := 0; i < 20; i++ { + time.Sleep(time.Millisecond * 500) + st, err := os.Stat(mp) + if err == nil { + if sys, ok := st.Sys().(*syscall.Stat_t); ok && sys.Ino == 1 { + logger.Infof("\033[92mOK\033[0m, %s is ready at %s", name, mp) + return + } + } + _, _ = os.Stdout.WriteString(".") + _ = os.Stdout.Sync() + } + _, _ = os.Stdout.WriteString("\n") + logger.Fatalf("fail to mount after 10 seconds, please check the log (/var/log/juicefs.log) or re-mount in foreground") +} + +func makeDaemon(c *cli.Context, name, mp string, m meta.Meta) error { + var attrs godaemon.DaemonAttr + attrs.OnExit = func(stage int) error { + if stage != 0 { + return nil + } + checkMountpoint(name, mp) + return nil + } + + // the current dir will be changed to root in daemon, + // so the mount point has to be an absolute path. 
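+ // godaemon re-executes the binary in stages: Stage 0 is still the original
+ // foreground process, so arguments and the log file are prepared here before the re-exec.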
+ if godaemon.Stage() == 0 { + for i, a := range os.Args { + if a == mp { + amp, err := filepath.Abs(mp) + if err == nil { + os.Args[i] = amp + } else { + logger.Warnf("abs of %s: %s", mp, err) + } + } + } + var err error + logfile := c.String("log") + attrs.Stdout, err = os.OpenFile(logfile, os.O_APPEND|os.O_CREATE|os.O_WRONLY, 0644) + if err != nil { + logger.Errorf("open log file %s: %s", logfile, err) + } + } + if godaemon.Stage() <= 1 { + err := m.Shutdown() + if err != nil { + logger.Errorf("shutdown: %s", err) + } + } + _, _, err := godaemon.MakeDaemon(&attrs) + return err +} + +func mount_flags() []cli.Flag { + var defaultLogDir = "/var/log" + switch runtime.GOOS { + case "darwin": + homeDir, err := os.UserHomeDir() + if err != nil { + logger.Fatalf("%v", err) + return nil + } + defaultLogDir = path.Join(homeDir, ".juicefs") + } + return []cli.Flag{ + &cli.BoolFlag{ + Name: "d", + Aliases: []string{"background"}, + Usage: "run in background", + }, + &cli.BoolFlag{ + Name: "no-syslog", + Usage: "disable syslog", + }, + &cli.StringFlag{ + Name: "log", + Value: path.Join(defaultLogDir, "juicefs.log"), + Usage: "path of log file when running in background", + }, + &cli.StringFlag{ + Name: "o", + Usage: "other FUSE options", + }, + &cli.Float64Flag{ + Name: "attr-cache", + Value: 1.0, + Usage: "attributes cache timeout in seconds", + }, + &cli.Float64Flag{ + Name: "entry-cache", + Value: 1.0, + Usage: "file entry cache timeout in seconds", + }, + &cli.Float64Flag{ + Name: "dir-entry-cache", + Value: 1.0, + Usage: "dir entry cache timeout in seconds", + }, + &cli.BoolFlag{ + Name: "enable-xattr", + Usage: "enable extended attributes (xattr)", + }, + } +} + +func disableUpdatedb() { + path := "/etc/updatedb.conf" + data, err := ioutil.ReadFile(path) + if err != nil { + return + } + fstype := "fuse.juicefs" + if bytes.Contains(data, []byte(fstype)) { + return + } + // assume that fuse.sshfs is already in PRUNEFS + knownFS := "fuse.sshfs" + p1 := bytes.Index(data, []byte("PRUNEFS")) + p2 := bytes.Index(data, []byte(knownFS)) + if p1 > 0 && p2 > p1 { + var nd []byte + nd = append(nd, data[:p2]...) + nd = append(nd, fstype...) + nd = append(nd, ' ') + nd = append(nd, data[p2:]...) + err = ioutil.WriteFile(path, nd, 0644) + if err != nil { + logger.Warnf("update %s: %s", path, err) + } else { + logger.Infof("Add %s into PRUNEFS of %s", fstype, path) + } + } +} + +func mount_main(v *vfs.VFS, c *cli.Context) { + if os.Getuid() == 0 && os.Getpid() != 1 { + disableUpdatedb() + } + + conf := v.Conf + conf.AttrTimeout = time.Millisecond * time.Duration(c.Float64("attr-cache")*1000) + conf.EntryTimeout = time.Millisecond * time.Duration(c.Float64("entry-cache")*1000) + conf.DirEntryTimeout = time.Millisecond * time.Duration(c.Float64("dir-entry-cache")*1000) + logger.Infof("Mounting volume %s at %s ...", conf.Format.Name, conf.Mountpoint) + err := fuse.Serve(v, c.String("o"), c.Bool("enable-xattr")) + if err != nil { + logger.Fatalf("fuse: %s", err) + } +} diff --git a/cmd/mount_windows.go b/cmd/mount_windows.go new file mode 100644 index 0000000..15672e1 --- /dev/null +++ b/cmd/mount_windows.go @@ -0,0 +1,58 @@ +/* + * JuiceFS, Copyright 2020 Juicedata, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package main + +import ( + "github.com/juicedata/juicefs/pkg/meta" + "github.com/juicedata/juicefs/pkg/vfs" + "github.com/juicedata/juicefs/pkg/winfsp" + "github.com/urfave/cli/v2" +) + +func mount_flags() []cli.Flag { + return []cli.Flag{ + &cli.StringFlag{ + Name: "o", + Usage: "other FUSE options", + }, + &cli.BoolFlag{ + Name: "as-root", + Usage: "Access files as administrator", + }, + &cli.Float64Flag{ + Name: "file-cache-to", + Value: 0.1, + Usage: "Cache file attributes in seconds", + }, + &cli.Float64Flag{ + Name: "delay-close", + Usage: "delay file closing in seconds.", + }, + } +} + +func makeDaemon(c *cli.Context, name, mp string, m meta.Meta) error { + logger.Warnf("Cannot run in background in Windows.") + return nil +} + +func mount_main(v *vfs.VFS, c *cli.Context) { + winfsp.Serve(v, c.String("o"), c.Float64("file-cache-to"), c.Bool("as-root"), c.Int("delay-close")) +} + +func checkMountpoint(name, mp string) { +} diff --git a/cmd/profile.go b/cmd/profile.go new file mode 100644 index 0000000..a3a2087 --- /dev/null +++ b/cmd/profile.go @@ -0,0 +1,399 @@ +/* + * JuiceFS, Copyright 2021 Juicedata, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package main + +import ( + "bufio" + "fmt" + "os" + "path" + "regexp" + "sort" + "strconv" + "strings" + "time" + + "github.com/juicedata/juicefs/pkg/utils" + "github.com/mattn/go-isatty" + "github.com/urfave/cli/v2" +) + +var findDigits = regexp.MustCompile(`\d+`) + +type profiler struct { + file *os.File + replay bool + tty bool + interval time.Duration + uids []string + gids []string + pids []string + entryChan chan *logEntry // one line + statsChan chan map[string]*stat + pause chan bool + /* --- for replay --- */ + printTime chan time.Time + done chan bool +} + +type stat struct { + count int + total int // total latency in 'us' +} + +type keyStat struct { + key string + sPtr *stat +} + +type logEntry struct { + ts time.Time + uid, gid, pid string + op string + latency int // us +} + +func parseLine(line string) *logEntry { + if len(line) < 3 { // dummy line: "#" + return nil + } + fields := strings.Fields(line) + if len(fields) < 5 { + logger.Warnf("Log line is invalid: %s", line) + return nil + } + ts, err := time.Parse("2006.01.02 15:04:05.000000", strings.Join([]string{fields[0], fields[1]}, " ")) + if err != nil { + logger.Warnf("Failed to parse log line: %s: %s", line, err) + return nil + } + ids := findDigits.FindAllString(fields[2], 3) // e.g: [uid:0,gid:0,pid:36674] + if len(ids) != 3 { + logger.Warnf("Log line is invalid: %s", line) + return nil + } + latStr := fields[len(fields)-1] // e.g: <0.000003> + latFloat, err := strconv.ParseFloat(latStr[1:len(latStr)-1], 64) + if err != nil { + logger.Warnf("Failed to parse log line: %s: %s", line, err) + return nil + } + return &logEntry{ + ts: ts, + uid: ids[0], + gid: ids[1], + pid: ids[2], + op: fields[3], + latency: int(latFloat * 1000000.0), + } +} + +func (p *profiler) reader() { + scanner := bufio.NewScanner(p.file) + for scanner.Scan() { + p.entryChan <- parseLine(scanner.Text()) + } + if err := scanner.Err(); err != nil { + logger.Fatalf("Reading log file failed with error: %s", err) + } + close(p.entryChan) + if p.replay { + p.done <- true + } +} + +func (p *profiler) isValid(entry *logEntry) bool { + valid := func(f []string, e string) bool { + if len(f) == 1 && f[0] == "" { + return true + } + for _, v := range f { + if v == e { + return true + } + } + return false + } + return valid(p.uids, entry.uid) && valid(p.gids, entry.gid) && valid(p.pids, entry.pid) +} + +func (p *profiler) counter() { + var edge time.Time + stats := make(map[string]*stat) + for { + select { + case entry := <-p.entryChan: + if entry == nil { + break + } + if !p.isValid(entry) { + break + } + if p.replay { + if edge.IsZero() { + edge = entry.ts.Add(p.interval) + } + for ; entry.ts.After(edge); edge = edge.Add(p.interval) { + p.statsChan <- stats + p.printTime <- edge + stats = make(map[string]*stat) + } + } + value, ok := stats[entry.op] + if !ok { + value = &stat{} + stats[entry.op] = value + } + value.count++ + value.total += entry.latency + case p.statsChan <- stats: + if p.replay { + p.printTime <- edge + edge = edge.Add(p.interval) + } + stats = make(map[string]*stat) + } + } +} + +func (p *profiler) fastCounter() { + var start, last time.Time + stats := make(map[string]*stat) + for entry := range p.entryChan { + if entry == nil { + continue + } + if !p.isValid(entry) { + continue + } + if start.IsZero() { + start = entry.ts + } + last = entry.ts + value, ok := stats[entry.op] + if !ok { + value = &stat{} + stats[entry.op] = value + } + value.count++ + value.total += entry.latency + } + p.statsChan <- stats + p.printTime <- start + 
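+ // the timestamp of the last entry follows, so the caller can derive the
+ // elapsed window (last - start) for its rate and percentage calculations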
p.printTime <- last +} + +func printLines(lines []string, tty bool) { + if tty { + fmt.Print("\033[2J\033[1;1H") // clear screen + fmt.Printf("\033[92m%s\n\033[0m", lines[0]) + fmt.Printf("\033[97m%s\n\033[0m", lines[1]) + fmt.Printf("\033[94m%s\n\033[0m", lines[2]) + if len(lines) > 3 { + for _, l := range lines[3:] { + fmt.Printf("\033[93m%s\n\033[0m", l) + } + } + } else { + fmt.Println(lines[0]) + for _, l := range lines[2:] { + fmt.Println(l) + } + fmt.Println() + } +} + +func (p *profiler) flush(timeStamp time.Time, keyStats []keyStat, done bool) { + var head string + if p.replay { + if done { + head = "(replay done)" + } else { + head = "(replaying)" + } + } + output := make([]string, 3) + output[0] = fmt.Sprintf("> JuiceFS Profiling %13s Refresh: %.0f seconds %20s", + head, p.interval.Seconds(), timeStamp.Format("2006-01-02T15:04:05")) + output[2] = fmt.Sprintf("%-14s %10s %15s %18s %14s", "Operation", "Count", "Average(us)", "Total(us)", "Percent(%)") + for _, s := range keyStats { + output = append(output, fmt.Sprintf("%-14s %10d %15.0f %18d %14.1f", + s.key, s.sPtr.count, float64(s.sPtr.total)/float64(s.sPtr.count), s.sPtr.total, float64(s.sPtr.total)/float64(p.interval.Microseconds())*100.0)) + } + if p.replay { + output[1] = fmt.Sprintln("\n[enter]Pause/Continue") + } + printLines(output, p.tty) +} + +func (p *profiler) flusher() { + var paused, done bool + ticker := time.NewTicker(p.interval) + ts := time.Now() + p.flush(ts, nil, false) + for { + select { + case t := <-ticker.C: + stats := <-p.statsChan + if paused { // ticker event might be passed long ago + paused = false + ticker.Stop() + ticker = time.NewTicker(p.interval) + t = time.Now() + } + if done { + ticker.Stop() + } + if p.replay { + ts = <-p.printTime + } else { + ts = t + } + keyStats := make([]keyStat, 0, len(stats)) + for k, s := range stats { + keyStats = append(keyStats, keyStat{k, s}) + } + sort.Slice(keyStats, func(i, j int) bool { // reversed + return keyStats[i].sPtr.total > keyStats[j].sPtr.total + }) + p.flush(ts, keyStats, done) + if done { + os.Exit(0) + } + case paused = <-p.pause: + fmt.Printf("\n\033[97mPaused. 
Press [enter] to continue.\n\033[0m") + <-p.pause + case done = <-p.done: + } + } +} + +func profile(ctx *cli.Context) error { + setLoggerLevel(ctx) + if ctx.Args().Len() < 1 { + logger.Fatalln("Mount point or log file must be provided!") + } + logPath := ctx.Args().First() + st, err := os.Stat(logPath) + if err != nil { + logger.Fatalf("Failed to stat path %s: %s", logPath, err) + } + var replay bool + if st.IsDir() { // mount point + inode, err := utils.GetFileInode(logPath) + if err != nil { + logger.Fatalf("Failed to lookup inode for %s: %s", logPath, err) + } + if inode != 1 { + logger.Fatalf("Path %s is not a mount point!", logPath) + } + logPath = path.Join(logPath, ".accesslog") + } else { // log file to be replayed + replay = true + } + nodelay := ctx.Int64("interval") == 0 + if nodelay && !replay { + logger.Fatalf("Interval must be > 0 for real time mode!") + } + file, err := os.Open(logPath) + if err != nil { + logger.Fatalf("Failed to open log file %s: %s", logPath, err) + } + defer file.Close() + + prof := profiler{ + file: file, + replay: replay, + tty: isatty.IsTerminal(os.Stdout.Fd()), + interval: time.Second * time.Duration(ctx.Int64("interval")), + uids: strings.Split(ctx.String("uid"), ","), + gids: strings.Split(ctx.String("gid"), ","), + pids: strings.Split(ctx.String("pid"), ","), + entryChan: make(chan *logEntry, 16), + statsChan: make(chan map[string]*stat), + pause: make(chan bool), + } + if prof.replay { + prof.printTime = make(chan time.Time) + prof.done = make(chan bool) + } + + go prof.reader() + if nodelay { + go prof.fastCounter() + stats := <-prof.statsChan + start := <-prof.printTime + last := <-prof.printTime + keyStats := make([]keyStat, 0, len(stats)) + for k, s := range stats { + keyStats = append(keyStats, keyStat{k, s}) + } + sort.Slice(keyStats, func(i, j int) bool { // reversed + return keyStats[i].sPtr.total > keyStats[j].sPtr.total + }) + prof.replay = false + prof.interval = last.Sub(start) + prof.flush(last, keyStats, <-prof.done) + return nil + } + + go prof.counter() + go prof.flusher() + var input string + for { + if _, err = fmt.Scanln(&input); err != nil { + logger.Fatalf("Failed to scan input: %s", err) + } + if prof.tty { + fmt.Print("\033[1A\033[K") // move cursor back + } + if prof.replay { + prof.pause <- true // pause/continue + } + } +} + +func profileFlags() *cli.Command { + return &cli.Command{ + Name: "profile", + Usage: "analyze access log", + Action: profile, + ArgsUsage: "MOUNTPOINT/LOGFILE", + Flags: []cli.Flag{ + &cli.StringFlag{ + Name: "uid", + Aliases: []string{"u"}, + Usage: "track only specified UIDs(separated by comma ,)", + }, + &cli.StringFlag{ + Name: "gid", + Aliases: []string{"g"}, + Usage: "track only specified GIDs(separated by comma ,)", + }, + &cli.StringFlag{ + Name: "pid", + Aliases: []string{"p"}, + Usage: "track only specified PIDs(separated by comma ,)", + }, + &cli.Int64Flag{ + Name: "interval", + Value: 2, + Usage: "flush interval in seconds; set it to 0 when replaying a log file to get an immediate result", + }, + }, + } +} diff --git a/cmd/rmr.go b/cmd/rmr.go new file mode 100644 index 0000000..cddb3b7 --- /dev/null +++ b/cmd/rmr.go @@ -0,0 +1,105 @@ +/* + * JuiceFS, Copyright 2020 Juicedata, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package main + +import ( + "fmt" + "os" + "path/filepath" + "runtime" + "syscall" + + "github.com/juicedata/juicefs/pkg/meta" + "github.com/juicedata/juicefs/pkg/utils" + "github.com/pkg/errors" + "github.com/urfave/cli/v2" +) + +func rmrFlags() *cli.Command { + return &cli.Command{ + Name: "rmr", + Usage: "remove directories recursively", + ArgsUsage: "PATH ...", + Action: rmr, + } +} + +func openController(path string) *os.File { + f, err := os.OpenFile(filepath.Join(path, ".control"), os.O_RDWR, 0) + if err != nil && !os.IsNotExist(err) && !errors.Is(err, syscall.ENOTDIR) { + logger.Errorf("%s", err) + return nil + } + if err != nil && path != "/" { + return openController(filepath.Dir(path)) + } + return f +} + +func rmr(ctx *cli.Context) error { + if runtime.GOOS == "windows" { + logger.Infof("Windows is not supported") + return nil + } + if ctx.Args().Len() < 1 { + logger.Infof("PATH is needed") + return nil + } + for i := 0; i < ctx.Args().Len(); i++ { + path := ctx.Args().Get(i) + p, err := filepath.Abs(path) + if err != nil { + logger.Errorf("abs of %s: %s", path, err) + continue + } + d := filepath.Dir(p) + name := filepath.Base(p) + inode, err := utils.GetFileInode(d) + if err != nil { + return fmt.Errorf("lookup inode for %s: %s", d, err) + } + f := openController(d) + if f == nil { + logger.Errorf("%s is not inside JuiceFS", path) + continue + } + wb := utils.NewBuffer(8 + 8 + 1 + uint32(len(name))) + wb.Put32(meta.Rmr) + wb.Put32(8 + 1 + uint32(len(name))) + wb.Put64(inode) + wb.Put8(uint8(len(name))) + wb.Put([]byte(name)) + _, err = f.Write(wb.Bytes()) + if err != nil { + logger.Fatalf("write message: %s", err) + } + var errs = make([]byte, 1) + n, err := f.Read(errs) + if err != nil || n != 1 { + logger.Fatalf("read message: %d %s", n, err) + } + if errs[0] != 0 { + errno := syscall.Errno(errs[0]) + if runtime.GOOS == "windows" { + errno += 0x20000000 + } + logger.Fatalf("RMR %s: %s", path, errno) + } + _ = f.Close() + } + return nil +} diff --git a/cmd/rmr_test.go b/cmd/rmr_test.go new file mode 100644 index 0000000..2d226be --- /dev/null +++ b/cmd/rmr_test.go @@ -0,0 +1,65 @@ +/* + * JuiceFS, Copyright 2021 Juicedata, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package main + +import ( + "fmt" + "io/ioutil" + "os" + "testing" +) + +func TestRmr(t *testing.T) { + metaUrl := "redis://127.0.0.1:6379/10" + mountpoint := "/tmp/testDir" + defer ResetRedis(metaUrl) + if err := MountTmp(metaUrl, mountpoint); err != nil { + t.Fatalf("mount failed: %v", err) + } + defer func(mountpoint string) { + err := UmountTmp(mountpoint) + if err != nil { + t.Fatalf("umount failed: %v", err) + } + }(mountpoint) + + paths := []string{"/dir1", "/dir2", "/dir3/dir2"} + for _, path := range paths { + if err := os.MkdirAll(fmt.Sprintf("%s%s/dir2/dir3/dir4/dir5", mountpoint, path), 0777); err != nil { + t.Fatalf("Test mount err %v", err) + } + } + for i := 0; i < 5; i++ { + filename := fmt.Sprintf("%s/dir1/f%d.txt", mountpoint, i) + err := ioutil.WriteFile(filename, []byte("test"), 0644) + if err != nil { + t.Fatalf("Test mount failed : %v", err) + } + } + + rmrArgs := []string{"", "rmr", mountpoint + paths[0], mountpoint + paths[1], mountpoint + paths[2]} + if err := Main(rmrArgs); err != nil { + t.Fatalf("rmr failed : %v", err) + } + + for _, path := range paths { + dir, err := os.ReadDir(mountpoint + path) + if len(dir) != 0 { + t.Fatalf("test rmr error: %v", err) + } + } +} diff --git a/cmd/stats.go b/cmd/stats.go new file mode 100644 index 0000000..2e092d1 --- /dev/null +++ b/cmd/stats.go @@ -0,0 +1,400 @@ +/* + * JuiceFS, Copyright 2021 Juicedata, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package main + +import ( + "fmt" + "io/ioutil" + "os" + "path" + "strconv" + "strings" + "time" + + "github.com/juicedata/juicefs/pkg/utils" + "github.com/mattn/go-isatty" + "github.com/urfave/cli/v2" +) + +const ( + BLACK = 30 + iota + RED + GREEN + YELLOW + BLUE + MAGENTA + CYAN + WHITE +) + +const ( + RESET_SEQ = "\033[0m" + COLOR_SEQ = "\033[1;" // %dm + COLOR_DARK_SEQ = "\033[0;" // %dm + UNDERLINE_SEQ = "\033[4m" + // BOLD_SEQ = "\033[1m" +) + +type statsWatcher struct { + tty bool + interval uint + path string + header string + sections []*section +} + +func (w *statsWatcher) colorize(msg string, color int, dark bool, underline bool) string { + if !w.tty || msg == "" || msg == " " { + return msg + } + var cseq, useq string + if dark { + cseq = COLOR_DARK_SEQ + } else { + cseq = COLOR_SEQ + } + if underline { + useq = UNDERLINE_SEQ + } + return fmt.Sprintf("%s%s%dm%s%s", useq, cseq, color, msg, RESET_SEQ) +} + +const ( + metricByte = 1 << iota + metricCount + metricTime + metricCPU + metricGauge + metricCounter + metricHist +) + +type item struct { + nick string // must be size <= 5 + name string + typ uint8 +} + +type section struct { + name string + items []*item +} + +func (w *statsWatcher) buildSchema(schema string, verbosity uint) { + for _, r := range schema { + var s section + switch r { + case 'u': + s.name = "usage" + s.items = append(s.items, &item{"cpu", "juicefs_cpu_usage", metricCPU | metricCounter}) + s.items = append(s.items, &item{"mem", "juicefs_memory", metricGauge}) + s.items = append(s.items, &item{"buf", "juicefs_used_buffer_size_bytes", metricGauge}) + if verbosity > 0 { + s.items = append(s.items, &item{"cache", "juicefs_store_cache_size_bytes", metricGauge}) + } + case 'f': + s.name = "fuse" + s.items = append(s.items, &item{"ops", "juicefs_fuse_ops_durations_histogram_seconds", metricTime | metricHist}) + s.items = append(s.items, &item{"read", "juicefs_fuse_read_size_bytes_sum", metricByte | metricCounter}) + s.items = append(s.items, &item{"write", "juicefs_fuse_written_size_bytes_sum", metricByte | metricCounter}) + case 'm': + s.name = "meta" + s.items = append(s.items, &item{"ops", "juicefs_meta_ops_durations_histogram_seconds", metricTime | metricHist}) + if verbosity > 0 { + s.items = append(s.items, &item{"txn", "juicefs_transaction_durations_histogram_seconds", metricTime | metricHist}) + s.items = append(s.items, &item{"retry", "juicefs_transaction_restart", metricCount | metricCounter}) + } + case 'c': + s.name = "blockcache" + s.items = append(s.items, &item{"read", "juicefs_blockcache_hit_bytes", metricByte | metricCounter}) + s.items = append(s.items, &item{"write", "juicefs_blockcache_write_bytes", metricByte | metricCounter}) + case 'o': + s.name = "object" + s.items = append(s.items, &item{"get", "juicefs_object_request_data_bytes_GET", metricByte | metricCounter}) + if verbosity > 0 { + s.items = append(s.items, &item{"get_c", "juicefs_object_request_durations_histogram_seconds_GET", metricTime | metricHist}) + } + s.items = append(s.items, &item{"put", "juicefs_object_request_data_bytes_PUT", metricByte | metricCounter}) + if verbosity > 0 { + s.items = append(s.items, &item{"put_c", "juicefs_object_request_durations_histogram_seconds_PUT", metricTime | metricHist}) + s.items = append(s.items, &item{"del_c", "juicefs_object_request_durations_histogram_seconds_DELETE", metricTime | metricHist}) + } + case 'g': + s.name = "go" + s.items = append(s.items, &item{"alloc", "juicefs_go_memstats_alloc_bytes", metricGauge}) + s.items = 
append(s.items, &item{"sys", "juicefs_go_memstats_sys_bytes", metricGauge}) + default: + fmt.Printf("Warning: no item defined for %c\n", r) + continue + } + w.sections = append(w.sections, &s) + } + if len(w.sections) == 0 { + logger.Fatalln("no section to watch, please check the schema string") + } +} + +func padding(name string, width int, char byte) string { + pad := width - len(name) + if pad < 0 { + pad = 0 + name = name[0:width] + } + prefix := (pad + 1) / 2 + buf := make([]byte, width) + for i := 0; i < prefix; i++ { + buf[i] = char + } + copy(buf[prefix:], name) + for i := prefix + len(name); i < width; i++ { + buf[i] = char + } + return string(buf) +} + +func (w *statsWatcher) formatHeader() { + headers := make([]string, len(w.sections)) + subHeaders := make([]string, len(w.sections)) + for i, s := range w.sections { + subs := make([]string, 0, len(s.items)) + for _, it := range s.items { + subs = append(subs, w.colorize(padding(it.nick, 5, ' '), BLUE, false, true)) + if it.typ&metricHist != 0 { + if it.typ&metricTime != 0 { + subs = append(subs, w.colorize(" lat ", BLUE, false, true)) + } else { + subs = append(subs, w.colorize(" avg ", BLUE, false, true)) + } + } + } + width := 6*len(subs) - 1 // nick(5) + space(1) + subHeaders[i] = strings.Join(subs, " ") + headers[i] = w.colorize(padding(s.name, width, '-'), BLUE, true, false) + } + w.header = fmt.Sprintf("%s\n%s", strings.Join(headers, " "), + strings.Join(subHeaders, w.colorize("|", BLUE, true, false))) +} + +func (w *statsWatcher) formatU64(v float64, dark, isByte bool) string { + if v <= 0.0 { + return w.colorize(" 0 ", BLACK, false, false) + } + var vi uint64 + var unit string + var color int + switch vi = uint64(v); { + case vi < 10000: + if isByte { + unit = "B" + } else { + unit = " " + } + color = RED + case vi>>10 < 10000: + vi, unit, color = vi>>10, "K", YELLOW + case vi>>20 < 10000: + vi, unit, color = vi>>20, "M", GREEN + case vi>>30 < 10000: + vi, unit, color = vi>>30, "G", BLUE + case vi>>40 < 10000: + vi, unit, color = vi>>40, "T", MAGENTA + default: + vi, unit, color = vi>>50, "P", CYAN + } + return w.colorize(fmt.Sprintf("%4d", vi), color, dark, false) + + w.colorize(unit, BLACK, false, false) +} + +func (w *statsWatcher) formatTime(v float64, dark bool) string { + var ret string + var color int + switch { + case v <= 0.0: + ret, color, dark = " 0 ", BLACK, false + case v < 10.0: + ret, color = fmt.Sprintf("%4.2f ", v), GREEN + case v < 100.0: + ret, color = fmt.Sprintf("%4.1f ", v), YELLOW + case v < 10000.0: + ret, color = fmt.Sprintf("%4.f ", v), RED + default: + ret, color = fmt.Sprintf("%1.e", v), MAGENTA + } + return w.colorize(ret, color, dark, false) +} + +func (w *statsWatcher) formatCPU(v float64, dark bool) string { + var ret string + var color int + switch v = v * 100.0; { + case v <= 0.0: + ret, color = " 0.0", WHITE + case v < 30.0: + ret, color = fmt.Sprintf("%4.1f", v), GREEN + case v < 100.0: + ret, color = fmt.Sprintf("%4.1f", v), YELLOW + default: + ret, color = fmt.Sprintf("%4.f", v), RED + } + return w.colorize(ret, color, dark, false) + + w.colorize("%", BLACK, false, false) +} + +func (w *statsWatcher) printDiff(left, right map[string]float64, dark bool) { + if !w.tty && dark { + return + } + values := make([]string, len(w.sections)) + for i, s := range w.sections { + vals := make([]string, 0, len(s.items)) + for _, it := range s.items { + switch it.typ & 0xF0 { + case metricGauge: // currently must be metricByte + vals = append(vals, w.formatU64(right[it.name], dark, true)) + case 
metricCounter: + v := (right[it.name] - left[it.name]) + if !dark { + v /= float64(w.interval) + } + if it.typ&metricByte != 0 { + vals = append(vals, w.formatU64(v, dark, true)) + } else if it.typ&metricCPU != 0 { + vals = append(vals, w.formatCPU(v, dark)) + } else { // metricCount + vals = append(vals, w.formatU64(v, dark, false)) + } + case metricHist: // metricTime + count := right[it.name+"_total"] - left[it.name+"_total"] + var avg float64 + if count > 0.0 { + cost := right[it.name+"_sum"] - left[it.name+"_sum"] + if it.typ&metricTime != 0 { + cost *= 1000 // s -> ms + } + avg = cost / count + } + if !dark { + count /= float64(w.interval) + } + vals = append(vals, w.formatU64(count, dark, false), w.formatTime(avg, dark)) + } + } + values[i] = strings.Join(vals, " ") + } + if w.tty && dark { + fmt.Printf("%s\r", strings.Join(values, w.colorize("|", BLUE, true, false))) + } else { + fmt.Printf("%s\n", strings.Join(values, w.colorize("|", BLUE, true, false))) + } +} + +func readStats(path string) map[string]float64 { + f, err := os.Open(path) + if err != nil { + logger.Warnf("open %s: %s", path, err) + return nil + } + defer f.Close() + d, err := ioutil.ReadAll(f) + if err != nil { + logger.Warnf("read %s: %s", path, err) + return nil + } + stats := make(map[string]float64) + lines := strings.Split(string(d), "\n") + for _, line := range lines { + fields := strings.Fields(line) + if len(fields) == 2 { + stats[fields[0]], err = strconv.ParseFloat(fields[1], 64) + if err != nil { + logger.Warnf("parse %s: %s", fields[1], err) + } + } + } + return stats +} + +func stats(ctx *cli.Context) error { + setLoggerLevel(ctx) + if ctx.Args().Len() < 1 { + logger.Fatalln("mount point must be provided") + } + mp := ctx.Args().First() + inode, err := utils.GetFileInode(mp) + if err != nil { + logger.Fatalf("lookup inode for %s: %s", mp, err) + } + if inode != 1 { + logger.Fatalf("path %s is not a mount point", mp) + } + + watcher := &statsWatcher{ + tty: !ctx.Bool("no-color") && isatty.IsTerminal(os.Stdout.Fd()), + interval: ctx.Uint("interval"), + path: path.Join(mp, ".stats"), + } + watcher.buildSchema(ctx.String("schema"), ctx.Uint("verbosity")) + watcher.formatHeader() + + var tick uint + var start, last, current map[string]float64 + ticker := time.NewTicker(time.Second) + defer ticker.Stop() + current = readStats(watcher.path) + start = current + last = current + for { + if tick%(watcher.interval*30) == 0 { + fmt.Println(watcher.header) + } + if tick%watcher.interval == 0 { + watcher.printDiff(start, current, false) + start = current + } else { + watcher.printDiff(last, current, true) + } + last = current + tick++ + <-ticker.C + current = readStats(watcher.path) + } +} + +func statsFlags() *cli.Command { + return &cli.Command{ + Name: "stats", + Usage: "show runtime statistics", + Action: stats, + ArgsUsage: "MOUNTPOINT", + Flags: []cli.Flag{ + &cli.StringFlag{ + Name: "schema", + Value: "ufmco", + Usage: "schema string that controls the output sections (u: usage, f: fuse, m: meta, c: blockcache, o: object, g: go)", + }, + &cli.UintFlag{ + Name: "interval", + Value: 1, + Usage: "interval in seconds between each update", + }, + &cli.UintFlag{ + Name: "verbosity", + Usage: "verbosity level, 0 or 1 is enough for most cases", + }, + }, + } +} diff --git a/cmd/status.go b/cmd/status.go new file mode 100644 index 0000000..b249af3 --- /dev/null +++ b/cmd/status.go @@ -0,0 +1,86 @@ +/* + * JuiceFS, Copyright 2021 Juicedata, Inc. 
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package main
+
+import (
+	"encoding/json"
+	"fmt"
+
+	"github.com/juicedata/juicefs/pkg/meta"
+	"github.com/urfave/cli/v2"
+)
+
+type sections struct {
+	Setting *meta.Format
+	Sessions []*meta.Session
+}
+
+func printJson(v interface{}) {
+	output, err := json.MarshalIndent(v, "", " ")
+	if err != nil {
+		logger.Fatalf("json: %s", err)
+	}
+	fmt.Println(string(output))
+}
+
+func status(ctx *cli.Context) error {
+	setLoggerLevel(ctx)
+	if ctx.Args().Len() < 1 {
+		return fmt.Errorf("META-URL is needed")
+	}
+	removePassword(ctx.Args().Get(0))
+	m := meta.NewClient(ctx.Args().Get(0), &meta.Config{Retries: 10, Strict: true})
+
+	if sid := ctx.Uint64("session"); sid != 0 {
+		s, err := m.GetSession(sid)
+		if err != nil {
+			logger.Fatalf("get session: %s", err)
+		}
+		printJson(s)
+		return nil
+	}
+
+	format, err := m.Load()
+	if err != nil {
+		logger.Fatalf("load setting: %s", err)
+	}
+	format.RemoveSecret()
+
+	sessions, err := m.ListSessions()
+	if err != nil {
+		logger.Fatalf("list sessions: %s", err)
+	}
+
+	printJson(&sections{format, sessions})
+	return nil
+}
+
+func statusFlags() *cli.Command {
+	return &cli.Command{
+		Name: "status",
+		Usage: "show status of JuiceFS",
+		ArgsUsage: "META-URL",
+		Action: status,
+		Flags: []cli.Flag{
+			&cli.Uint64Flag{
+				Name: "session",
+				Aliases: []string{"s"},
+				Usage: "show detailed information (sustained inodes, locks) of the specified session (sid)",
+			},
+		},
+	}
+}
diff --git a/cmd/status_test.go b/cmd/status_test.go
new file mode 100644
index 0000000..1315209
--- /dev/null
+++ b/cmd/status_test.go
@@ -0,0 +1,78 @@
+/*
+ * JuiceFS, Copyright 2021 Juicedata, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package main
+
+import (
+	"encoding/json"
+	"io/ioutil"
+	"os"
+	"testing"
+
+	"github.com/agiledragon/gomonkey/v2"
+
+	.
"github.com/smartystreets/goconvey/convey" +) + +func TestStatus(t *testing.T) { + Convey("TestInfo", t, func() { + Convey("TestInfo", func() { + tmpFile, err := os.CreateTemp("/tmp", "") + if err != nil { + t.Fatalf("creat tmp file failed: %v", err) + } + defer tmpFile.Close() + defer os.Remove(tmpFile.Name()) + if err != nil { + t.Fatalf("create temporary file: %v", err) + } + // mock os.Stdout + patches := gomonkey.ApplyGlobalVar(os.Stdout, *tmpFile) + defer patches.Reset() + metaUrl := "redis://localhost:6379/10" + mountpoint := "/tmp/testDir" + statusArgs := []string{"", "status", metaUrl} + + defer ResetRedis(metaUrl) + if err := MountTmp(metaUrl, mountpoint); err != nil { + t.Fatalf("mount failed: %v", err) + } + defer func() { + err := UmountTmp(mountpoint) + if err != nil { + t.Fatalf("umount failed: %v", err) + } + }() + + if err := Main(statusArgs); err != nil { + t.Fatalf("test status failed: %v", err) + } + + content, err := ioutil.ReadFile(tmpFile.Name()) + if err != nil { + t.Fatalf("readFile failed: %v", err) + } + + s := sections{} + if err = json.Unmarshal(content, &s); err != nil { + t.Fatalf("test status failed: %v", err) + } + if s.Setting.Name != "test" || s.Setting.Storage != "file" { + t.Fatalf("test status failed: %v", err) + } + }) + }) +} diff --git a/cmd/sync.go b/cmd/sync.go new file mode 100644 index 0000000..8d8b5be --- /dev/null +++ b/cmd/sync.go @@ -0,0 +1,298 @@ +/* + * JuiceFS, Copyright 2018 Juicedata, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package main + +import ( + "fmt" + "net" + "net/http" + _ "net/http/pprof" + "net/url" + "os" + "path/filepath" + "regexp" + "runtime" + "strings" + "syscall" + + "github.com/juicedata/juicefs/pkg/object" + "github.com/juicedata/juicefs/pkg/sync" + "github.com/urfave/cli/v2" + "golang.org/x/term" +) + +func supportHTTPS(name, endpoint string) bool { + switch name { + case "ufile": + return !(strings.Contains(endpoint, ".internal-") || strings.HasSuffix(endpoint, ".ucloud.cn")) + case "oss": + return !(strings.Contains(endpoint, ".vpc100-oss") || strings.Contains(endpoint, "internal.aliyuncs.com")) + case "jss": + return false + case "s3": + ps := strings.SplitN(strings.Split(endpoint, ":")[0], ".", 2) + if len(ps) > 1 && net.ParseIP(ps[1]) != nil { + return false + } + case "minio": + return false + } + return true +} + +// Check if uri is local file path +func isFilePath(uri string) bool { + // check drive pattern when running on Windows + if runtime.GOOS == "windows" && + len(uri) > 1 && (('a' <= uri[0] && uri[0] <= 'z') || + ('A' <= uri[0] && uri[0] <= 'Z')) && uri[1] == ':' { + return true + } + return !strings.Contains(uri, ":") +} + +func createSyncStorage(uri string, conf *sync.Config) (object.ObjectStorage, error) { + if !strings.Contains(uri, "://") { + if isFilePath(uri) { + absPath, err := filepath.Abs(uri) + if err != nil { + logger.Fatalf("invalid path: %s", err.Error()) + } + if !strings.HasPrefix(absPath, "/") { // Windows path + absPath = "/" + strings.Replace(absPath, "\\", "/", -1) + } + if strings.HasSuffix(uri, "/") { + absPath += "/" + } + + // Windows: file:///C:/a/b/c, Unix: file:///a/b/c + uri = "file://" + absPath + } else { // sftp + var user string + if strings.Contains(uri, "@") { + parts := strings.Split(uri, "@") + user = parts[0] + uri = parts[1] + } + var pass string + if strings.Contains(user, ":") { + parts := strings.Split(user, ":") + user = parts[0] + pass = parts[1] + } else if os.Getenv("SSH_PRIVATE_KEY_PATH") == "" { + fmt.Print("Enter Password: ") + bytePassword, err := term.ReadPassword(int(syscall.Stdin)) + if err != nil { + logger.Fatalf("Read password: %s", err.Error()) + } + pass = string(bytePassword) + } + return object.CreateStorage("sftp", uri, user, pass) + } + } + u, err := url.Parse(uri) + if err != nil { + logger.Fatalf("Can't parse %s: %s", uri, err.Error()) + } + user := u.User + var accessKey, secretKey string + if user != nil { + accessKey = user.Username() + secretKey, _ = user.Password() + } + name := strings.ToLower(u.Scheme) + endpoint := u.Host + + isS3PathTypeUrl := isS3PathType(endpoint) + + if name == "file" { + endpoint = u.Path + } else if name == "hdfs" { + } else if !conf.NoHTTPS && supportHTTPS(name, endpoint) { + endpoint = "https://" + endpoint + } else { + endpoint = "http://" + endpoint + } + if name == "minio" || name == "s3" && isS3PathTypeUrl { + // bucket name is part of path + endpoint += u.Path + } + + store, err := object.CreateStorage(name, endpoint, accessKey, secretKey) + if err != nil { + return nil, fmt.Errorf("create %s %s: %s", name, endpoint, err) + } + if conf.Perms { + if _, ok := store.(object.FileSystem); !ok { + logger.Warnf("%s is not a file system, can not preserve permissions", store) + conf.Perms = false + } + } + switch name { + case "file": + case "minio": + if strings.Count(u.Path, "/") > 1 { + // skip bucket name + store = object.WithPrefix(store, strings.SplitN(u.Path[1:], "/", 2)[1]) + } + case "s3": + if isS3PathTypeUrl && strings.Count(u.Path, "/") > 1 { + store = 
object.WithPrefix(store, strings.SplitN(u.Path[1:], "/", 2)[1]) + } else if len(u.Path) > 1 { + store = object.WithPrefix(store, u.Path[1:]) + } + default: + if len(u.Path) > 1 { + store = object.WithPrefix(store, u.Path[1:]) + } + } + return store, nil +} + +func isS3PathType(endpoint string) bool { + //localhost[:8080] 127.0.0.1[:8080] s3.ap-southeast-1.amazonaws.com[:8080] s3-ap-southeast-1.amazonaws.com[:8080] + pattern := `^((localhost)|(s3[.-].*\.amazonaws\.com)|((1\d{2}|2[0-4]\d|25[0-5]|[1-9]\d|[1-9])\.((1\d{2}|2[0-4]\d|25[0-5]|[1-9]\d|\d)\.){2}(1\d{2}|2[0-4]\d|25[0-5]|[1-9]\d|\d)))?(:\d*)?$` + return regexp.MustCompile(pattern).MatchString(endpoint) +} + +const USAGE = `juicefs [options] sync [options] SRC DST +SRC and DST should be [NAME://][ACCESS_KEY:SECRET_KEY@]BUCKET[.ENDPOINT][/PREFIX]` + +func doSync(c *cli.Context) error { + setLoggerLevel(c) + + if c.Args().Len() != 2 { + logger.Errorf(USAGE) + return nil + } + config := sync.NewConfigFromCli(c) + go func() { _ = http.ListenAndServe(fmt.Sprintf("127.0.0.1:%d", config.HTTPPort), nil) }() + + // Windows support `\` and `/` as its separator, Unix only use `/` + srcURL := strings.Replace(c.Args().Get(0), "\\", "/", -1) + dstURL := strings.Replace(c.Args().Get(1), "\\", "/", -1) + if strings.HasSuffix(srcURL, "/") != strings.HasSuffix(dstURL, "/") { + logger.Fatalf("SRC and DST should both end with path separator or not!") + } + src, err := createSyncStorage(srcURL, config) + if err != nil { + return err + } + dst, err := createSyncStorage(dstURL, config) + if err != nil { + return err + } + return sync.Sync(src, dst, config) +} + +func syncFlags() *cli.Command { + return &cli.Command{ + Name: "sync", + Usage: "sync between two storage", + ArgsUsage: "SRC DST", + Action: doSync, + Flags: []cli.Flag{ + &cli.StringFlag{ + Name: "start", + Aliases: []string{"s"}, + Value: "", + Usage: "the first `KEY` to sync", + }, + &cli.StringFlag{ + Name: "end", + Aliases: []string{"e"}, + Value: "", + Usage: "the last `KEY` to sync", + }, + &cli.IntFlag{ + Name: "threads", + Aliases: []string{"p"}, + Value: 10, + Usage: "number of concurrent threads", + }, + &cli.IntFlag{ + Name: "http-port", + Value: 6070, + Usage: "HTTP `PORT` to listen to", + }, + &cli.BoolFlag{ + Name: "update", + Aliases: []string{"u"}, + Usage: "skip files if the destination is newer", + }, + &cli.BoolFlag{ + Name: "force-update", + Aliases: []string{"f"}, + Usage: "always update existing files", + }, + &cli.BoolFlag{ + Name: "perms", + Usage: "preserve permissions", + }, + &cli.BoolFlag{ + Name: "dirs", + Usage: "Sync directories or holders", + }, + &cli.BoolFlag{ + Name: "dry", + Usage: "Don't copy file", + }, + &cli.BoolFlag{ + Name: "delete-src", + Aliases: []string{"deleteSrc"}, + Usage: "delete objects from source those already exist in destination", + }, + &cli.BoolFlag{ + Name: "delete-dst", + Aliases: []string{"deleteDst"}, + Usage: "delete extraneous objects from destination", + }, + &cli.StringSliceFlag{ + Name: "exclude", + Usage: "exclude keys containing `PATTERN` (POSIX regular expressions)", + }, + &cli.StringSliceFlag{ + Name: "include", + Usage: "only include keys containing `PATTERN` (POSIX regular expressions)", + }, + &cli.StringFlag{ + Name: "manager", + Usage: "manager address", + }, + &cli.StringSliceFlag{ + Name: "worker", + Usage: "hosts (seperated by comma) to launch worker", + }, + &cli.IntFlag{ + Name: "bwlimit", + Usage: "limit bandwidth in Mbps (0 means unlimited)", + }, + &cli.BoolFlag{ + Name: "no-https", + Usage: "donot use HTTPS", + }, 
+ &cli.BoolFlag{ + Name: "check-all", + Usage: "verify integrity of all files in source and destination", + }, + &cli.BoolFlag{ + Name: "check-new", + Usage: "verify integrity of newly copied files", + }, + }, + } +} diff --git a/cmd/sync_test.go b/cmd/sync_test.go new file mode 100644 index 0000000..78dea79 --- /dev/null +++ b/cmd/sync_test.go @@ -0,0 +1,94 @@ +/* + * JuiceFS, Copyright 2021 Juicedata, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package main + +import ( + "bytes" + "fmt" + "io/ioutil" + "os" + "testing" + + "github.com/juicedata/juicefs/pkg/object" +) + +func TestSync(t *testing.T) { + if os.Getenv("MINIO_TEST_BUCKET") == "" { + t.Skip() + } + minioDir := "synctest" + localDir := "/tmp/synctest" + defer os.RemoveAll(localDir) + storage, err := object.CreateStorage("minio", os.Getenv("MINIO_TEST_BUCKET"), os.Getenv("MINIO_ACCESS_KEY"), os.Getenv("MINIO_SECRET_KEY")) + if err != nil { + t.Fatalf("create storage failed: %v", err) + } + + testInstances := []struct{ path, content string }{ + {"t1.txt", "content1"}, + {"testDir1/t2.txt", "content2"}, + {"testDir1/testDir3/t3.txt", "content3"}, + } + + for _, instance := range testInstances { + err = storage.Put(fmt.Sprintf("/%s/%s", minioDir, instance.path), bytes.NewReader([]byte(instance.content))) + if err != nil { + t.Fatalf("storage put failed: %v", err) + } + } + syncArgs := []string{"", "sync", fmt.Sprintf("minio://%s/%s", os.Getenv("MINIO_TEST_BUCKET"), minioDir), fmt.Sprintf("file://%s", localDir)} + err = Main(syncArgs) + if err != nil { + t.Fatalf("sync failed: %v", err) + } + + for _, instance := range testInstances { + c, err := ioutil.ReadFile(fmt.Sprintf("%s/%s", localDir, instance.path)) + if err != nil || string(c) != instance.content { + t.Fatalf("sync failed: %v", err) + } + } +} + +func Test_isS3PathType(t *testing.T) { + + tests := []struct { + endpoint string + want bool + }{ + {"localhost", true}, + {"localhost:8080", true}, + {"127.0.0.1", true}, + {"127.0.0.1:8080", true}, + {"s3.ap-southeast-1.amazonaws.com", true}, + {"s3.ap-southeast-1.amazonaws.com:8080", true}, + {"s3-ap-southeast-1.amazonaws.com", true}, + {"s3-ap-southeast-1.amazonaws.com:8080", true}, + {"s3-ap-southeast-1.amazonaws..com:8080", false}, + {"ap-southeast-1.amazonaws.com", false}, + {"s3-ap-southeast-1amazonaws.com:8080", false}, + {"s3-ap-southeast-1", false}, + {"s3-ap-southeast-1:8080", false}, + } + for _, tt := range tests { + t.Run("Test host", func(t *testing.T) { + if got := isS3PathType(tt.endpoint); got != tt.want { + t.Errorf("isS3PathType() = %v, want %v", got, tt.want) + } + }) + } +} diff --git a/cmd/umount.go b/cmd/umount.go new file mode 100644 index 0000000..c94ced1 --- /dev/null +++ b/cmd/umount.go @@ -0,0 +1,93 @@ +/* + * JuiceFS, Copyright 2018 Juicedata, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package main + +import ( + "fmt" + "log" + "os" + "os/exec" + "path/filepath" + "runtime" + + "github.com/urfave/cli/v2" +) + +func umountFlags() *cli.Command { + return &cli.Command{ + Name: "umount", + Usage: "unmount a volume", + ArgsUsage: "MOUNTPOINT", + Action: umount, + Flags: []cli.Flag{ + &cli.BoolFlag{ + Name: "force", + Aliases: []string{"f"}, + Usage: "unmount a busy mount point by force", + }, + }, + } +} + +func doUmount(mp string, force bool) error { + var cmd *exec.Cmd + switch runtime.GOOS { + case "darwin": + if force { + cmd = exec.Command("diskutil", "umount", "force", mp) + } else { + cmd = exec.Command("diskutil", "umount", mp) + } + case "linux": + if _, err := exec.LookPath("fusermount"); err == nil { + if force { + cmd = exec.Command("fusermount", "-uz", mp) + } else { + cmd = exec.Command("fusermount", "-u", mp) + } + } else { + if force { + cmd = exec.Command("umount", "-l", mp) + } else { + cmd = exec.Command("umount", mp) + } + } + case "windows": + if !force { + _ = os.Mkdir(filepath.Join(mp, ".UMOUNTIT"), 0755) + return nil + } else { + cmd = exec.Command("taskkill", "/IM", "juicefs.exe", "/F") + } + default: + return fmt.Errorf("OS %s is not supported", runtime.GOOS) + } + out, err := cmd.CombinedOutput() + if err != nil { + log.Print(string(out)) + } + return err +} + +func umount(ctx *cli.Context) error { + if ctx.Args().Len() < 1 { + return fmt.Errorf("MOUNTPOINT is needed") + } + mp := ctx.Args().Get(0) + force := ctx.Bool("force") + return doUmount(mp, force) +} diff --git a/cmd/umount_test.go b/cmd/umount_test.go new file mode 100644 index 0000000..985903e --- /dev/null +++ b/cmd/umount_test.go @@ -0,0 +1,51 @@ +/* + * JuiceFS, Copyright 2021 Juicedata, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package main + +import ( + "testing" + + "github.com/juicedata/juicefs/pkg/utils" +) + +func UmountTmp(mountpoint string) error { + umountArgs := []string{"", "umount", mountpoint} + return Main(umountArgs) +} + +func TestUmount(t *testing.T) { + + metaUrl := "redis://127.0.0.1:6379/10" + mountpoint := "/tmp/testDir" + if err := MountTmp(metaUrl, mountpoint); err != nil { + t.Fatalf("mount failed: %v", err) + } + + err := UmountTmp(mountpoint) + if err != nil { + t.Fatalf("umount failed: %v", err) + } + + inode, err := utils.GetFileInode(mountpoint) + if err != nil { + t.Fatalf("get file inode failed: %v", err) + } + if inode == 1 { + t.Fatalf("umount failed: %v", err) + } + +} diff --git a/cmd/warmup.go b/cmd/warmup.go new file mode 100644 index 0000000..fbd76e6 --- /dev/null +++ b/cmd/warmup.go @@ -0,0 +1,175 @@ +/* + * JuiceFS, Copyright 2021 Juicedata, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package main + +import ( + "bufio" + "os" + "path/filepath" + "strings" + + "github.com/juicedata/juicefs/pkg/meta" + "github.com/juicedata/juicefs/pkg/utils" + "github.com/urfave/cli/v2" +) + +const batchMax = 10240 + +// send fill-cache command to controller file +func sendCommand(cf *os.File, batch []string, count int, threads uint, background bool) { + paths := strings.Join(batch[:count], "\n") + var back uint8 + if background { + back = 1 + } + wb := utils.NewBuffer(8 + 4 + 3 + uint32(len(paths))) + wb.Put32(meta.FillCache) + wb.Put32(4 + 3 + uint32(len(paths))) + wb.Put32(uint32(len(paths))) + wb.Put([]byte(paths)) + wb.Put16(uint16(threads)) + wb.Put8(back) + if _, err := cf.Write(wb.Bytes()); err != nil { + logger.Fatalf("Write message: %s", err) + } + if background { + logger.Infof("Warm-up cache for %d paths in backgroud", count) + return + } + var errs = make([]byte, 1) + if n, err := cf.Read(errs); err != nil || n != 1 { + logger.Fatalf("Read message: %d %s", n, err) + } + if errs[0] != 0 { + logger.Fatalf("Warm up failed: %d", errs[0]) + } +} + +func warmup(ctx *cli.Context) error { + fname := ctx.String("file") + paths := ctx.Args().Slice() + if fname != "" { + fd, err := os.Open(fname) + if err != nil { + logger.Fatalf("Failed to open file %s: %s", fname, err) + } + defer fd.Close() + scanner := bufio.NewScanner(fd) + for scanner.Scan() { + if p := strings.TrimSpace(scanner.Text()); p != "" { + paths = append(paths, p) + } + } + if err := scanner.Err(); err != nil { + logger.Fatalf("Reading file %s failed with error: %s", fname, err) + } + } + if len(paths) == 0 { + logger.Infof("Nothing to warm up") + return nil + } + + // find mount point + first, err := filepath.Abs(paths[0]) + if err != nil { + logger.Fatalf("Failed to get abs of %s: %s", paths[0], err) + } + st, err := os.Stat(first) + if err != nil { + logger.Fatalf("Failed to stat path %s: %s", first, err) + } + var mp string + if st.IsDir() { + mp = first + } else { + mp = filepath.Dir(first) + } + for ; mp != "/"; mp = filepath.Dir(mp) { + inode, err := utils.GetFileInode(mp) + if err != 
nil { + logger.Fatalf("Failed to lookup inode for %s: %s", mp, err) + } + if inode == 1 { + break + } + } + if mp == "/" { + logger.Fatalf("Path %s is not inside JuiceFS", first) + } + + controller := openController(mp) + if controller == nil { + logger.Fatalf("Failed to open control file under %s", mp) + } + defer controller.Close() + + threads := ctx.Uint("threads") + background := ctx.Bool("background") + start := len(mp) + batch := make([]string, batchMax) + progress := utils.NewProgress(background, false) + bar := progress.AddCountBar("Warmed up paths", int64(len(paths))) + var index int + for _, path := range paths { + if strings.HasPrefix(path, mp) { + batch[index] = path[start:] + index++ + } else { + logger.Warnf("Path %s is not under mount point %s", path, mp) + continue + } + if index >= batchMax { + sendCommand(controller, batch, index, threads, background) + bar.IncrBy(index) + index = 0 + } + } + if index > 0 { + sendCommand(controller, batch, index, threads, background) + bar.IncrBy(index) + } + progress.Done() + + return nil +} + +func warmupFlags() *cli.Command { + return &cli.Command{ + Name: "warmup", + Usage: "build cache for target directories/files", + ArgsUsage: "[PATH ...]", + Action: warmup, + Flags: []cli.Flag{ + &cli.StringFlag{ + Name: "file", + Aliases: []string{"f"}, + Usage: "file containing a list of paths", + }, + &cli.UintFlag{ + Name: "threads", + Aliases: []string{"p"}, + Value: 50, + Usage: "number of concurrent workers", + }, + &cli.BoolFlag{ + Name: "background", + Aliases: []string{"b"}, + Usage: "run in background", + }, + }, + } +} diff --git a/cmd/warmup_test.go b/cmd/warmup_test.go new file mode 100644 index 0000000..dc1ed74 --- /dev/null +++ b/cmd/warmup_test.go @@ -0,0 +1,82 @@ +/* + * JuiceFS, Copyright 2021 Juicedata, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package main + +import ( + "fmt" + "io/ioutil" + "os" + "runtime" + "testing" + "time" + + "github.com/juicedata/juicefs/pkg/meta" +) + +func TestWarmup(t *testing.T) { + metaUrl := "redis://127.0.0.1:6379/10" + mountpoint := "/tmp/testDir" + defer ResetRedis(metaUrl) + if err := MountTmp(metaUrl, mountpoint); err != nil { + t.Fatalf("mount failed: %v", err) + } + defer func() { + err := UmountTmp(mountpoint) + if err != nil { + t.Fatalf("umount failed: %v", err) + } + }() + + err := ioutil.WriteFile(fmt.Sprintf("%s/f1.txt", mountpoint), []byte("test"), 0644) + if err != nil { + t.Fatalf("test mount failed: %v", err) + } + m := meta.NewClient(metaUrl, &meta.Config{Retries: 10, Strict: true}) + format, err := m.Load() + if err != nil { + t.Fatalf("load setting err: %s", err) + } + uuid := format.UUID + var cacheDir string + var filePath string + switch runtime.GOOS { + case "darwin", "windows": + homeDir, err := os.UserHomeDir() + if err != nil { + t.Fatalf("%v", err) + } + cacheDir = fmt.Sprintf("%s/.juicefs/cache", homeDir) + default: + cacheDir = "/var/jfsCache" + } + + os.RemoveAll(fmt.Sprintf("%s/%s", cacheDir, uuid)) + defer os.RemoveAll(fmt.Sprintf("%s/%s", cacheDir, uuid)) + + warmupArgs := []string{"", "warmup", mountpoint} + err = Main(warmupArgs) + if err != nil { + t.Fatalf("warmup error: %v", err) + } + + time.Sleep(2 * time.Second) + filePath = fmt.Sprintf("%s/%s/raw/chunks/0/0/1_0_4", cacheDir, uuid) + content, err := ioutil.ReadFile(filePath) + if err != nil || string(content) != "test" { + t.Fatalf("warmup error:%v", err) + } +} diff --git a/deploy/juicefs-s3-gateway.yaml b/deploy/juicefs-s3-gateway.yaml new file mode 100644 index 0000000..75d4c4d --- /dev/null +++ b/deploy/juicefs-s3-gateway.yaml @@ -0,0 +1,91 @@ +apiVersion: apps/v1 +kind: Deployment +metadata: + name: juicefs-s3-gateway + namespace: kube-system +spec: + replicas: 1 + selector: + matchLabels: + app.kubernetes.io/name: juicefs-s3-gateway + template: + metadata: + labels: + app.kubernetes.io/name: juicefs-s3-gateway + spec: + initContainers: + - name: format + image: juicedata/juicefs-csi-driver:v0.12.0 + command: + - sh + - -c + - juicefs format --storage=${storage} --bucket=${bucket} --access-key=${accesskey} --secret-key=${secretkey} ${metaurl} ${name} + envFrom: + - secretRef: + name: juicefs-secret + env: + - name: accesskey + valueFrom: + secretKeyRef: + name: juicefs-secret + key: access-key + - name: secretkey + valueFrom: + secretKeyRef: + name: juicefs-secret + key: secret-key + containers: + - name: gateway + image: juicedata/juicefs-csi-driver:v0.11.1 + command: + - sh + - -c + - juicefs gateway ${METAURL} ${NODE_IP}:9000 --metrics=${NODE_IP}:9567 + env: + - name: NODE_IP + valueFrom: + fieldRef: + fieldPath: status.podIP + - name: METAURL + valueFrom: + secretKeyRef: + name: juicefs-secret + key: metaurl + - name: MINIO_ROOT_USER + valueFrom: + secretKeyRef: + name: juicefs-secret + key: access-key + - name: MINIO_ROOT_PASSWORD + valueFrom: + secretKeyRef: + name: juicefs-secret + key: secret-key + ports: + - containerPort: 9000 + - containerPort: 9567 + resources: + limits: + cpu: 5000m + memory: 5Gi + requests: + cpu: 1000m + memory: 1Gi +--- +apiVersion: v1 +kind: Service +metadata: + name: juicefs-s3-gateway + namespace: kube-system + labels: + app.kubernetes.io/name: juicefs-s3-gateway +spec: + selector: + app.kubernetes.io/name: juicefs-s3-gateway + ports: + - name: http + port: 9000 + targetPort: 9000 + - name: metrics + port: 9567 + targetPort: 9567 diff --git a/docs/README.md 
b/docs/README.md
new file mode 100644
index 0000000..9545c21
--- /dev/null
+++ b/docs/README.md
@@ -0,0 +1,10 @@
+# JuiceFS User Manual
+
+For reading on GitHub, please select your language:
+
+- [🇬🇧 English](en/README.md)
+- [🇨🇳 简体中文](zh_cn/README.md)
+
+Or, visit the JuiceFS Documentation Center at
+
+[🌍 https://juicefs.com/docs/](https://juicefs.com/docs/)
\ No newline at end of file
diff --git a/docs/en/README.md b/docs/en/README.md
new file mode 100644
index 0000000..e7f18ce
--- /dev/null
+++ b/docs/en/README.md
@@ -0,0 +1,22 @@
+# JuiceFS User Manual
+
+[![license](https://img.shields.io/badge/license-Apache%20v2.0-blue)](https://github.com/juicedata/juicefs/blob/main/LICENSE) [![Go Report](https://img.shields.io/badge/go%20report-A+-brightgreen.svg?style=flat)](https://goreportcard.com/badge/github.com/juicedata/juicefs) [![Join Slack](https://badgen.net/badge/Slack/Join%20JuiceFS/0abd59?icon=slack)](https://join.slack.com/t/juicefs/shared_invite/zt-n9h5qdxh-0bJojPaql8cfFgwerDQJgA)
+
+![JuiceFS LOGO](images/juicefs-logo.png)
+
+JuiceFS is a high-performance [POSIX](https://en.wikipedia.org/wiki/POSIX) file system released under Apache License 2.0 and specially optimized for cloud-native environments. When JuiceFS is used to store data, the data itself is persisted in object storage (e.g. Amazon S3), while the corresponding metadata can be persisted in various database engines such as Redis, MySQL, and SQLite, depending on the scenario.
+
+JuiceFS conveniently connects massive cloud storage directly to big data, machine learning, artificial intelligence, and other application platforms that are already in production. Without modifying any code, you can use massive cloud storage as efficiently as local storage.
+
+## Highlighted Features
+
+1. **Fully POSIX-compatible**: Use it like a local file system; it works seamlessly with existing applications and requires no changes to your business logic.
+2. **Fully Hadoop-compatible**: The JuiceFS [Hadoop Java SDK](deployment/hadoop_java_sdk.md) is compatible with Hadoop 2.x, Hadoop 3.x, and a variety of components in the Hadoop ecosystem.
+3. **S3-compatible**: The JuiceFS [S3 Gateway](deployment/s3_gateway.md) provides an S3-compatible interface.
+4. **Cloud Native**: JuiceFS provides a [Kubernetes CSI driver](deployment/how_to_use_on_kubernetes.md) for using JuiceFS in Kubernetes.
+5. **Sharing**: JuiceFS is a shared file store that can be read and written by thousands of clients.
+6. **Strong Consistency**: A confirmed modification is immediately visible on all servers that mount the same file system.
+7. **Outstanding Performance**: Latency can be as low as a few milliseconds, and throughput scales nearly without limit ([test results](benchmark/benchmark.md)).
+8. **Data Encryption**: Supports data encryption in transit and at rest; read [the guide](security/encrypt.md) for more information.
+9. **Global File Locks**: JuiceFS supports both BSD locks (flock) and POSIX record locks (fcntl).
+10. **Data Compression**: JuiceFS supports compressing all your data with [LZ4](https://lz4.github.io/lz4) or [Zstandard](https://facebook.github.io/zstd).
diff --git a/docs/en/administration/cache_management.md b/docs/en/administration/cache_management.md new file mode 100644 index 0000000..d74b4b7 --- /dev/null +++ b/docs/en/administration/cache_management.md @@ -0,0 +1,109 @@ +--- +sidebar_label: Cache +sidebar_position: 5 +slug: /cache_management +--- +# Cache + +For a file system driven by a combination of object storage and database, the cache is an important medium for efficient interaction between the local client and the remote service. Read and write data can be loaded into the cache in advance or asynchronously, and then the client interacts with the remote service in the background to perform asynchronous uploads or prefetching of data. The use of caching technology can significantly reduce the latency of storage operations and increase data throughput compared to interacting with remote services directly. + +JuiceFS provides various caching mechanisms including metadata caching, data read/write caching, etc. + +## Data Consistency + +JuiceFS provides a "close-to-open" consistency guarantee, which means that when two or more clients read and write the same file at the same time, the changes made by client A may not be immediately visible to client B. However, once the file is closed by client A, any client re-opened it afterwards is guaranteed to see the latest data, no matter it is on the same node with A or not. + +"Close-to-open" is the minimum consistency guarantee provided by JuiceFS, and in some cases it may not be necessary to reopen the file to access the latest written data. For example, multiple applications using the same JuiceFS client to access the same file (where file changes are immediately visible), or to view the latest data on different nodes with the `tail -f` command. + +## Metadata Cache + +JuiceFS supports caching metadata in kernel and client memory (i.e. JuiceFS processes) to improve metadata access performance. + +### Metadata Cache in Kernel + +Three kinds of metadata can be cached in kernel: **attributes (attribute)**, **file entries (entry)** and **directory entries (direntry)**. The cache timeout can be controlled by the following [mount parameter](../reference/command_reference.md#juicefs-mount): + +``` +--attr-cache value attributes cache timeout in seconds (default: 1) +--entry-cache value file entry cache timeout in seconds (default: 1) +--dir-entry-cache value dir entry cache timeout in seconds (default: 1) +``` + +JuiceFS caches attributes, file entries, and directory entries in kernel for 1 second by default to improve lookup and getattr performance. When clients on multiple nodes are using the same file system, the metadata cached in kernel will only be expired by time. That is, in an extreme case, it may happen that node A modifies the metadata of a file (e.g., `chown`) and accesses it through node B without immediately seeing the update. Of course, when the cache expires, all nodes will eventually be able to see the changes made by A. + +### Metadata Cache in Client + +> **Note**: This feature requires JuiceFS >= 0.15.2. + +When a JuiceFS client `open()` a file, its file attributes are automatically cached in client memory. If the [`--open-cache`](../reference/command_reference.md#juicefs-mount) option is set to a value greater than 0 when mounting the file system, subsequent `getattr()` and `open()` operations will return the result from the in-memory cache immediately, as long as the cache has not timed out. 
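For example, the following is a minimal sketch of enabling this cache at mount time; the timeout value, metadata URL and mount point are illustrative and should be adapted to your deployment (per the command reference, the value is a cache timeout in seconds):

```bash
# Illustrative: keep attributes of opened files cached in client memory for
# 30 seconds, so repeated open()/getattr() calls are served without a round
# trip to the metadata engine. Adjust value, URL and mount point as needed.
juicefs mount -d --open-cache 30 redis://192.168.1.6:6379/1 /mnt/jfs
```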
+ +When a file is read by `read()`, the chunk and slice information of the file is automatically cached in client memory. Reading the chunk again during the cache lifetime will return the slice information from the in-memory cache immediately. + +> **Hint**: You can check ["How JuiceFS Stores Files"](../reference/how_juicefs_store_files.md) to know what chunk and slice are. + +By default, for any file whose metadata has been cached in memory and not accessed by any process for more than 1 hour, all its metadata cache will be automatically deleted. + +## Data Cache + +Data cache is also provided in JuiceFS to improve performance, including page cache in the kernel and local cache in client host. + +### Data Cache in Kernel + +> **Note**: This feature requires JuiceFS >= 0.15.2. + +For files that have already been read, the kernel automatically caches their contents. Then if the file is opened again, and it's not changed (i.e., mtime has not been updated), it can be read directly from the kernel cache for the best performance. + +Thanks to the kernel cache, repeated reads of the same file in JuiceFS can be very fast, with latencies as low as microseconds and throughputs up to several GiBs per second. + +JuiceFS clients currently do not have kernel write caching enabled by default, starting with [Linux kernel 3.15](https://github.com/torvalds/linux/commit/4d99ff8f12e), FUSE supports ["writeback-cache mode"]( https://www.kernel.org/doc/Documentation/filesystems/fuse-io.txt), which means that the `write()` system call can be done very quickly. You can set the [`-o writeback_cache`](../reference/fuse_mount_options.md#writeback_cache) option at [mount file system](../reference/command_reference.md#juicefs-mount) to enable writeback-cache mode. It is recommended to enable this mount option when very small data (e.g. around 100 bytes) needs to be written frequently. + +### Read Cache in Client + +The JuiceFS client automatically prefetch data into the cache based on the read pattern, thus improving sequential read performance. By default, 1 block is prefetch locally concurrently with the read data. The local cache can be set on any local file system based on HDD, SSD or memory. + +> **Hint**: You can check ["How JuiceFS Stores Files"](../reference/how_juicefs_store_files.md) to learn what a block is. + +The local cache can be adjusted at [mount file system](../reference/command_reference.md#juicefs-mount) with the following options. + +``` +--cache-dir value directory paths of local cache, use colon to separate multiple paths (default: "$HOME/.juicefs/cache" or "/var/jfsCache") +--cache-size value size of cached objects in MiB (default: 102400) +--free-space-ratio value min free space (ratio) (default: 0.1) +--cache-partial-only cache only random/small read (default: false) +``` + +Specifically, there are two ways if you want to store the local cache of JuiceFS in memory, one is to set `--cache-dir` to `memory` and the other is to set it to `/dev/shm/`. The difference between these two approaches is that the former deletes the cache data after remounting the JuiceFS file system, while the latter retains it, and there is not much difference in performance between the two. + +The JuiceFS client writes data downloaded from the object store (including new uploads less than 1 block in size) to the cache directory as fast as possible, without compression or encryption. 
**Because JuiceFS generates unique names for all block objects written to the object store, and block objects are never modified once written, there is no need to worry about invalidating the cached data when the file content is updated.**
+
+The cache is automatically purged when it reaches the maximum allowed space (i.e., the cache size is greater than or equal to `--cache-size`) or when the disk is about to be full (i.e., the free space ratio of the disk is less than `--free-space-ratio`). The current rule is to prioritize purging infrequently accessed files based on access time.
+
+Data caching can effectively improve the performance of random reads. For applications such as Elasticsearch and ClickHouse that require higher random read performance, it is recommended to place the cache path on a faster storage medium and allocate more cache space.
+
+### Write Cache in Client
+
+When writing data, the JuiceFS client caches the data in memory until a chunk is fully written or the operation is forced by `close()` or `fsync()`, and only then uploads it to the object storage. When `fsync()` or `close()` is called, the client waits until the data has been written to the object storage and the metadata service has been notified before returning, thus ensuring data integrity.
+
+In cases where local storage is reliable and its write performance is significantly better than writing over the network (e.g. SSD disks), write performance can be improved by enabling asynchronous upload, so that `close()` does not wait for the data to reach the object storage but returns as soon as the data is written to the local cache directory.
+
+The asynchronous upload feature is disabled by default and can be enabled with the following option:
+
+```
+--writeback  upload objects in background (default: false)
+```
+
+When writing a large number of small files in a short period of time, it is recommended to mount the file system with the `--writeback` parameter to improve write performance, and to consider re-mounting without the option after the write is complete so that subsequent writes are more reliable. Enabling `--writeback` is also recommended for scenarios with a lot of random writes, such as incremental backups of MySQL.
+
+> **Warning**: When asynchronous upload is enabled, i.e. `--writeback` is specified when mounting the file system, do not delete the contents of the `//rawstaging` directory, as this will result in data loss.
+
+When the cache disk is nearly full, the client pauses writing to the cache and falls back to uploading data directly to the object storage (i.e., the client write cache feature is turned off).
+
+When asynchronous upload is enabled, the reliability of the cache itself directly affects the reliability of data writes, so use it with caution in scenarios that require high data reliability.
+
+## Frequently Asked Questions
+
+### Why is 60 GiB of disk space occupied when I set the cache size to 50 GiB?
+
+JuiceFS currently estimates the size of the cache by adding up the sizes of all cached objects plus a fixed per-object overhead (4 KiB), which is not exactly the same as the value reported by the `du` command.
+
+To prevent the cache disk from filling up completely, the client will try to reduce cache usage when the file system that holds the cache directory is running low on space.
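As a closing sketch that puts the options discussed above together (all paths, sizes and the metadata URL are illustrative, not recommendations):

```bash
# Illustrative: put the local read cache on an SSD path, cap it at 50 GiB,
# keep at least 20% of the disk free, and enable asynchronous upload of writes.
juicefs mount -d \
  --cache-dir /ssd/jfscache \
  --cache-size 51200 \
  --free-space-ratio 0.2 \
  --writeback \
  redis://192.168.1.6:6379/1 /mnt/jfs
```

Keep in mind that with `--writeback` enabled, the durability of not-yet-uploaded writes depends on the cache disk, as noted in the warning above.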
diff --git a/docs/en/administration/destroy.md b/docs/en/administration/destroy.md new file mode 100644 index 0000000..b59086b --- /dev/null +++ b/docs/en/administration/destroy.md @@ -0,0 +1,77 @@ +# How to destroy a file system + +JuiceFS client provides the `destroy` command to completely destroy a file system, which will result in the following. + +- Delete all metadata entries of this file system +- Deletes all data blocks of the file system + +The command to destroy a file system is as follows. + +```shell +juicefs destroy +``` + +- ``: The URL address of the metadata engine +- ``: The UUID of the file system + +## Find the UUID of the file system + +The `status` command on the JuiceFS client can view detailed information about a file system by simply specifying the file system's metadata engine URL, e.g. + +```shell {7} +$ juicefs status redis://127.0.0.1:6379/1 + +2022/01/26 21:41:37.577645 juicefs[31181] : Meta address: redis://127.0.0.1:6379/1 +2022/01/26 21:41:37.578238 juicefs[31181] : Ping redis: 55.041µs +{ + "Setting": { + "Name": "macjfs", + "UUID": "eabb96d5-7228-461e-9240-fddbf2b576d8", + "Storage": "file", + "Bucket": "jfs/", + "AccessKey": "", + "BlockSize": 4096, + "Compression": "none", + "Shards": 0, + "Partitions": 0, + "Capacity": 0, + "Inodes": 0, + "TrashDays": 1 + }, + ... +} +``` + +## Destroy a file system + +:::danger +The destroy operation will cause all the data in the database records and object storage associated with the file system to be deleted, please make sure to backup the important data first before operation! +::: + +```shell +$ juicefs destroy redis://127.0.0.1:6379/1 eabb96d5-7228-461e-9240-fddbf2b576d8 + +2022/01/26 21:52:17.488987 juicefs[31518] : Meta address: redis://127.0.0.1:6379/1 +2022/01/26 21:52:17.489668 juicefs[31518] : Ping redis: 55.542µs + volume name: macjfs + volume UUID: eabb96d5-7228-461e-9240-fddbf2b576d8 +data storage: file://jfs/ + used bytes: 18620416 + used inodes: 23 +WARNING: The target volume will be destoried permanently, including: +WARNING: 1. objects in the data storage +WARNING: 2. entries in the metadata engine +Proceed anyway? [y/N]: y +deleting objects: 68 +The volume has been destroyed! You may need to delete cache directory manually. +``` + +When destroying a file system, the client will issue a confirmation prompt. Please make sure to check the file system information carefully and enter `y` after confirming it is correct. + +## FAQ + +```shell +2022/01/26 21:47:30.949149 juicefs[31483] : 1 sessions are active, please disconnect them first +``` + +If you receive an error like the one above, which indicates that the file system has not been properly unmounted, please check and confirm that all mount points are unmounted before proceeding. diff --git a/docs/en/administration/fault_diagnosis_and_analysis.md b/docs/en/administration/fault_diagnosis_and_analysis.md new file mode 100644 index 0000000..e7ac2be --- /dev/null +++ b/docs/en/administration/fault_diagnosis_and_analysis.md @@ -0,0 +1,115 @@ +--- +sidebar_label: Fault Diagnosis and Analysis +sidebar_position: 9 +slug: /fault_diagnosis_and_analysis +--- + +# Fault Diagnosis and Analysis + +## Error Log + +When JuiceFS run in background (through [`-d` option](../reference/command_reference.md#juicefs-mount) when mount volume), logs will output to syslog and `/var/log/juicefs.log` (v0.15+, refer to [`--log` option](../reference/command_reference.md#juicefs-mount)). 
Depending on your operating system, you can get the logs through different commands: + +```bash +# macOS +$ syslog | grep 'juicefs' + +# Debian based system +$ cat /var/log/syslog | grep 'juicefs' + +# CentOS based system +$ cat /var/log/messages | grep 'juicefs' + +# v0.15+ +$ tail -n 100 /var/log/juicefs.log +``` + +There are 4 log levels. You can use the `grep` command to filter different levels of logs for performance analysis or troubleshooting: + +``` +$ cat /var/log/syslog | grep 'juicefs' | grep '' +$ cat /var/log/syslog | grep 'juicefs' | grep '' +$ cat /var/log/syslog | grep 'juicefs' | grep '' +$ cat /var/log/syslog | grep 'juicefs' | grep '' +``` + +## Access Log + +There is a virtual file called `.accesslog` in the root of JuiceFS to show all the operations and the time they takes, for example: + +```bash +$ cat /jfs/.accesslog +2021.01.15 08:26:11.003330 [uid:0,gid:0,pid:4403] write (17669,8666,4993160): OK <0.000010> +2021.01.15 08:26:11.003473 [uid:0,gid:0,pid:4403] write (17675,198,997439): OK <0.000014> +2021.01.15 08:26:11.003616 [uid:0,gid:0,pid:4403] write (17666,390,951582): OK <0.000006> +``` + +The last number on each line is the time (in seconds) current operation takes. You can use this to know information of every operation, or try `juicefs profile /jfs` to monitor aggregated statistics. Please run `juicefs profile -h` or refer to [here](../benchmark/operations_profiling.md) to learn more about this subcommand. + +## Runtime Information + +By default, JuiceFS clients will listen to a TCP port locally via [pprof](https://pkg.go.dev/net/http/pprof) to get runtime information such as Goroutine stack information, CPU performance statistics, memory allocation statistics. You can see the specific port number that the current JuiceFS client is listening on by using the system command (e.g. `lsof`): + +:::note +If JuiceFS is mounted via the root user, then you need to add `sudo` before the `lsof` command. +::: + +```bash +$ lsof -i -nP | grep LISTEN | grep juicefs +juicefs 32666 user 8u IPv4 0x44992f0610d9870b 0t0 TCP 127.0.0.1:6061 (LISTEN) +juicefs 32666 user 9u IPv4 0x44992f0619bf91cb 0t0 TCP 127.0.0.1:6071 (LISTEN) +juicefs 32666 user 15u IPv4 0x44992f062886fc5b 0t0 TCP 127.0.0.1:9567 (LISTEN) +``` + +By default, pprof listens on port numbers starting at 6060 and ending at 6099, so the actual port number in the above example is 6061. 
Once you get the listening port number, you can view all the available runtime information at `http://localhost:/debug/pprof`, and some important runtime information is as follows: + +- Goroutine stack information: `http://localhost:/debug/pprof/goroutine?debug=1` +- CPU performance statistics: `http://localhost:/debug/pprof/profile?seconds=30` +- Memory allocation statistics: `http://localhost:/debug/pprof/heap` + +To make it easier to analyze this runtime information, you can save it locally, e.g.: + +```bash +$ curl 'http://localhost:/debug/pprof/goroutine?debug=1' > juicefs.goroutine.txt +$ curl 'http://localhost:/debug/pprof/profile?seconds=30' > juicefs.cpu.pb.gz +$ curl 'http://localhost:/debug/pprof/heap' > juicefs.heap.pb.gz +``` + +If you have the `go` command installed, you can analyze it directly with the `go tool pprof` command, for example to analyze CPU performance statistics: + +```bash +$ go tool pprof 'http://localhost:/debug/pprof/profile?seconds=30' +Fetching profile over HTTP from http://localhost:/debug/pprof/profile?seconds=30 +Saved profile in /Users/xxx/pprof/pprof.samples.cpu.001.pb.gz +Type: cpu +Time: Dec 17, 2021 at 1:41pm (CST) +Duration: 30.12s, Total samples = 32.06s (106.42%) +Entering interactive mode (type "help" for commands, "o" for options) +(pprof) top +Showing nodes accounting for 30.57s, 95.35% of 32.06s total +Dropped 285 nodes (cum <= 0.16s) +Showing top 10 nodes out of 192 + flat flat% sum% cum cum% + 14.73s 45.95% 45.95% 14.74s 45.98% runtime.cgocall + 7.39s 23.05% 69.00% 7.41s 23.11% syscall.syscall + 2.92s 9.11% 78.10% 2.92s 9.11% runtime.pthread_cond_wait + 2.35s 7.33% 85.43% 2.35s 7.33% runtime.pthread_cond_signal + 1.13s 3.52% 88.96% 1.14s 3.56% runtime.nanotime1 + 0.77s 2.40% 91.36% 0.77s 2.40% syscall.Syscall + 0.49s 1.53% 92.89% 0.49s 1.53% runtime.memmove + 0.31s 0.97% 93.86% 0.31s 0.97% runtime.kevent + 0.27s 0.84% 94.70% 0.27s 0.84% runtime.usleep + 0.21s 0.66% 95.35% 0.21s 0.66% runtime.madvise +``` + +Runtime information can also be exported to visual charts for a more intuitive analysis. The visual charts support exporting to various formats such as HTML, PDF, SVG, PNG, etc. For example, the command to export memory allocation statistics as a PDF file is as follows: + +:::note +The export to visual chart function relies on [Graphviz](https://graphviz.org), so please install it first. +::: + +```bash +$ go tool pprof -pdf 'http://localhost:/debug/pprof/heap' > juicefs.heap.pdf +``` + +For more information about pprof, please see the [official documentation](https://github.com/google/pprof/blob/master/doc/README.md). 
diff --git a/docs/en/administration/metadata/_mysql_best_practices.md b/docs/en/administration/metadata/_mysql_best_practices.md
new file mode 100644
index 0000000..459d058
--- /dev/null
+++ b/docs/en/administration/metadata/_mysql_best_practices.md
@@ -0,0 +1,5 @@
+---
+sidebar_label: MySQL Best Practices
+sidebar_position: 2
+---
+# MySQL Best Practices
\ No newline at end of file
diff --git a/docs/en/administration/metadata/_tikv_best_practices.md b/docs/en/administration/metadata/_tikv_best_practices.md
new file mode 100644
index 0000000..146b3be
--- /dev/null
+++ b/docs/en/administration/metadata/_tikv_best_practices.md
@@ -0,0 +1,5 @@
+---
+sidebar_label: TiKV Best Practices
+sidebar_position: 3
+---
+# TiKV Best Practices
\ No newline at end of file
diff --git a/docs/en/administration/metadata/postgresql_best_practices.md b/docs/en/administration/metadata/postgresql_best_practices.md
new file mode 100644
index 0000000..ae5c551
--- /dev/null
+++ b/docs/en/administration/metadata/postgresql_best_practices.md
@@ -0,0 +1,51 @@
+---
+sidebar_label: PostgreSQL
+sidebar_position: 2
+---
+# PostgreSQL Best Practices
+
+For distributed file systems where data and metadata are stored separately, the read and write performance of metadata directly affects the efficiency of the whole system, and the safety of the metadata is directly tied to the safety of the data itself.
+
+In production environments, it is recommended to prefer a managed database service provided by your cloud platform, combined with an appropriate high-availability architecture.
+
+Whether you run the database yourself or use a cloud database, you should always pay attention to the integrity and security of the metadata when using JuiceFS.
+
+## Communication Security
+
+By default, JuiceFS clients use SSL encryption to connect to PostgreSQL. If SSL encryption is not enabled on the database, you need to append the `sslmode=disable` parameter to the metadata URL.
+
+It is recommended to configure the database server side so that SSL encryption is always enabled.
+
+## Passing sensitive information via environment variables
+
+Although it is convenient to put the database password directly in the metadata URL, it may then leak through logs or program output. For better security, the database password should always be passed through an environment variable.
+
+The environment variable name can be chosen freely, e.g.
+
+```shell
+export PG_PASSWD=mypassword
+```
+
+Then reference the variable when passing the database password in the metadata URL:
+
+```shell
+juicefs mount -d "postgres://user:$PG_PASSWD@192.168.1.6:5432/juicefs" /mnt/jfs
+```
+
+## Back up periodically
+
+Please refer to the official manual [Chapter 26. Backup and Restore](https://www.postgresql.org/docs/current/backup.html) to learn how to back up and restore the database.
+
+It is recommended to set up a database backup plan and follow it regularly, and to periodically restore the backups in a test environment to confirm that they are valid.
+
+## Using connection pooling
+
+A connection pool is an intermediate layer between the client and the database; it improves connection efficiency and reduces the overhead of short-lived connections. Commonly used connection pools are [PgBouncer](https://www.pgbouncer.org/) and [Pgpool-II](https://www.pgpool.net/).
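As an illustrative sketch only: assuming PgBouncer runs on the same host in session pooling mode on its default port 6432 and exposes a `juicefs` database alias, the client can be pointed at the pool instead of directly at PostgreSQL (you may also need to adjust `sslmode` depending on how the pool is configured):

```bash
# Illustrative: connect through a local PgBouncer pool (default port 6432)
# rather than directly to PostgreSQL on 5432; the "juicefs" alias must be
# defined in pgbouncer.ini, and the password is taken from the environment.
juicefs mount -d "postgres://user:$PG_PASSWD@127.0.0.1:6432/juicefs" /mnt/jfs
```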
+ +## High Availability + +The official PostgreSQL document [High Availability, Load Balancing, and Replication](https://www.postgresql.org/docs/current/different-replication-solutions.html) compares several common database high availability solutions, please choose the appropriate according to your needs. + +:::note +JuiceFS uses [transactions](https://www.postgresql.org/docs/current/tutorial-transactions.html) to ensure atomicity of metadata operations. Since PostgreSQL does not yet support Muti-Shard (Distributed) transactions, do not use a multi-server distributed architecture for the JuiceFS metadata. +::: diff --git a/docs/en/administration/metadata/redis_best_practices.md b/docs/en/administration/metadata/redis_best_practices.md new file mode 100644 index 0000000..b7ca5dd --- /dev/null +++ b/docs/en/administration/metadata/redis_best_practices.md @@ -0,0 +1,126 @@ +--- +sidebar_label: Redis +sidebar_position: 1 +slug: /redis_best_practices +--- +# Redis Best Practices + +This is a guide about Redis best practices. Redis is a critical component in JuiceFS architecture. It stores all the file system metadata and serve metadata operation from client. If Redis has any problem (either service unavailable or lose data), it will affect the user experience. + +**It's highly recommended use Redis service managed by public cloud provider if possible.** See ["Recommended Managed Redis Service"](#recommended-managed-redis-service) for more information. If you still need operate Redis by yourself in production environment, continue read following contents. + +## Memory usage + +The space used by the JuiceFS metadata engine is mainly related to the number of files in the file system. According to our experience, the metadata of each file occupies approximately 300 bytes of memory. Therefore, if you want to store 100 million files, approximately 30 GiB of memory is required. + +You can check the specific memory usage through Redis's [`INFO memory`](https://redis.io/commands/info) command, for example: + +``` +> INFO memory +used_memory: 19167628056 +used_memory_human: 17.85G +used_memory_rss: 20684886016 +used_memory_rss_human: 19.26G +... +used_memory_overhead: 5727954464 +... +used_memory_dataset: 13439673592 +used_memory_dataset_perc: 70.12% +``` + +Among them, `used_memory_rss` is the total memory size actually used by Redis, which includes not only the size of data stored in Redis (that is, `used_memory_dataset` above), but also some Redis [system overhead](https://redis.io/commands/memory-stats) (that is, `used_memory_overhead` above). As mentioned earlier, the metadata of each file occupies about 300 bytes and is calculated by `used_memory_dataset`. If you find that the metadata of a single file in your JuiceFS file system occupies much more than 300 bytes, you can try to run [`juicefs gc`](../../reference/command_reference.md#juicefs-gc) command to clean up possible redundant data. + +--- + +> **Note**: The following paragraphs are extracted from Redis official documentation. It may outdated, subject to latest version of the official documentation. + +## High Availability + +[Redis Sentinel](https://redis.io/topics/sentinel) is the official high availability solution for Redis. It provides following capabilities: + +- **Monitoring**. Sentinel constantly checks if your master and replica instances are working as expected. +- **Notification**. 
Sentinel can notify the system administrator, or other computer programs, via an API, that something is wrong with one of the monitored Redis instances. +- **Automatic failover**. If a master is not working as expected, Sentinel can start a failover process where a replica is promoted to master, the other additional replicas are reconfigured to use the new master, and the applications using the Redis server are informed about the new address to use when connecting. +- **Configuration provider**. Sentinel acts as a source of authority for clients service discovery: clients connect to Sentinels in order to ask for the address of the current Redis master responsible for a given service. If a failover occurs, Sentinels will report the new address. + +**A stable release of Redis Sentinel is shipped since Redis 2.8**. Redis Sentinel version 1, shipped with Redis 2.6, is deprecated and should not be used. + +There're some [fundamental things](https://redis.io/topics/sentinel#fundamental-things-to-know-about-sentinel-before-deploying) you need to know about Redis Sentinel before using it: + +1. You need at least three Sentinel instances for a robust deployment. +2. The three Sentinel instances should be placed into computers or virtual machines that are believed to fail in an independent way. So for example different physical servers or Virtual Machines executed on different availability zones. +3. **Sentinel + Redis distributed system does not guarantee that acknowledged writes are retained during failures, since Redis uses asynchronous replication.** However there are ways to deploy Sentinel that make the window to lose writes limited to certain moments, while there are other less secure ways to deploy it. +4. There is no HA setup which is safe if you don't test from time to time in development environments, or even better if you can, in production environments, if they work. You may have a misconfiguration that will become apparent only when it's too late (at 3am when your master stops working). +5. **Sentinel, Docker, or other forms of Network Address Translation or Port Mapping should be mixed with care**: Docker performs port remapping, breaking Sentinel auto discovery of other Sentinel processes and the list of replicas for a master. + +Please read the [official documentation](https://redis.io/topics/sentinel) for more information. + +Once Redis servers and Sentinels are deployed, the `META-URL` can be specified as `redis[s]://[[USER]:PASSWORD@]MASTER_NAME,SENTINEL_ADDR[,SENTINEL_ADDR]:SENTINEL_PORT[/DB]`, for example: + +```bash +$ ./juicefs mount redis://:password@masterName,1.2.3.4,1.2.5.6:26379/2 ~/jfs +``` + +> **Note**: For v0.16+, the `PASSWORD` in the URL will be used to connect Redis server, the password for Sentinel +> should be provided using environment variable `SENTINEL_PASSWORD`. For early versions, the `PASSWORD` is used for both +> Redis server and Sentinel, they can be overrode by environment variables `SENTINEL_PASSWORD` and `REDIS_PASSWORD`. + +## Data Durability + +Redis provides a different range of [persistence](https://redis.io/topics/persistence) options: + +- The RDB persistence performs point-in-time snapshots of your dataset at specified intervals. +- The AOF persistence logs every write operation received by the server, that will be played again at server startup, reconstructing the original dataset. Commands are logged using the same format as the Redis protocol itself, in an append-only fashion. 
Redis is able to rewrite the log in the background when it gets too big. +- It is possible to combine both AOF and RDB in the same instance. Notice that, in this case, when Redis restarts the AOF file will be used to reconstruct the original dataset since it is guaranteed to be the most complete. + +**It's recommended enable RDB and AOF simultaneously.** Beware that when use AOF you can have different fsync policies: no fsync at all, fsync every second, fsync at every query. With the default policy of fsync every second write performances are still great (fsync is performed using a background thread and the main thread will try hard to perform writes when no fsync is in progress.) **but you can only lose one second worth of writes**. + +**Remember backup is also required** (disk may break, VM may disappear). Redis is very data backup friendly since you can copy RDB files while the database is running: the RDB is never modified once produced, and while it gets produced it uses a temporary name and is renamed into its final destination atomically using `rename` only when the new snapshot is complete. You can also copy the AOF file in order to create backups. + +Please read the [official documentation](https://redis.io/topics/persistence) for more information. + +## Backing up Redis Data + +**Make Sure to Backup Your Database.** Disks break, instances in the cloud disappear, and so forth. + +By default Redis saves snapshots of the dataset on disk, in a binary file called `dump.rdb`. You can configure Redis to have it save the dataset every N seconds if there are at least M changes in the dataset, or you can manually call the [`SAVE`](https://redis.io/commands/save) or [`BGSAVE`](https://redis.io/commands/bgsave) commands. + +Redis is very data backup friendly since you can copy RDB files while the database is running: the RDB is never modified once produced, and while it gets produced it uses a temporary name and is renamed into its final destination atomically using `rename(2)` only when the new snapshot is complete. + +This means that copying the RDB file is completely safe while the server is running. This is what we suggest: + +- Create a cron job in your server creating hourly snapshots of the RDB file in one directory, and daily snapshots in a different directory. +- Every time the cron script runs, make sure to call the `find` command to make sure too old snapshots are deleted: for instance you can take hourly snapshots for the latest 48 hours, and daily snapshots for one or two months. Make sure to name the snapshots with data and time information. +- At least one time every day make sure to transfer an RDB snapshot _outside your data center_ or at least _outside the physical machine_ running your Redis instance. + +Please read the [official documentation](https://redis.io/topics/persistence) for more information. + +--- + +## Recommended Managed Redis Service + +### Amazon ElastiCache for Redis + +[Amazon ElastiCache for Redis](https://aws.amazon.com/elasticache/redis) is a fully managed, Redis-compatible in-memory data store built for the cloud. It provides [automatic failover](https://docs.aws.amazon.com/AmazonElastiCache/latest/red-ug/AutoFailover.html), [automatic backup](https://docs.aws.amazon.com/AmazonElastiCache/latest/red-ug/backups-automatic.html) features to ensure availability and durability. + +> **Note**: Amazon ElastiCache for Redis has two type: cluster mode disabled and cluster mode enabled. 
Because JuiceFS uses [transactions](https://redis.io/topics/transactions) to guarantee the atomicity of metadata operations, the "cluster mode enabled" type cannot be used. + +### Google Cloud Memorystore for Redis + +[Google Cloud Memorystore for Redis](https://cloud.google.com/memorystore/docs/redis) is a fully managed Redis service for the Google Cloud. Applications running on Google Cloud can achieve extreme performance by leveraging the highly scalable, available, secure Redis service without the burden of managing complex Redis deployments. + +### Azure Cache for Redis + +[Azure Cache for Redis](https://azure.microsoft.com/en-us/services/cache) is a fully managed, in-memory cache that enables high-performance and scalable architectures. Use it to create cloud or hybrid deployments that handle millions of requests per second at sub-millisecond latency—all with the configuration, security, and availability benefits of a managed service. + +### Alibaba Cloud ApsaraDB for Redis + +[Alibaba Cloud ApsaraDB for Redis](https://www.alibabacloud.com/product/apsaradb-for-redis) is a database service that is compatible with native Redis protocols. It supports a hybrid of memory and hard disks for data persistence. ApsaraDB for Redis provides a highly available hot standby architecture and can scale to meet requirements for high-performance and low-latency read/write operations. + +> **Note**: ApsaraDB for Redis supports three types of [architectures](https://www.alibabacloud.com/help/doc-detail/86132.htm): standard, cluster and read/write splitting. Because JuiceFS uses [transactions](https://redis.io/topics/transactions) to guarantee the atomicity of metadata operations, the cluster architecture cannot be used. + +### Tencent Cloud TencentDB for Redis + +[Tencent Cloud TencentDB for Redis](https://intl.cloud.tencent.com/product/crs) is a caching and storage service compatible with the Redis protocol. It features a rich variety of data structure options to help you develop different types of business scenarios, and offers a complete set of database services such as primary-secondary hot backup, automatic switchover for disaster recovery, data backup, failover, instance monitoring, online scaling and data rollback. + +> **Note**: TencentDB for Redis supports two types of [architectures](https://intl.cloud.tencent.com/document/product/239/3205): standard and cluster. Because JuiceFS uses [transactions](https://redis.io/topics/transactions) to guarantee the atomicity of metadata operations, the cluster architecture cannot be used. diff --git a/docs/en/administration/metadata_dump_load.md b/docs/en/administration/metadata_dump_load.md new file mode 100644 index 0000000..4c4af70 --- /dev/null +++ b/docs/en/administration/metadata_dump_load.md @@ -0,0 +1,123 @@ +--- +sidebar_label: Metadata Backup & Recovery +sidebar_position: 4 +slug: /metadata_dump_load +--- +# JuiceFS Metadata Backup & Recovery + +:::tip +- JuiceFS v0.15.2 started to support manual backup, recovery and inter-engine migration of metadata. +- JuiceFS v1.0.0 started to support automatic metadata backup. +::: + +## Manual Backup + +JuiceFS supports [multiple metadata storage engines](../reference/how_to_setup_metadata_engine.md), and each engine has a different internal data management format. To facilitate management, JuiceFS provides the `dump` command to write all metadata in a uniform format to a [JSON](https://www.json.org/json-en.html) file for backup.
Also, JuiceFS provides `load` command to allow restoring or migrating backups to any metadata storage engine. For more information on the command, please refer to [here](../reference/command_reference.md#juicefs-dump). + +### Metadata Backup + +Metadata can be exported to a file using the `dump` command provided by the JuiceFS client, e.g. + +```bash +juicefs dump redis://192.168.1.6:6379 meta.dump +``` + +By default, this command starts from the root directory `/` and iterates deeply through all the files in the directory tree, writing the metadata information of each file to the file in JSON format. + +:::note +`juicefs dump` only guarantees the integrity of individual files themselves and does not provide a global point-in-time snapshot. If the business is still writing during the dump process, the final result will contain information from different points in time. +::: + +Redis, MySQL and other databases have their own backup tools, such as [Redis RDB](https://redis.io/topics/persistence#backing-up-redis-data) and [mysqldump](https://dev.mysql.com/doc/mysql-backup-excerpt/5.7/en/mysqldump-sql-format.html), etc. Use them as JuiceFS metadata storage, you still need to backup metadata regularly with each database's own backup tool. + +The value of `juicefs dump` is that it can export complete metadata information in a uniform JSON format for easy management and preservation, and it can be recognized and imported by different metadata storage engines. In practice, the `dump` command should be used in conjunction with the backup tool that comes with the database to complement each other. + +:::note +The above discussion is for metadata backup only. A complete file system backup solution should also include at least object storage data backup, such as offsite disaster recovery, recycle bin, multiple versions, etc. +::: + +### Metadata Recovery + +:::tip +JSON backups can only be restored to a `newly created database` or an `empty database`. +::: + +Metadata from a backed up JSON file can be imported into a new **empty database** using the `load` command provided by the JuiceFS client, e.g. + +```bash +juicefs load redis://192.168.1.6:6379 meta.dump +``` + +This command automatically handles conflicts due to the inclusion of files from different points in time, recalculates the file system statistics (space usage, inode counters, etc.), and finally generates a globally consistent metadata in the database. Alternatively, if you want to customize some of the metadata (be careful), you can try to manually modify the JSON file before loading. + +### Metadata Migration Between Engines + +:::tip +The metadata migration operation requires the target database to be `newly created database` or `empty database`. +::: + +Thanks to the commonality of the JSON format, which is recognized by all metadata storage engines supported by JuiceFS, it is possible to export metadata information from one engine as a JSON backup and then import it to another engine, thus enabling the migration of metadata between different types of engines. Example. + +```bash +$ juicefs dump redis://192.168.1.6:6379 meta.dump +$ juicefs load mysql://user:password@(192.168.1.6:3306)/juicefs meta.dump +``` + +It is also possible to migrate directly through the system's Pipe. 
+ +```bash +$ juicefs dump redis://192.168.1.6:6379 | juicefs load mysql://user:password@(192.168.1.6:3306)/juicefs +``` + +:::caution +To ensure that the file system content is consistent before and after migration, you need to stop application writes during the migration process. Also, since the original object storage is still used after migration, make sure the old metadata engine is taken offline or only has read-only access to the object storage before the new metadata engine comes online; otherwise the file system may be corrupted. +::: + +### Metadata Inspection + +In addition to exporting complete metadata information, the `dump` command also supports exporting the metadata of specific subdirectories. The exported JSON content is often used to help troubleshoot problems because it gives the user an intuitive view of the internal information of all the files under a given directory tree. For example: + +```bash +$ juicefs dump redis://192.168.1.6:6379 meta.dump --subdir /path/in/juicefs +``` + +Moreover, you can use tools like `jq` to analyze the exported file. + +:::note +Please don't dump a very large directory on a production system, as it may slow down the server. +::: + +## Automatic Backup + +Starting with JuiceFS v1.0.0, the client automatically backs up metadata and copies it to the object storage every hour, regardless of whether the file system is mounted via the `mount` command or accessed via the JuiceFS S3 gateway or Hadoop Java SDK. + +The backup files are stored in the `meta` directory of the object storage. This directory is separate from the data store, is not visible at the mount point and does not interfere with the data store; it can be viewed and managed using the file browser of the object storage. + +![](../images/meta-auto-backup-list.png) + +By default, the JuiceFS client backs up metadata once an hour. The frequency of automatic backups can be adjusted with the `--backup-meta` option when mounting the file system, for example, to perform the automatic backup every 8 hours: + +```shell +$ sudo juicefs mount -d --backup-meta 8h redis://127.0.0.1:6379/1 /mnt +``` + +The backup frequency can be accurate to the second, and the supported units are as follows: + +- `h`: accurate to the hour, e.g. `1h`. +- `m`: accurate to the minute, e.g. `30m`, `1h30m`. +- `s`: accurate to the second, e.g. `50s`, `30m50s`, `1h30m50s`. + +### Automatic Backup Policy + +Although automatic metadata backup is the default behavior for all clients, backup conflicts do not occur when multiple hosts share the same file system mount. + +JuiceFS maintains a global timestamp to ensure that only one client performs the backup operation at a time. When different backup periods are set on different clients, the backup is performed with the shortest period. + +### Backup Cleanup Policy + +JuiceFS periodically cleans up backups according to the following rules: + +- Keep all backups from the last 2 days. +- For backups older than 2 days and newer than 2 weeks, keep 1 backup per day. +- For backups older than 2 weeks and newer than 2 months, keep 1 backup per week. +- For backups older than 2 months, keep 1 backup per month.
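+ +For example, restoring from one of these automatic backups into a new metadata engine might look like the sketch below. The file name `dump-2022-01-01-000000.json.gz` and the target Redis address are placeholders for illustration only; check the `meta` directory of your bucket for the actual backup names, and download the file with any object storage client first: + +```shell +# Decompress the backup downloaded from the `meta` directory of the object storage (file name is a placeholder) +$ gzip -d dump-2022-01-01-000000.json.gz +# Load it into a new, empty metadata engine (address is an example) +$ juicefs load redis://192.168.1.7:6379 dump-2022-01-01-000000.json +```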
diff --git a/docs/en/administration/migration/_from_hdfs.md b/docs/en/administration/migration/_from_hdfs.md new file mode 100644 index 0000000..fc22b31 --- /dev/null +++ b/docs/en/administration/migration/_from_hdfs.md @@ -0,0 +1,5 @@ +--- +sidebar_label: Migrate from HDFS +sidebar_position: 3 +--- +# Migrate from HDFS \ No newline at end of file diff --git a/docs/en/administration/migration/_from_local.md b/docs/en/administration/migration/_from_local.md new file mode 100644 index 0000000..eeb1e5c --- /dev/null +++ b/docs/en/administration/migration/_from_local.md @@ -0,0 +1,5 @@ +--- +sidebar_label: Migrate from Local Disk +sidebar_position: 1 +--- +# Migrate from Local Disk (or NAS) \ No newline at end of file diff --git a/docs/en/administration/migration/_from_s3.md b/docs/en/administration/migration/_from_s3.md new file mode 100644 index 0000000..8e397ab --- /dev/null +++ b/docs/en/administration/migration/_from_s3.md @@ -0,0 +1,5 @@ +--- +sidebar_label: Migrate from Object Storage +sidebar_position: 2 +--- +# Migrate from Object Storage \ No newline at end of file diff --git a/docs/en/administration/monitoring.md b/docs/en/administration/monitoring.md new file mode 100644 index 0000000..cfc0813 --- /dev/null +++ b/docs/en/administration/monitoring.md @@ -0,0 +1,214 @@ +--- +sidebar_label: Monitoring +sidebar_position: 6 +--- + +# Monitoring + +JuiceFS provides a [Prometheus](https://prometheus.io) API for each file system (the default API address is `http://localhost:9567/metrics`), which can be used to collect JuiceFS monitoring metrics. Once the monitoring metrics are collected, they can be quickly displayed via the [Grafana](https://grafana.com) dashboard template provided by JuiceFS. + +## Collecting monitoring metrics + +There are different ways to collect monitoring metrics depending on how JuiceFS is deployed, which are described below. + +### Mount point + +When the JuiceFS file system is mounted via the [`juicefs mount`](../reference/command_reference.md#juicefs-mount) command, you can collect monitoring metrics via the address `http://localhost:9567/metrics`, or you can customize it via the `--metrics` option. For example: + +```shell +$ juicefs mount --metrics localhost:9567 ... +``` + +You can view these monitoring metrics using the command line tool: + +```shell +$ curl http://localhost:9567/metrics +``` + +In addition, the root directory of each JuiceFS file system has a hidden file called `.stats`, through which you can also view monitoring metrics. For example (assuming here that the path to the mount point is `/jfs`): + +```shell +$ cat /jfs/.stats +``` + +### Kubernetes + +The [JuiceFS CSI Driver](../deployment/how_to_use_on_kubernetes.md) will provide monitoring metrics on the `9567` port of the mount pod by default, or you can customize it by adding the `metrics` option to the `mountOptions` (please refer to the [CSI Driver documentation](https://juicefs.com/docs/csi/examples/mount-options) for how to modify `mountOptions`), e.g.: + +```yaml +apiVersion: v1 +kind: PersistentVolume +metadata: + name: juicefs-pv + labels: + juicefs-name: ten-pb-fs +spec: + ... + mountOptions: + - metrics=0.0.0.0:9567 +``` + +Add a crawl job to `prometheus.yml` to collect monitoring metrics: + +```yaml +scrape_configs: + - job_name: 'juicefs' + kubernetes_sd_configs: + - role: pod + relabel_configs: + - source_labels: [__meta_kubernetes_pod_label_app_kubernetes_io_name] + action: keep + regex: juicefs-mount + - source_labels: [__address__] + action: replace + regex: ([^:]+)(:\d+)? 
+ replacement: $1:9567 + target_label: __address__ + - source_labels: [__meta_kubernetes_pod_node_name] + target_label: node + action: replace +``` + +Here we assume the Prometheus server is running inside the Kubernetes cluster. If your Prometheus server is running outside the Kubernetes cluster, make sure the Kubernetes cluster nodes are reachable from the Prometheus server, and refer to [this issue](https://github.com/prometheus/prometheus/issues/4633) to add the `api_server` and `tls_config` client auth to the above configuration like this: + +```yaml +scrape_configs: + - job_name: 'juicefs' + kubernetes_sd_configs: + - api_server: + role: pod + tls_config: + ca_file: <...> + cert_file: <...> + key_file: <...> + insecure_skip_verify: false + relabel_configs: + ... +``` + +### S3 Gateway + +:::note +This feature requires JuiceFS client version 0.17.1 and above. +::: + +The [JuiceFS S3 Gateway](../deployment/s3_gateway.md) will provide monitoring metrics at the address `http://localhost:9567/metrics` by default, or you can customize it with the `--metrics` option. For example: + +```shell +$ juicefs gateway --metrics localhost:9567 ... +``` + +If you are deploying JuiceFS S3 Gateway in Kubernetes, you can refer to the Prometheus configuration in the [Kubernetes](#kubernetes) section to collect monitoring metrics (the difference is mainly in the regular expression for the label `__meta_kubernetes_pod_label_app_kubernetes_io_name`), e.g.: + +```yaml +scrape_configs: + - job_name: 'juicefs-s3-gateway' + kubernetes_sd_configs: + - role: pod + relabel_configs: + - source_labels: [__meta_kubernetes_pod_label_app_kubernetes_io_name] + action: keep + regex: juicefs-s3-gateway + - source_labels: [__address__] + action: replace + regex: ([^:]+)(:\d+)? + replacement: $1:9567 + target_label: __address__ + - source_labels: [__meta_kubernetes_pod_node_name] + target_label: node + action: replace +``` + +#### Collected via Prometheus Operator + +[Prometheus Operator](https://github.com/prometheus-operator/prometheus-operator) enables users to quickly deploy and manage Prometheus in Kubernetes. With the help of the `ServiceMonitor` CRD provided by Prometheus Operator, the scrape configuration can be generated automatically. For example (assuming that the `Service` of the JuiceFS S3 Gateway is deployed in the `kube-system` namespace): + +```yaml +apiVersion: monitoring.coreos.com/v1 +kind: ServiceMonitor +metadata: + name: juicefs-s3-gateway +spec: + namespaceSelector: + matchNames: + - kube-system + selector: + matchLabels: + app.kubernetes.io/name: juicefs-s3-gateway + endpoints: + - port: metrics +``` + +For more information about Prometheus Operator, please check the [official documentation](https://github.com/prometheus-operator/prometheus-operator/blob/main/Documentation/user-guides/getting-started.md). + +### Hadoop + +The [JuiceFS Hadoop Java SDK](../deployment/hadoop_java_sdk.md) supports reporting monitoring metrics to [Pushgateway](https://github.com/prometheus/pushgateway) and then letting Prometheus scrape the metrics from Pushgateway. + +Please enable metrics reporting with the following configuration: + +```xml +<property> + <name>juicefs.push-gateway</name> + <value>host:port</value> +</property> +``` + +At the same time, the frequency of reporting metrics can be modified through the `juicefs.push-interval` configuration. The default is to report once every 10 seconds. For all configurations supported by the JuiceFS Hadoop Java SDK, please refer to the [documentation](../deployment/hadoop_java_sdk.md#client-configurations).
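+ +For example, to report metrics every 30 seconds instead of the default 10 seconds, you can set `juicefs.push-interval` in the same configuration file. The snippet below is only an illustration; the value `30` (interpreted as seconds) is an assumption chosen for the example: + +```xml +<property> + <name>juicefs.push-interval</name> + <value>30</value> +</property> +```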
+ +:::info +As suggested by the [Pushgateway official document](https://github.com/prometheus/pushgateway/blob/master/README.md#configure-the-pushgateway-as-a-target-to-scrape), Prometheus's [scrape configuration](https://prometheus.io/docs/prometheus/latest/configuration/configuration/#scrape_config) needs to set `honor_labels: true`. + +It is important to note that the timestamp of the metrics scraped by Prometheus from Pushgateway is not the time when the JuiceFS Hadoop Java SDK reported them, but the time when they were scraped. For details, please refer to the [Pushgateway official document](https://github.com/prometheus/pushgateway/blob/master/README.md#about-timestamps). + +By default, Pushgateway will only save metrics in memory. If you need to persist them to disk, you can specify the file path for saving with the `--persistence.file` option and the frequency of saving to the file with the `--persistence.interval` option (the default save interval is 5 minutes). +::: + +:::note +Each process using the JuiceFS Hadoop Java SDK reports its own unique metrics, and Pushgateway will always remember all the collected metrics. This results in a continuous accumulation of metrics that takes up too much memory and also slows down Prometheus scraping. It is recommended to clean up metrics on Pushgateway regularly. + +Use the following command to clean up the metrics of Pushgateway regularly. Clearing the metrics will not prevent the running JuiceFS Hadoop Java SDK from continuously reporting data. **Note that the `--web.enable-admin-api` option must be specified when Pushgateway is started, and the following command will clear all monitoring metrics in Pushgateway.** + +```bash +$ curl -X PUT http://host:9091/api/v1/admin/wipe +``` +::: + +For more information about Pushgateway, please check the [official document](https://github.com/prometheus/pushgateway/blob/master/README.md). + +### Use Consul as registration center + +:::note +This feature requires JuiceFS client version 1.0.0 and above. +::: + +JuiceFS supports using Consul as the registration center for the metrics API. The default Consul address is `127.0.0.1:8500`. You can customize the address with the `--consul` option, e.g.: + +```shell +$ juicefs mount --consul 1.2.3.4:8500 ... +``` + +When the Consul address is configured, the `--metrics` option does not need to be configured; JuiceFS will automatically configure the metrics URL according to its own network and port conditions. If `--metrics` is set at the same time, it will first try to listen on the configured metrics URL. + +For each instance registered to Consul, its `serviceName` is `juicefs`, and the format of `serviceId` is `<IP>:<mount point>`, for example: `127.0.0.1:/tmp/jfs`. + +The meta of each instance contains two fields: `hostname` and `mountpoint`. When `mountpoint` is `s3gateway`, the instance is an S3 gateway. + +## Display monitoring metrics + +### Grafana dashboard template + +JuiceFS provides some dashboard templates for Grafana, which can be imported to show the metrics collected in Prometheus.
The dashboard templates currently available are: + +| Name | Description | +| ---- | ----------- | +| [`grafana_template.json`](https://github.com/juicedata/juicefs/blob/main/docs/en/grafana_template.json) | Shows metrics collected from the mount point, S3 gateway (non-Kubernetes deployment) and Hadoop Java SDK | +| [`grafana_template_k8s.json`](https://github.com/juicedata/juicefs/blob/main/docs/en/grafana_template_k8s.json) | Shows metrics collected from the Kubernetes CSI Driver and S3 gateway (Kubernetes deployment) | + +A sample Grafana dashboard looks like this: + +![JuiceFS Grafana dashboard](../images/grafana_dashboard.png) + +## Monitoring metrics reference + +Please refer to the ["JuiceFS Metrics"](../reference/p8s_metrics.md) document. diff --git a/docs/en/administration/quota.md b/docs/en/administration/quota.md new file mode 100644 index 0000000..14c3f64 --- /dev/null +++ b/docs/en/administration/quota.md @@ -0,0 +1,128 @@ +--- +sidebar_label: Storage Quota +sidebar_position: 7 +--- +# JuiceFS Storage Quota + +JuiceFS v0.14.2 began to support file system level storage quotas. This feature includes: + +- Limit the total available capacity of the file system +- Limit the total number of inodes of the file system + +:::tip +The storage quota settings are stored in the metadata engine for all mount points to read. The client of each mount point caches its own used capacity and inode count and synchronizes them with the metadata engine once per second, while the client reads the latest usage values from the metadata engine every 10 seconds to synchronize the usage information among mount points. This synchronization mechanism does not guarantee that the usage data is counted accurately. +::: + +## View file system information + +In a Linux environment, for example, the `df` command that comes with the system reports the default capacity of a JuiceFS file system as `1.0P`: + +```shell +$ df -Th | grep juicefs +JuiceFS:ujfs fuse.juicefs 1.0P 682M 1.0P 1% /mnt +``` + +:::note +JuiceFS implements support for the POSIX interface through FUSE. Because the underlying object storage usually has virtually unlimited capacity, the reported capacity is only an estimate (which effectively means unlimited) rather than the actual capacity, and it changes dynamically with actual usage. +::: + +The `config` command that comes with the client allows you to view the details of a file system: + +```shell +$ juicefs config $METAURL +{ + "Name": "ujfs", + "UUID": "1aa6d290-279b-432f-b9b5-9d7fd597dec2", + "Storage": "minio", + "Bucket": "127.0.0.1:9000/jfs1", + "AccessKey": "herald", + "SecretKey": "removed", + "BlockSize": 4096, + "Compression": "none", + "Shards": 0, + "Partitions": 0, + "Capacity": 0, + "Inodes": 0, + "TrashDays": 0 +} +``` + +## Limit total capacity + +The capacity limit in GiB can be set with `--capacity` when creating a file system, e.g. to create a file system with an available capacity of 100 GiB: + +```shell +$ juicefs format --storage minio \ +--bucket 127.0.0.1:9000/jfs1 \ +...
+--capacity 100 \ +$METAURL myjfs +``` + +You can also set a capacity limit for an existing file system with the `config` command: + +```shell +$ juicefs config $METAURL --capacity 100 +2022/01/27 12:31:39.506322 juicefs[16259] : Meta address: postgres://herald@127.0.0.1:5432/jfs1 +2022/01/27 12:31:39.521232 juicefs[16259] : The latency to database is too high: 14.771783ms + capacity: 0 GiB -> 100 GiB +``` + +For a file system with a storage quota set, the reported capacity becomes the quota capacity: + +```shell +$ df -Th | grep juicefs +JuiceFS:ujfs fuse.juicefs 100G 682M 100G 1% /mnt +``` + +## Limit the total number of inodes + +On Linux systems, each file (a folder is also a type of file) has an inode regardless of size, so limiting the number of inodes is equivalent to limiting the number of files. + +The quota can be set with `--inodes` when creating the file system, e.g.: + +```shell +$ juicefs format --storage minio \ +--bucket 127.0.0.1:9000/jfs1 \ +... +--inodes 100 \ +$METAURL myjfs +``` + +The file system created by the above command allows only 100 files to be stored, but there is no limit to the size of individual files. For example, a single file of 1 TB or even larger is fine, as long as the total number of files does not exceed 100. + +You can also set an inode quota for an existing file system by using the `config` command: + +```shell +$ juicefs config $METAURL --inodes 100 +2022/01/27 12:35:37.311465 juicefs[16407] : Meta address: postgres://herald@127.0.0.1:5432/jfs1 +2022/01/27 12:35:37.322991 juicefs[16407] : The latency to database is too high: 11.413961ms + inodes: 0 -> 100 +``` + +## Put together + +You can combine `--capacity` and `--inodes` to set the quota of a file system more flexibly, for example, to create a file system that limits the total capacity to 100 TiB and allows only 100000 files to be stored: + +```shell +$ juicefs format --storage minio \ +--bucket 127.0.0.1:9000/jfs1 \ +... +--capacity 102400 \ +--inodes 100000 \ +$METAURL myjfs +``` + +Similarly, for an existing file system, these settings can be made separately: + +```shell +juicefs config $METAURL --capacity 102400 +``` + +```shell +juicefs config $METAURL --inodes 100000 +``` + +:::tip +The client reads the latest storage quota settings from the metadata engine every 60 seconds to update its local settings, so it may take up to 60 seconds for other mount points to pick up a quota change. +::: diff --git a/docs/en/administration/status_check_and_maintenance.md b/docs/en/administration/status_check_and_maintenance.md new file mode 100644 index 0000000..ca4c4f3 --- /dev/null +++ b/docs/en/administration/status_check_and_maintenance.md @@ -0,0 +1,8 @@ +--- +sidebar_label: Status Check & Maintenance +sidebar_position: 8 +--- +# Status Check & Maintenance + +:::note +Work in progress. \ No newline at end of file diff --git a/docs/en/administration/sync_accounts_between_multiple_hosts.md b/docs/en/administration/sync_accounts_between_multiple_hosts.md new file mode 100644 index 0000000..964cda2 --- /dev/null +++ b/docs/en/administration/sync_accounts_between_multiple_hosts.md @@ -0,0 +1,134 @@ +--- +sidebar_label: Sync Accounts between Multiple Hosts +sidebar_position: 10 +slug: /sync_accounts_between_multiple_hosts +--- + +# Sync Accounts between Multiple Hosts + +JuiceFS supports POSIX-compatible ACLs to manage permissions at the granularity of directories or files. The behavior is the same as a local file system.
+ +To provide users with an intuitive and consistent permission management experience (e.g. files accessible by user A on host X should be accessible on host Y by the same user), any user who wants to access JuiceFS should have the same UID and GID on all hosts. + +Here we provide a simple [Ansible](https://www.ansible.com/community) playbook to demonstrate how to ensure an account with the same UID and GID on multiple hosts. + +:::note +If you are using JuiceFS in a Hadoop environment, besides syncing accounts between multiple hosts, you can also specify a global user list and user group file. Please refer to [here](../deployment/hadoop_java_sdk.md#other-configurations) for more information. +::: + +## Install Ansible + +Select a host as the [control node](https://docs.ansible.com/ansible/latest/installation_guide/intro_installation.html#managed-node-requirements), which can access all other hosts via `ssh` with the same privileged account, such as `root` or another sudo account. Install Ansible on this host. Read [Installing Ansible](https://docs.ansible.com/ansible/latest/installation_guide/intro_installation.html#installing-ansible) for more installation details. + + + +## Ensure the same account on all hosts + +Create an empty directory `account-sync` and save the content below in `play.yaml` under this directory. + +```yaml +--- +- hosts: all + tasks: + - name: "Ensure group {{ group }} with gid {{ gid }} exists" + group: + name: "{{ group }}" + gid: "{{ gid }}" + state: present + + - name: "Ensure user {{ user }} with uid {{ uid }} exists" + user: + name: "{{ user }}" + uid: "{{ uid }}" + group: "{{ gid }}" + state: present +``` + + + +Create a file named `hosts` in this directory and put the IP addresses of all hosts on which the account needs to be created into this file, one IP address per line. + +Here we ensure an account `alice` with UID 1200 and group `staff` with GID 500 on 2 hosts: + +```shell +~/account-sync$ cat hosts +172.16.255.163 +172.16.255.180 +~/account-sync$ ansible-playbook -i hosts -u root --ssh-extra-args "-o StrictHostKeyChecking=no" \ +--extra-vars "group=staff gid=500 user=alice uid=1200" play.yaml + +PLAY [all] ************************************************************************************************ + +TASK [Gathering Facts] ************************************************************************************ +ok: [172.16.255.180] +ok: [172.16.255.163] + +TASK [Ensure group staff with gid 500 exists] ************************************************************* +ok: [172.16.255.163] +ok: [172.16.255.180] + +TASK [Ensure user alice with uid 1200 exists] ************************************************************* +changed: [172.16.255.180] +changed: [172.16.255.163] + +PLAY RECAP ************************************************************************************************ +172.16.255.163 : ok=3 changed=1 unreachable=0 failed=0 +172.16.255.180 : ok=3 changed=1 unreachable=0 failed=0 +``` + +Now the new account `alice:staff` has been created on these 2 hosts. + +If the specified UID or GID has already been allocated to another user or group on some hosts, the creation will fail.
+ +```shell +~/account-sync$ ansible-playbook -i hosts -u root --ssh-extra-args "-o StrictHostKeyChecking=no" \ +--extra-vars "group=ubuntu gid=1000 user=ubuntu uid=1000" play.yaml + +PLAY [all] ************************************************************************************************ + +TASK [Gathering Facts] ************************************************************************************ +ok: [172.16.255.180] +ok: [172.16.255.163] + +TASK [Ensure group ubuntu with gid 1000 exists] *********************************************************** +ok: [172.16.255.163] +fatal: [172.16.255.180]: FAILED! => {"changed": false, "msg": "groupmod: GID '1000' already exists\n", "name": "ubuntu"} + +TASK [Ensure user ubuntu with uid 1000 exists] ************************************************************ +ok: [172.16.255.163] + to retry, use: --limit @/home/ubuntu/account-sync/play.retry + +PLAY RECAP ************************************************************************************************ +172.16.255.163 : ok=3 changed=0 unreachable=0 failed=0 +172.16.255.180 : ok=1 changed=0 unreachable=0 failed=1 +``` + +In the above example, the group ID 1000 has already been allocated to another group on host `172.16.255.180`. We should **change the GID** or **delete the group with GID 1000** on host `172.16.255.180`, then run the playbook again. + +:::caution +If the user account already exists on the host and we change it to another UID or GID value, the user may lose permissions to the files and directories they previously had access to. For example: + +```shell +$ ls -l /tmp/hello.txt +-rw-r--r-- 1 alice staff 6 Apr 26 21:43 /tmp/hello.txt +$ id alice +uid=1200(alice) gid=500(staff) groups=500(staff) +``` + +We change the UID of alice from 1200 to 1201: + +```shell +~/account-sync$ ansible-playbook -i hosts -u root --ssh-extra-args "-o StrictHostKeyChecking=no" \ +--extra-vars "group=staff gid=500 user=alice uid=1201" play.yaml +``` + +Now we have no permission to remove this file as its owner is not alice: + +```shell +$ ls -l /tmp/hello.txt +-rw-r--r-- 1 1200 staff 6 Apr 26 21:43 /tmp/hello.txt +$ rm /tmp/hello.txt +rm: remove write-protected regular file '/tmp/hello.txt'? y +rm: cannot remove '/tmp/hello.txt': Operation not permitted +``` +::: diff --git a/docs/en/benchmark/_performance_tuning.md b/docs/en/benchmark/_performance_tuning.md new file mode 100644 index 0000000..1411c6c --- /dev/null +++ b/docs/en/benchmark/_performance_tuning.md @@ -0,0 +1,5 @@ +--- +sidebar_label: Performance Tuning +sidebar_position: 5 +--- +# Performance Tuning \ No newline at end of file diff --git a/docs/en/benchmark/benchmark.md b/docs/en/benchmark/benchmark.md new file mode 100644 index 0000000..b41f05c --- /dev/null +++ b/docs/en/benchmark/benchmark.md @@ -0,0 +1,41 @@ +--- +sidebar_label: Performance Benchmark +sidebar_position: 1 +slug: . +--- +# Performance Benchmark + +## Basic benchmark + +JuiceFS provides a subcommand to run a few basic benchmarks to understand how it works in your environment: + +![JuiceFS Bench](../images/juicefs-bench.png) + +## Throughput + +We performed a sequential read/write benchmark on JuiceFS, [EFS](https://aws.amazon.com/efs) and [S3FS](https://github.com/s3fs-fuse/s3fs-fuse) using [fio](https://github.com/axboe/fio); here is the result: + +[![Sequential Read Write Benchmark](../images/sequential-read-write-benchmark.svg)](../images/sequential-read-write-benchmark.svg) + +It shows that JuiceFS can provide 10X more throughput than the other two; read [more details](fio.md).
+ +## Metadata IOPS + +Performed a simple mdtest benchmark on JuiceFS, [EFS](https://aws.amazon.com/efs) and [S3FS](https://github.com/s3fs-fuse/s3fs-fuse) by [mdtest](https://github.com/hpc/ior), here is the result: + +[![Metadata Benchmark](../images/metadata-benchmark.svg)](../images/metadata-benchmark.svg) + +It shows JuiceFS can provide significantly more metadata IOPS than the other two, read [more details](mdtest.md). + +## Analyze performance + +There is a virtual file called `.accesslog` in the root of JuiceFS to show all the operations and the time they takes, for example: + +``` +$ cat /jfs/.accesslog +2021.01.15 08:26:11.003330 [uid:0,gid:0,pid:4403] write (17669,8666,4993160): OK <0.000010> +2021.01.15 08:26:11.003473 [uid:0,gid:0,pid:4403] write (17675,198,997439): OK <0.000014> +2021.01.15 08:26:11.003616 [uid:0,gid:0,pid:4403] write (17666,390,951582): OK <0.000006> +``` + +The last number on each line is the time (in seconds) current operation takes. You can use this directly to debug and analyze performance issues, or try `./juicefs profile /jfs` to monitor real time statistics. Please run `./juicefs profile -h` or refer to [here](../benchmark/operations_profiling.md) to learn more about this subcommand. diff --git a/docs/en/benchmark/fio.md b/docs/en/benchmark/fio.md new file mode 100644 index 0000000..456030c --- /dev/null +++ b/docs/en/benchmark/fio.md @@ -0,0 +1,73 @@ +--- +sidebar_label: Benchmark with fio +sidebar_position: 7 +slug: /fio +--- +# Benchmark with fio + +## Testing Approach + +Performed a sequential read/write benchmark on JuiceFS, [EFS](https://aws.amazon.com/efs) and [S3FS](https://github.com/s3fs-fuse/s3fs-fuse) by [fio](https://github.com/axboe/fio). + +## Testing Tool + +The following tests were performed by fio 3.1. + +Sequential read test (numjobs: 1): + +``` +fio --name=sequential-read --directory=/s3fs --rw=read --refill_buffers --bs=4M --size=4G +fio --name=sequential-read --directory=/efs --rw=read --refill_buffers --bs=4M --size=4G +fio --name=sequential-read --directory=/jfs --rw=read --refill_buffers --bs=4M --size=4G +``` + +Sequential write test (numjobs: 1): + +``` +fio --name=sequential-write --directory=/s3fs --rw=write --refill_buffers --bs=4M --size=4G --end_fsync=1 +fio --name=sequential-write --directory=/efs --rw=write --refill_buffers --bs=4M --size=4G --end_fsync=1 +fio --name=sequential-write --directory=/jfs --rw=write --refill_buffers --bs=4M --size=4G --end_fsync=1 +``` + +Sequential read test (numjobs: 16): + +``` +fio --name=big-file-multi-read --directory=/s3fs --rw=read --refill_buffers --bs=4M --size=4G --numjobs=16 +fio --name=big-file-multi-read --directory=/efs --rw=read --refill_buffers --bs=4M --size=4G --numjobs=16 +fio --name=big-file-multi-read --directory=/jfs --rw=read --refill_buffers --bs=4M --size=4G --numjobs=16 +``` + +Sequential write test (numjobs: 16): + +``` +fio --name=big-file-multi-write --directory=/s3fs --rw=write --refill_buffers --bs=4M --size=4G --numjobs=16 --end_fsync=1 +fio --name=big-file-multi-write --directory=/efs --rw=write --refill_buffers --bs=4M --size=4G --numjobs=16 --end_fsync=1 +fio --name=big-file-multi-write --directory=/jfs --rw=write --refill_buffers --bs=4M --size=4G --numjobs=16 --end_fsync=1 +``` + +## Testing Environment + +In the following test results, all fio tests based on the c5d.18xlarge EC2 instance (72 CPU, 144G RAM), Ubuntu 18.04 LTS (Kernel 5.4.0) system, JuiceFS use the local Redis instance (version 4.0.9) to store metadata. 
+ +JuiceFS mount command: + +``` +./juicefs format --storage=s3 --bucket=https://.s3..amazonaws.com localhost benchmark +./juicefs mount --max-uploads=150 --io-retries=20 localhost /jfs +``` + +EFS mount command (the same as the configuration page): + +``` +mount -t nfs -o nfsvers=4.1,rsize=1048576,wsize=1048576,hard,timeo=600,retrans=2,noresvport, .efs..amazonaws.com:/ /efs +``` + +S3FS (version 1.82) mount command: + +``` +s3fs :/s3fs /s3fs -o host=https://s3..amazonaws.com,endpoint=,passwd_file=${HOME}/.passwd-s3fs +``` + +## Testing Result + +![Sequential Read Write Benchmark](../images/sequential-read-write-benchmark.svg) diff --git a/docs/en/benchmark/mdtest.md b/docs/en/benchmark/mdtest.md new file mode 100644 index 0000000..abcd435 --- /dev/null +++ b/docs/en/benchmark/mdtest.md @@ -0,0 +1,122 @@ +--- +sidebar_label: Benchmark with mdtest +sidebar_position: 8 +slug: /mdtest +--- +# Benchmark with mdtest + +## Testing Approach + +Performed a metadata test on JuiceFS, [EFS](https://aws.amazon.com/efs) and [S3FS](https://github.com/s3fs-fuse/s3fs-fuse) by [mdtest](https://github.com/hpc/ior). + +## Testing Tool + +The following tests were performed by mdtest 3.4. +Arguments of mdtest are adjusted to ensure the command can be finished in 5 minutes. + +``` +./mdtest -d /s3fs/mdtest -b 6 -I 8 -z 2 +./mdtest -d /efs/mdtest -b 6 -I 8 -z 4 +./mdtest -d /jfs/mdtest -b 6 -I 8 -z 4 +``` + +## Testing Environment + +In the following test results, all mdtest tests based on the c5.large EC2 instance (2 CPU, 4G RAM), Ubuntu 18.04 LTS (Kernel 5.4.0) system, JuiceFS use Redis (version 4.0.9) running on a c5.large EC2 instance in the same available zone to store metadata. + +JuiceFS mount command: + +``` +./juicefs format --storage=s3 --bucket=https://.s3..amazonaws.com localhost benchmark +nohup ./juicefs mount localhost /jfs & +``` + +EFS mount command (the same as the configuration page): + +``` +mount -t nfs -o nfsvers=4.1,rsize=1048576,wsize=1048576,hard,timeo=600,retrans=2,noresvport, .efs..amazonaws.com:/ /efs +``` + +S3FS (version 1.82) mount command: + +``` +s3fs :/s3fs /s3fs -o host=https://s3..amazonaws.com,endpoint=,passwd_file=${HOME}/.passwd-s3fs +``` + +## Testing Result + +![Metadata Benchmark](../images/metadata-benchmark.svg) + +### S3FS +``` +mdtest-3.4.0+dev was launched with 1 total task(s) on 1 node(s) +Command line used: ./mdtest '-d' '/s3fs/mdtest' '-b' '6' '-I' '8' '-z' '2' +WARNING: Read bytes is 0, thus, a read test will actually just open/close. +Path : /s3fs/mdtest +FS : 256.0 TiB Used FS: 0.0% Inodes: 0.0 Mi Used Inodes: -nan% +Nodemap: 1 +1 tasks, 344 files/directories + +SUMMARY rate: (of 1 iterations) + Operation Max Min Mean Std Dev + --------- --- --- ---- ------- + Directory creation : 5.977 5.977 5.977 0.000 + Directory stat : 435.898 435.898 435.898 0.000 + Directory removal : 8.969 8.969 8.969 0.000 + File creation : 5.696 5.696 5.696 0.000 + File stat : 68.692 68.692 68.692 0.000 + File read : 33.931 33.931 33.931 0.000 + File removal : 23.658 23.658 23.658 0.000 + Tree creation : 5.951 5.951 5.951 0.000 + Tree removal : 9.889 9.889 9.889 0.000 +``` + +### EFS + +``` +mdtest-3.4.0+dev was launched with 1 total task(s) on 1 node(s) +Command line used: ./mdtest '-d' '/efs/mdtest' '-b' '6' '-I' '8' '-z' '4' +WARNING: Read bytes is 0, thus, a read test will actually just open/close. 
+Path : /efs/mdtest +FS : 8388608.0 TiB Used FS: 0.0% Inodes: 0.0 Mi Used Inodes: -nan% +Nodemap: 1 +1 tasks, 12440 files/directories + +SUMMARY rate: (of 1 iterations) + Operation Max Min Mean Std Dev + --------- --- --- ---- ------- + Directory creation : 192.301 192.301 192.301 0.000 + Directory stat : 1311.166 1311.166 1311.166 0.000 + Directory removal : 213.132 213.132 213.132 0.000 + File creation : 179.293 179.293 179.293 0.000 + File stat : 915.230 915.230 915.230 0.000 + File read : 371.012 371.012 371.012 0.000 + File removal : 217.498 217.498 217.498 0.000 + Tree creation : 187.906 187.906 187.906 0.000 + Tree removal : 218.357 218.357 218.357 0.000 +``` + +### JuiceFS + +``` +mdtest-3.4.0+dev was launched with 1 total task(s) on 1 node(s) +Command line used: ./mdtest '-d' '/jfs/mdtest' '-b' '6' '-I' '8' '-z' '4' +WARNING: Read bytes is 0, thus, a read test will actually just open/close. +Path : /jfs/mdtest +FS : 1024.0 TiB Used FS: 0.0% Inodes: 10.0 Mi Used Inodes: 0.0% +Nodemap: 1 +1 tasks, 12440 files/directories + +SUMMARY rate: (of 1 iterations) + Operation Max Min Mean Std Dev + --------- --- --- ---- ------- + Directory creation : 1416.582 1416.582 1416.582 0.000 + Directory stat : 3810.083 3810.083 3810.083 0.000 + Directory removal : 1115.108 1115.108 1115.108 0.000 + File creation : 1410.288 1410.288 1410.288 0.000 + File stat : 5023.227 5023.227 5023.227 0.000 + File read : 3487.947 3487.947 3487.947 0.000 + File removal : 1163.371 1163.371 1163.371 0.000 + Tree creation : 1503.004 1503.004 1503.004 0.000 + Tree removal : 1119.806 1119.806 1119.806 0.000 +``` diff --git a/docs/en/benchmark/metadata_engines_benchmark.md b/docs/en/benchmark/metadata_engines_benchmark.md new file mode 100644 index 0000000..2253816 --- /dev/null +++ b/docs/en/benchmark/metadata_engines_benchmark.md @@ -0,0 +1,188 @@ +--- +sidebar_label: Metadata Engines Benchmark +sidebar_position: 6 +slug: /metadata_engines_benchmark +--- +# Metadata Engines Benchmark + +Conclusion first: + +- For pure metadata operations, MySQL costs about 2 ~ 4x times of Redis; TiKV has similar performance to MySQL, and in most cases it costs a bit less +- For small I/O (~100 KiB) workloads, total time costs with MySQL are about 1 ~ 3x of those with Redis; TiKV performs similarly to MySQL +- For large I/O (~4 MiB) workloads, total time costs with different metadata engines show no obvious difference (object storage becomes the bottleneck) + +>**Note**: +> +>1. By changing `appendfsync` from `always` to `everysec`, Redis gains performance boost but loses a bit of data reliability; more information can be found [here](https://redis.io/topics/persistence) +>2. Both Redis and MySQL store only one replica locally, while TiKV stores three replicas in three different hosts using Raft protocol + + +Details are provided below. Please note all the tests are run with the same object storage (to save data), client and metadata hosts; only metadata engines differ. 
+ +## Environment + +### JuiceFS Version + +juicefs version 0.16-dev (2021-07-20 9efa870) + +### Object Storage + +Amazon S3 + +### Client Hosts + +- Amazon c5.xlarge: 4 vCPUs, 8 GiB Memory, Up to 10 Gigabit Network +- Ubuntu 18.04.4 LTS + +### Meta Hosts + +- Amazon c5d.xlarge: 4 vCPUs, 8 GiB Memory, Up to 10 Gigabit Network, 100 GB SSD (local storage for metadata engines) +- Ubuntu 18.04.4 LTS +- SSD is formated as ext4 and mounted on `/data` + +### Meta Engines + +#### Redis + +- Version: [6.2.3](https://download.redis.io/releases/redis-6.2.3.tar.gz) +- Configuration: + - appendonly: yes + - appendfsync: always or everysec + - dir: `/data/redis` + +#### MySQL + +- Version: 8.0.25 +- `/var/lib/mysql` is bind mounted on `/data/mysql` + +### TiKV + +- Version: 5.1.0 +- Configuration: + - deploy_dir: `/data/tikv-deploy` + - data_dir: `/data/tikv-data` + +## Tools + +All the following tests are run for each metadata engine. + +### Golang Benchmark + +Simple benchmarks within the source code: `pkg/meta/benchmarks_test.go`. + +### JuiceFS Bench + +JuiceFS provides a basic benchmark command: + +```bash +$ ./juicefs bench /mnt/jfs +``` + +### mdtest + +- Version: mdtest-3.4.0+dev + +Run parallel tests on 3 client nodes: + +```bash +$ cat myhost +client1 slots=4 +client2 slots=4 +client3 slots=4 +``` + +Test commands: + +```bash +# metadata only +$ mpirun --use-hwthread-cpus --allow-run-as-root -np 12 --hostfile myhost --map-by slot /root/mdtest -b 3 -z 1 -I 100 -u -d /mnt/jfs + +# 12000 * 100KiB files +$ mpirun --use-hwthread-cpus --allow-run-as-root -np 12 --hostfile myhost --map-by slot /root/mdtest -F -w 102400 -I 1000 -z 0 -u -d /mnt/jfs +``` + +### fio + +- Version: fio-3.1 + +```bash +fio --name=big-write --directory=/mnt/jfs --rw=write --refill_buffers --bs=4M --size=4G --numjobs=4 --end_fsync=1 --group_reporting +``` + +## Results + +### Golang Benchmark + +- Shows time cost (us/op), smaller is better +- Number in parentheses is the multiple of Redis-Always cost (`always` and `everysec` are candidates for Redis configuration `appendfsync`) +- Because of metadata cache, the results of `Read` are all less than 1us, which are not comparable for now + +| | Redis-Always | Redis-Everysec | MySQL | TiKV | +| ------------ | ------------ | -------------- | ----- | ---- | +| mkdir | 986 | 700 (0.7) | 2274 (2.3) | 1961 (2.0) | +| mvdir | 1116 | 940 (0.8) | 3690 (3.3) | 2145 (1.9) | +| rmdir | 981 | 817 (0.8) | 2980 (3.0) | 2300 (2.3) | +| readdir_10 | 376 | 378 (1.0) | 1365 (3.6) | 965 (2.6) | +| readdir_1k | 1804 | 1819 (1.0) | 15449 (8.6) | 6776 (3.8) | +| mknod | 968 | 665 (0.7) | 2325 (2.4) | 1997 (2.1) | +| create | 957 | 703 (0.7) | 2291 (2.4) | 1971 (2.1) | +| rename | 1082 | 1040 (1.0) | 3701 (3.4) | 2162 (2.0) | +| unlink | 842 | 710 (0.8) | 3293 (3.9) | 2217 (2.6) | +| lookup | 118 | 127 (1.1) | 409 (3.5) | 571 (4.8) | +| getattr | 108 | 120 (1.1) | 358 (3.3) | 285 (2.6) | +| setattr | 568 | 490 (0.9) | 1239 (2.2) | 1720 (3.0) | +| access | 109 | 116 (1.1) | 354 (3.2) | 283 (2.6) | +| setxattr | 237 | 113 (0.5) | 1197 (5.1) | 1508 (6.4) | +| getxattr | 110 | 108 (1.0) | 326 (3.0) | 279 (2.5) | +| removexattr | 244 | 116 (0.5) | 847 (3.5) | 1856 (7.6) | +| listxattr_1 | 111 | 106 (1.0) | 336 (3.0) | 286 (2.6) | +| listxattr_10 | 112 | 111 (1.0) | 376 (3.4) | 303 (2.7) | +| link | 715 | 574 (0.8) | 2610 (3.7) | 1949 (2.7) | +| symlink | 952 | 702 (0.7) | 2583 (2.7) | 1960 (2.1) | +| newchunk | 235 | 113 (0.5) | 1 (0.0) | 1 (0.0) | +| write | 816 | 564 (0.7) | 2788 (3.4) | 2138 (2.6) | +| 
read_1 | 0 | 0 (0.0) | 0 (0.0) | 0 (0.0) | +| read_10 | 0 | 0 (0.0) | 0 (0.0) | 0 (0.0) | + +### JuiceFS Bench + +| | Redis-Always | Redis-Everysec | MySQL | TiKV | +| -------------- | -------------- | -------------- | -------------- | -------------- | +| Write big | 312.81 MiB/s | 303.45 MiB/s | 310.26 MiB/s | 310.90 MiB/s | +| Read big | 348.06 MiB/s | 525.78 MiB/s | 493.45 MiB/s | 477.78 MiB/s | +| Write small | 26.0 files/s | 27.5 files/s | 22.7 files/s | 24.2 files/s | +| Read small | 1431.6 files/s | 1113.4 files/s | 608.0 files/s | 415.7 files/s | +| Stat file | 6713.7 files/s | 6885.8 files/s | 2144.9 files/s | 1164.5 files/s | +| FUSE operation | 0.45 ms | 0.32 ms | 0.41 ms | 0.40 ms | +| Update meta | 1.04 ms | 0.79 ms | 3.36 ms | 1.74 ms | + +### mdtest + +- Shows rate (ops/sec), bigger is better + +| | Redis-Always | Redis-Everysec | MySQL | TiKV | +| ------------------ | ------------ | -------------- | --------- | --------- | +| **EMPTY FILES** | | | | | +| Directory creation | 4149.645 | 9261.190 | 1603.298 | 2023.177 | +| Directory stat | 172665.701 | 243307.527 | 15678.643 | 15029.717 | +| Directory removal | 4687.027 | 9575.706 | 1420.124 | 1772.861 | +| File creation | 4257.367 | 8994.232 | 1632.225 | 2119.616 | +| File stat | 158793.214 | 287425.368 | 15598.031 | 14466.477 | +| File read | 38872.116 | 47938.792 | 14004.083 | 17149.941 | +| File removal | 3831.421 | 10538.675 | 983.338 | 1497.474 | +| Tree creation | 100.403 | 108.657 | 44.154 | 15.615 | +| Tree removal | 127.257 | 143.625 | 51.804 | 21.005 | +| **SMALL FILES** | | | | | +| File creation | 317.999 | 317.925 | 272.272 | 280.493 | +| File stat | 54063.617 | 57798.963 | 13882.940 | 10984.141 | +| File read | 56891.010 | 57548.889 | 16038.716 | 7155.426 | +| File removal | 3638.809 | 8490.490 | 837.510 | 1184.253 | +| Tree creation | 54.523 | 119.317 | 23.336 | 5.233 | +| Tree removal | 73.366 | 82.195 | 22.783 | 4.918 | + +### fio + +| | Redis-Always | Redis-Everysec | MySQL | TiKV | +| --------------- | ------------ | -------------- | --------- | --------- | +| Write bandwidth | 350 MiB/s | 360 MiB/s | 360 MiB/s | 358 MiB/s | + diff --git a/docs/en/benchmark/operations_profiling.md b/docs/en/benchmark/operations_profiling.md new file mode 100644 index 0000000..964cddb --- /dev/null +++ b/docs/en/benchmark/operations_profiling.md @@ -0,0 +1,56 @@ +--- +sidebar_label: Operations Profiling +sidebar_position: 3 +slug: /operations_profiling +--- +# Operations Profiling + +## Introduction + +JuiceFS has a special virtual file named [`.accesslog`](../administration/fault_diagnosis_and_analysis.md#access-log) to track every operation occurred within its client. This file may generate thousands of log entries per second when under pressure, making it hard to find out what is actually going on at a certain time. Thus, we made a simple tool called [`juicefs profile`](../reference/command_reference.md#juicefs-profile) to show an overview of recently completed operations. The basic idea is to aggregate all logs in the past interval and display statistics periodically, like: + +![juicefs-profiling](../images/juicefs-profiling.gif) + +## Profiling Modes + +For now there are 2 modes of profiling: real time and replay. + +### Real Time Mode + +By executing the following command you can watch real time operations under the mount point: + +```bash +$ juicefs profile MOUNTPOINT +``` + +> **Tip**: The result is sorted in a descending order by total time. 
+ +### Replay Mode + +Running the `profile` command on an existing log file enables the **replay mode**: + +```bash +$ juicefs profile LOGFILE +``` + +When debugging or analyzing performance issues, it is usually more practical to record the access log first and then replay it (multiple times). For example: + +```bash +$ cat /jfs/.accesslog > /tmp/jfs-oplog +# later +$ juicefs profile /tmp/jfs-oplog +``` + +> **Tip 1**: The replay can be paused at any time by pressing Enter/Return, and resumed by pressing it again. +> +> **Tip 2**: Setting `--interval 0` will replay the whole log file as fast as possible, and show the result as if it were within one interval. + +## Filter + +Sometimes we are only interested in a certain user or process, so we can filter others out by specifying their IDs, e.g.: + +```bash +$ juicefs profile /tmp/jfs-oplog --uid 12345 +``` + +For more information, please run `juicefs profile -h`. diff --git a/docs/en/benchmark/performance_evaluation_guide.md b/docs/en/benchmark/performance_evaluation_guide.md new file mode 100644 index 0000000..7dfeca4 --- /dev/null +++ b/docs/en/benchmark/performance_evaluation_guide.md @@ -0,0 +1,284 @@ +--- +sidebar_label: Performance Evaluation Guide +sidebar_position: 2 +slug: /performance_evaluation_guide +--- +# JuiceFS Performance Evaluation Guide + +Before starting performance testing, it is a good idea to write down a general description of the usage scenario, including: + +1. What is the application for? For example, Apache Spark, PyTorch, or a program you developed yourself. +2. The resource allocation for running the application, including CPU, memory, network, and node size. +3. The expected size of the data, including the number and volume of files. +4. The file size and access patterns (large or small files, sequential or random reads and writes). +5. Performance requirements, such as the amount of data to be written or read per second, the QPS of accesses or the latency of operations, etc. + +The clearer and more detailed the above elements are, the easier it will be to prepare a suitable test plan and decide which performance indicators need attention in order to determine the application's requirements for various aspects of the storage system, including JuiceFS metadata configuration, network bandwidth requirements, configuration parameters, etc. Of course, it is not easy to write out all of the above clearly at the beginning, and some of the content can be clarified gradually during the testing process, **but at the end of a complete test, the above usage scenario descriptions and the corresponding test methods, test data, and test results should be complete**. + +Even if the above is not yet clear, it does not matter: the JuiceFS built-in test tool can get the core indicators of single-machine benchmark performance with a one-line command. This article will also introduce two JuiceFS built-in performance analysis tools, which can help you analyze the reasons behind JuiceFS performance in a simple and clear way when doing more complex tests. + +## Performance Testing Quick Start + +The following example describes the basic usage of the `bench` tool built into JuiceFS.
+ +### Working Environment + +- Host: Amazon EC2 c5.xlarge one +- OS: Ubuntu 20.04.1 LTS (Kernel 5.4.0-1029-aws) +- Metadata Engine: Redis 6.2.3, storage (dir) configured on system disk +- Object Storage: Amazon S3 +- JuiceFS Version: 0.17-dev (2021-09-23 2ec2badf) + +### JuiceFS Bench + +The JuiceFS `bench` command can help you quickly complete a single machine performance test to determine if the environment configuration and performance are normal by the test results. Assuming you have mounted JuiceFS to `/mnt/jfs` on your server (if you need help with JuiceFS initialization and mounting, please refer to the [Quick Start Guide](../getting-started/for_local.md), execute the following command (the `-p` parameter is recommended to set the number of CPU cores on the server). + +```bash +juicefs bench /mnt/jfs -p 4 +``` + +The test results will show each performance indicator as green, yellow or red. If you have red indicators in your results, please check the relevant configuration first, and if you need help, you can describe your problem in detail at [GitHub Discussions](https://github.com/juicedata/juicefs/discussions). + +![bench](../images/bench-guide-bench.png) + +The JuiceFS `bench` benchmark performance test flows as follows (its logic is very simple, and those interested in the details can look directly at the [source code](https://github.com/juicedata/juicefs/blob/main/cmd/bench.go). + +1. N concurrently write 1 large file of 1 GiB each with IO size of 1 MiB +2. N concurrently read 1 large file of 1 GiB each previously written, IO size 1 MiB +3. N concurrently write 100 small files of 128 KiB each, IO size is 128 KiB +4. N concurrently read 100 small files of 128 KiB each written previously, IO size 128 KiB +5. N concurrently stat 100 each of previously written 128 KiB small files +6. clean up the temporary directory for testing + +The value of the concurrency number N is specified by the `-p` parameter in the `bench` command. + +Here's a performance comparison using a few common storage types provided by AWS. + +- EFS 1TiB capacity at 150MiB/s read and 50MiB/s write at $0.08/GB-month +- EBS st1 is a throughput-optimized HDD with a maximum throughput of 500MiB/s, a maximum IOPS (1MiB I/O) of 500, and a maximum capacity of 16TiB, priced at $0.045/GB-month +- EBS gp2 is a general-purpose SSD with a maximum throughput of 250MiB/s, maximum IOPS (16KiB I/O) of 16,000, and maximum capacity of 16TiB, priced at $0.10/GB-month + +It is easy to see that in the above test, JuiceFS has significantly better sequential read and write capabilities than AWS EFS and more throughput than the commonly used EBS, but writing small files is not as fast because every file written needs to be persisted to S3 and there is typically a fixed overhead of 10-30ms for calling the object storage API. + +:::note +The performance of Amazon EFS is linearly related to capacity ([refer to the official documentation](https://docs.aws.amazon.com/efs/latest/ug/performance.html#performancemodes)), which makes it unsuitable for use in high throughput scenarios with small data sizes. +::: + +:::note +Prices refer to [AWS US East, Ohio Region](https://aws.amazon.com/ebs/pricing/?nc1=h_ls), prices vary slightly by Region. +::: + +:::note +The above data is from [AWS official documentation](https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/ebs-volume-types.html), and the performance metrics are maximum values. 
The actual performance of EBS is related to volume capacity and the mounted EC2 instance type. In general, the larger the volume and the higher the EC2 instance specification, the better the EBS performance, but it will not exceed the maximum values mentioned above. +::: + +## Performance Observation and Analysis Tools + +The next two performance observation and analysis tools are essential for testing, using, and tuning JuiceFS. + +### JuiceFS Stats + +JuiceFS `stats` is a tool for real-time statistics of JuiceFS performance metrics, similar to the `dstat` command on Linux systems, which displays changes in the metrics of JuiceFS clients in real time (see the [documentation](stats_watcher.md) for details). While `juicefs bench` is running, create a new session and execute the following command: + +```bash +juicefs stats /mnt/jfs --verbosity 1 +``` + +The results are as follows, which can be more easily understood when viewed against the benchmarking process described above. + +![stats](../images/bench-guide-stats.png) + +The specific meaning of each of these indicators is as follows: + +- usage + - cpu: CPU consumed by the JuiceFS process + - mem: the physical memory consumed by the JuiceFS process + - buf: internal read/write buffer size of the JuiceFS process, limited by the mount option `--buffer-size` + - cache: internal metric, can simply be ignored +- fuse + - ops/lat: number of requests per second processed by the FUSE interface and their average latency (in milliseconds) + - read/write: bandwidth of the read and write requests handled by the FUSE interface per second +- meta + - ops/lat: number of requests per second processed by the metadata engine and their average latency (in milliseconds). Please note that some requests that can be processed directly in the cache are not included in the statistics, to better reflect the time spent by the client interacting with the metadata engine. + - txn/lat: the number of **write transactions** processed by the metadata engine per second and their average latency (in milliseconds). Read-only requests such as `getattr` are only counted as ops and not txn. + - retry: the number of **write transactions** that the metadata engine retries per second +- blockcache + - read/write: read/write traffic per second of the client's local data cache +- object + - get/get_c/lat: bandwidth of **read requests** processed by the object storage per second, the number of requests and their average latency (in milliseconds) + - put/put_c/lat: bandwidth of **write requests** processed by the object storage per second, the number of requests and their average latency (in milliseconds) + - del_c/lat: the number of **delete requests** processed by the object storage per second and their average latency (in milliseconds) + +### JuiceFS Profile + +JuiceFS `profile` is used to output all access logs of the JuiceFS client in real time, including information about each request. It can also be used to play back and aggregate the JuiceFS access logs, allowing users to visualize the operation of JuiceFS (for a detailed description and usage see the [documentation](operations_profiling.md)). While `juicefs bench` is running, create a new session and execute the following command: + +```bash +cat /mnt/jfs/.accesslog > access.log +``` + +where `.accesslog` is a virtual file that normally does not produce any data and only outputs the JuiceFS access log when it is read (e.g. by executing `cat`).
When you are finished use Ctrl-C to end the `cat` command and run. + +```bash +juicefs profile access.log --interval 0 +``` + +The `---interval` parameter sets the sampling interval for accessing the log, and when set to 0 is used to quickly replay a specified log file to generate statistics, as shown in the following figure. + +![profile](../images/bench-guide-profile.png) + +From the description of the previous benchmarking process, a total of (1 + 100) * 4 = 404 files were created during this test, and each file went through the process of "Create → Write → Close → Open → Read → Close → Delete", so there are a total of: + +- 404 create, open and unlink requests +- 808 flush requests: flush is automatically invoked whenever a file is closed +- 33168 write/read requests: each large file writes 1024 1 MiB IOs, while the default maximum value of requests at the FUSE level is 128 KiB, which means that each application IO is split into 8 FUSE requests, so there are (1024 * 8 + 100) * 4 = 33168 requests. The read IO is similar and the count is the same. + +All these values correspond exactly to the results of `profile`. This is because JuiceFS `write` writes to the memory buffer first by default and then calls flush to upload data to the object store when the file is closed, as expected. + +## Other Test Tool Configuration Examples + +### Fio Standalone Performance Test + +Fio is a common performance testing tool that can be used to do more complex performance tests after completing the JuiceFS bench. + +#### Working Environment + +Consistent with the JuiceFS Bench test environment described above. + +#### Testing tasks + +Perform the following 4 Fio tasks for sequential write, sequential read, random write, and random read tests. + +Sequential write + +```shell +fio --name=jfs-test --directory=/mnt/jfs --ioengine=libaio --rw=write --bs=1m --size=1g --numjobs=4 --direct=1 --group_reporting +``` + +Sequential read + +```bash +fio --name=jfs-test --directory=/mnt/jfs --ioengine=libaio --rw=read --bs=1m --size=1g --numjobs=4 --direct=1 --group_reporting +``` + +Random write + +```shell +fio --name=jfs-test --directory=/mnt/jfs --ioengine=libaio --rw=randwrite --bs=1m --size=1g --numjobs=4 --direct=1 --group_reporting +``` + +Random read + +```shell +fio --name=jfs-test --directory=/mnt/jfs --ioengine=libaio --rw=randread --bs=1m --size=1g --numjobs=4 --direct=1 --group_reporting +``` + +Parameters description: + +- `--name`: user-specified test name, which affects the test file name +- `--directory`: test directory +- `--ioengine`: the way to send IO when testing; usually `libaio` is used +- `--rw`: commonly used are read, write, randread, randwrite, which stand for sequential read/write and random read/write respectively +- `--bs`: the size of each IO +- `--size`: the total size of IO per thread; usually equal to the size of the test file +- `--numjobs`: number of concurrent test threads; by default each thread runs a separate test file +- `--direct`: add the `O_DIRECT` flag bit when opening the file, without using system buffering, which can make the test results more stable and accurate + +The results are as follows: + +```bash +# Sequential +WRITE: bw=703MiB/s (737MB/s), 703MiB/s-703MiB/s (737MB/s-737MB/s), io=4096MiB (4295MB), run=5825-5825msec +READ: bw=817MiB/s (856MB/s), 817MiB/s-817MiB/s (856MB/s-856MB/s), io=4096MiB (4295MB), run=5015-5015msec + +# Random +WRITE: bw=285MiB/s (298MB/s), 285MiB/s-285MiB/s (298MB/s-298MB/s), io=4096MiB (4295MB), run=14395-14395msec +READ: bw=93.6MiB/s 
(98.1MB/s), 93.6MiB/s-93.6MiB/s (98.1MB/s-98.1MB/s), io=4096MiB (4295MB), run=43773-43773msec +``` + +### Vdbench Multi-computer Performance Test + +Vdbench is a commonly used file system evaluation tool, and it supports concurrent multi-machine testing very well. + +#### Working Environment + +Similar to the JuiceFS Bench test environment, except that two more hosts with the same specifications were turned on, for a total of three. + +#### Preparation + +vdbench needs to be installed in the same path on each node: vdbench + +1. Download version 50406 from the [Official Website](https://www.oracle.com/downloads/server-storage/vdbench-downloads.html) +2. Install Java: `apt-get install openjdk-8-jre` +3. Verify that vdbench is installed successfully: `./vdbench -t` + +Then, assuming the names of the three nodes are node0, node1 and node2, you need to create a configuration file on node0 as follows (to test reading and writing a large number of small files): + +```bash +$ cat jfs-test +hd=default,vdbench=/root/vdbench50406,user=root +hd=h0,system=node0 +hd=h1,system=node1 +hd=h2,system=node2 + +fsd=fsd1,anchor=/mnt/jfs/vdbench,depth=1,width=100,files=3000,size=128k,shared=yes + +fwd=default,fsd=fsd1,operation=read,xfersize=128k,fileio=random,fileselect=random,threads=4 +fwd=fwd1,host=h0 +fwd=fwd2,host=h1 +fwd=fwd3,host=h2 + +rd=rd1,fwd=fwd*,fwdrate=max,format=yes,elapsed=300,interval=1 +``` + +Parameters description: + +- `vdbench=/root/vdbench50406`: specifies the path where the vdbench tool is installed +- `anchor=/mnt/jfs/vdbench`: specifies the path to run test tasks on each node +- `depth=1,width=100,files=3000,size=128k`: defines the test task file tree structure, i.e. 100 more directories are created under the test directory, each directory contains 3000 files of 128 KiB size, 300,000 files in total +- `operation=read,xfersize=128k,fileio=random,fileselect=random`: defines the actual test task, i.e., randomly selecting files to send 128 KiB size read requests + +The results are as follows: + +``` +FILE_CREATES Files created: 300,000 498/sec +READ_OPENS Files opened for read activity: 188,317 627/sec +``` + +The overall system speed for creating 128 KiB files is 498 files per second and reading files is 627 files per second. + +#### Other Reference Examples + +For reference, here are some profiles available for simple local evaluation of file system performance; the exact test set size and number of concurrency can be adjusted to suit the actual situation. + +##### Sequential reading and writing of large files + +The file size is 1GiB, where `fwd1` is a sequential write large file and `fwd2` is a sequential read large file. + +```bash +$ cat local-big +fsd=fsd1,anchor=/mnt/jfs/local-big,depth=1,width=1,files=4,size=1g,openflags=o_direct + +fwd=fwd1,fsd=fsd1,operation=write,xfersize=1m,fileio=sequential,fileselect=sequential,threads=4 +fwd=fwd2,fsd=fsd1,operation=read,xfersize=1m,fileio=sequential,fileselect=sequential,threads=4 + +rd=rd1,fwd=fwd1,fwdrate=max,format=restart,elapsed=120,interval=1 +rd=rd2,fwd=fwd2,fwdrate=max,format=restart,elapsed=120,interval=1 +``` + +##### Random reading and writing of small files + +The file size is 128KiB, where `fwd1` is a random write small file, `fwd2` is a random read small file, and `fwd3` is a mixed read/write small file (read/write ratio = 7:3). 
+ +```bash +$ cat local-small +fsd=fsd1,anchor=/mnt/jfs/local-small,depth=1,width=20,files=2000,size=128k,openflags=o_direct + +fwd=fwd1,fsd=fsd1,operation=write,xfersize=128k,fileio=random,fileselect=random,threads=4 +fwd=fwd2,fsd=fsd1,operation=read,xfersize=128k,fileio=random,fileselect=random,threads=4 +fwd=fwd3,fsd=fsd1,rdpct=70,xfersize=128k,fileio=random,fileselect=random,threads=4 + +rd=rd1,fwd=fwd1,fwdrate=max,format=restart,elapsed=120,interval=1 +rd=rd2,fwd=fwd2,fwdrate=max,format=restart,elapsed=120,interval=1 +rd=rd3,fwd=fwd3,fwdrate=max,format=restart,elapsed=120,interval=1 +``` diff --git a/docs/en/benchmark/stats_watcher.md b/docs/en/benchmark/stats_watcher.md new file mode 100644 index 0000000..f2814a0 --- /dev/null +++ b/docs/en/benchmark/stats_watcher.md @@ -0,0 +1,37 @@ +--- +sidebar_label: Performance Statistics Watcher +sidebar_position: 4 +slug: /stats_watcher +--- +# JuiceFS Performance Statistics Watcher + +JuiceFS pre-defined a lot of monitoring items to show internal performance statistics when the system is running. These items are [exposed](../administration/monitoring.md) by Prometheus API. However, when diagnosing performace issues, users may want a real-time monitorig tool to know what is actually going on within a certain time. Thus, the `stats` command is developed to display selected items every second, similar to the Linux tool `dstat`. The output is like: + +![stats_watcher](../images/juicefs_stats_watcher.png) + +By default, this command will monitor the JuiceFS process corresponding to the specified mount point, showing the following items: + +#### usage + +- cpu:CPU usage of the process +- mem:physical memory used by the process +- buf:current buffer size of JuiceFS, limited by mount option `--buffer-size` + +#### fuse + +- ops/lat:number of operations handled by FUSE per second, and the average latency (in milliseconds) of them +- read/write:read/write bandwidth handled by FUSE + +#### meta + +- ops/lat:number of metadata operations and the average latency (in milliseconds) of them. Please note operations returned directly in cache are not counted, so that the result is closer to real performance of metadata engines + +#### blockcache + +- read/write:read/write bandwidth of client local data cache + +#### object + +- get/put:Get/Put bandwidth between client and object storage + +Moreover, users can acquire verbose statistics (like read/write ops and the average latency) by setting `--verbosity 1`, or customize displayed items by changing `--schema`. For more information, please check `juicefs stats -h`. diff --git a/docs/en/client_compile_and_upgrade.md b/docs/en/client_compile_and_upgrade.md new file mode 100644 index 0000000..501e284 --- /dev/null +++ b/docs/en/client_compile_and_upgrade.md @@ -0,0 +1,40 @@ +# JuiceFS client compilation and upgrade + +For general users, it is recommended to directly visit the [releases](https://github.com/juicedata/juicefs/releases) page to download the pre-compiled version for installation and use. + +## Compile from source code + +If you want to experience the new features of JuiceFS first, you can clone the code from the `main` branch of our Github repository and manually compile the latest client. 
+ +### Clone repository + +```shell +$ git clone https://github.com/juicedata/juicefs.git +``` + +### Compile + +The JuiceFS client is developed in Go language, so before compiling, you must install the dependent tools locally in advance: + +- [Go](https://golang.org) 1.16+ +- GCC 5.4+ + +> **Tip**: For users in China, in order to download the Go modules faster, it is recommended to set the mirror server through the `GOPROXY` environment variable. For example: [Goproxy China](https://github.com/goproxy/goproxy.cn). + +Enter the source code directory: + +```shell +$ cd juicefs +``` + +Compiling: + +```shell +$ make +``` + +After the compilation is successful, you can find the compiled `juicefs` binary program in the current directory. + +## JuiceFS client upgrade + +The JuiceFS client is a binary file named `juicefs`. You only need to replace the old version with the new version of the binary file when upgrading. diff --git a/docs/en/community/_adopters.md b/docs/en/community/_adopters.md new file mode 100644 index 0000000..2a8971f --- /dev/null +++ b/docs/en/community/_adopters.md @@ -0,0 +1,5 @@ +--- +sidebar_label: Adopters +sidebar_position: 1 +--- +# Adopters \ No newline at end of file diff --git a/docs/en/community/_integrations.md b/docs/en/community/_integrations.md new file mode 100644 index 0000000..259e427 --- /dev/null +++ b/docs/en/community/_integrations.md @@ -0,0 +1,5 @@ +--- +sidebar_label: Integrations +sidebar_position: 2 +--- +# Integrations \ No newline at end of file diff --git a/docs/en/community/_roadmap.md b/docs/en/community/_roadmap.md new file mode 100644 index 0000000..8cd3ef1 --- /dev/null +++ b/docs/en/community/_roadmap.md @@ -0,0 +1,5 @@ +--- +sidebar_label: Roadmap +sidebar_position: 3 +--- +# Roadmap \ No newline at end of file diff --git a/docs/en/community/usage_tracking.md b/docs/en/community/usage_tracking.md new file mode 100644 index 0000000..3fafeef --- /dev/null +++ b/docs/en/community/usage_tracking.md @@ -0,0 +1,14 @@ +--- +sidebar_label: Usage Tracking +sidebar_position: 4 +--- + +# Usage Tracking + +JuiceFS by default collects and reports **anonymous** usage data. It only collects core metrics (e.g. version number, file system size), no user or any sensitive data will be collected. You could review related code [here](https://github.com/juicedata/juicefs/blob/main/pkg/usage/usage.go). + +These data help us understand how the community is using this project. You could disable reporting easily by command line option `--no-usage-report`: + +``` +$ juicefs mount --no-usage-report +``` diff --git a/docs/en/comparison/juicefs_vs_alluxio.md b/docs/en/comparison/juicefs_vs_alluxio.md new file mode 100644 index 0000000..8739391 --- /dev/null +++ b/docs/en/comparison/juicefs_vs_alluxio.md @@ -0,0 +1,72 @@ +# JuiceFS vs. Alluxio + +[Alluxio](https://www.alluxio.io) (/əˈlʌksio/) is a data access layer in the big data and machine learning ecosystem. Initially as research project "Tachyon", it was created at the University of California, Berkeley's [AMPLab](https://en.wikipedia.org/wiki/AMPLab) as creator's Ph.D. thesis in 2013. Alluxio was open sourced in 2014. + +The following table shows difference of main features between Alluxio and JuiceFS. 
+
+| Features | Alluxio | JuiceFS |
+| -------- | ------- | ------- |
+| Storage format | Object | Block |
+| Cache granularity | 64MiB | 4MiB |
+| Multi-tier cache | ✓ | ✓ |
+| Hadoop-compatible | ✓ | ✓ |
+| S3-compatible | ✓ | ✓ |
+| Kubernetes CSI Driver | ✓ | ✓ |
+| Hadoop data locality | ✓ | ✓ |
+| Fully POSIX-compatible | ✕ | ✓ |
+| Atomic metadata operation | ✕ | ✓ |
+| Consistency | ✕ | ✓ |
+| Data compression | ✕ | ✓ |
+| Data encryption | ✕ | ✓ |
+| Zero-effort operation | ✕ | ✓ |
+| Language | Java | Go |
+| Open source license | Apache License 2.0 | Apache License 2.0 |
+| Open source date | 2011 | 2021.1 |
+
+### Storage format
+
+The [storage format](../reference/how_juicefs_store_files.md) of a file in JuiceFS consists of three levels: chunk, slice and block. A file is split into multiple blocks, which are compressed and optionally encrypted before being stored in the object storage.
+
+Alluxio stores files as objects in the UFS. A file is not split into blocks the way JuiceFS does.
+
+### Cache granularity
+
+The [default block size](../reference/how_juicefs_store_files.md) of JuiceFS is 4MiB, a finer granularity than Alluxio's 64MiB. The smaller block size is better for random-read workloads (e.g. Parquet and ORC) and makes cache management more efficient.
+
+### Hadoop-compatible
+
+JuiceFS is [HDFS-compatible](../deployment/hadoop_java_sdk.md). It is compatible not only with Hadoop 2.x and Hadoop 3.x, but also with a variety of components in the Hadoop ecosystem.
+
+### Kubernetes CSI Driver
+
+JuiceFS provides a [Kubernetes CSI Driver](https://github.com/juicedata/juicefs-csi-driver) for people who want to use JuiceFS in Kubernetes. Alluxio also provides a [Kubernetes CSI Driver](https://github.com/Alluxio/alluxio-csi), but that project does not appear to be actively maintained or officially supported by Alluxio.
+
+### Fully POSIX-compatible
+
+JuiceFS is [fully POSIX-compatible](../reference/posix_compatibility.md). A pjdfstest run by [JD.com](https://www.slideshare.net/Alluxio/using-alluxio-posix-fuse-api-in-jdcom) shows that Alluxio did not pass the POSIX compatibility test; for example, Alluxio does not support symbolic links, truncate, fallocate, append, xattr, mkfifo, mknod and utimes. Besides the things covered by pjdfstest, JuiceFS also provides close-to-open consistency, atomic metadata operations, mmap, fallocate with punch hole, xattr, BSD locks (flock) and POSIX record locks (fcntl).
+
+### Atomic metadata operation
+
+A metadata operation in Alluxio has two steps: the first step modifies the state of the Alluxio master, and the second step sends a request to the UFS. As you can see, the metadata operation is not atomic; its state is unpredictable while the operation is executing or when a failure occurs. Alluxio relies on the UFS to implement metadata operations; for example, a rename operation becomes a copy plus a delete.
+
+Thanks to [Redis transactions](https://redis.io/topics/transactions), **most metadata operations of JuiceFS are atomic**, e.g. renaming a file, deleting a file, renaming a directory. You don't have to worry about consistency or performance.
+
+### Consistency
+
+Alluxio loads metadata from the UFS as needed and has no information about the UFS at startup. By default, Alluxio expects that all modifications to the UFS occur through Alluxio. If changes are made to the UFS directly, you need to sync metadata between Alluxio and the UFS, either manually or periodically. As the ["Atomic metadata operation"](#atomic-metadata-operation) section explains, the two-step metadata operation may also result in inconsistencies.
+
+JuiceFS provides strong consistency for both metadata and data. **The metadata service of JuiceFS is the single source of truth, not a mirror of the UFS.** The metadata service does not rely on object storage to obtain metadata; object storage is simply treated as unlimited block storage. There is no inconsistency between JuiceFS and the object storage.
+
+### Data compression
+
+JuiceFS supports using [LZ4](https://lz4.github.io/lz4) or [Zstandard](https://facebook.github.io/zstd) to compress all your data. Alluxio doesn't have this feature.
+
+### Data encryption
+
+JuiceFS supports data encryption in transit and at rest. The Alluxio community edition doesn't have this feature, but the [enterprise edition](https://docs.alluxio.io/ee/user/stable/en/operation/Security.html#end-to-end-data-encryption) does.
+
+### Zero-effort operation
+
+Alluxio's architecture can be divided into 3 components: master, worker and client. A typical cluster consists of a single leading master, standby masters, a job master, standby job masters, workers, and job workers. You need to operate these masters and workers yourself.
+
+JuiceFS uses Redis or [other databases](../reference/how_to_setup_metadata_engine.md) as the metadata engine. You can easily use a managed service from a public cloud provider as JuiceFS's metadata engine, with no operational burden.
diff --git a/docs/en/comparison/juicefs_vs_cephfs.md b/docs/en/comparison/juicefs_vs_cephfs.md new file mode 100644 index 0000000..b01b34d --- /dev/null +++ b/docs/en/comparison/juicefs_vs_cephfs.md @@ -0,0 +1,66 @@ +# JuiceFS vs. CephFS
+
+## Similarities
+
+Both are highly reliable, high-performance resilient distributed file systems with good POSIX compatibility, and can be used in a variety of file system scenarios.
+
+## Differences
+
+### System Architecture
+
+Both use an architecture that separates data and metadata, but there are significant differences in component implementation.
+
+#### CephFS
+
+CephFS is a complete and independent system, best suited to private cloud deployments; all data and metadata is persisted in Ceph's own storage pools (RADOS Pools).
+
+- Metadata
+  - Service Process (MDS): stateless and theoretically horizontally scalable. There are mature master-slave mechanisms, but multi-master deployments still have performance and stability concerns; production environments typically use one-master-multi-slaves or multi-master with static isolation.
+  - Persistence: independent RADOS storage pools, usually on SSD or higher-performance hardware
+- Data: one or more RADOS storage pools, with different configurations specified by Layout, such as chunk size (default 4 MiB), redundancy (multi-copy, EC), etc.
+- Client: kernel client (kcephfs), user-space client (ceph-fuse) and SDKs for C++, Python, etc. based on libcephfs; recently the community has also provided a Windows client (ceph-dokan). The ecosystem also includes a VFS object for Samba and an FSAL module for NFS-Ganesha.
+
+#### JuiceFS
+
+JuiceFS mainly implements the libjfs library, a FUSE client application, a Java SDK, etc. It supports interfacing with various metadata engines and object storage services and is suitable for deployment in public, private or hybrid cloud environments.
+
+- Metadata: see the [database implementations](../reference/how_to_setup_metadata_engine.md) for details, including:
+  - Redis and various Redis-compatible variants (transactions required)
+  - SQL family: MySQL, PostgreSQL, SQLite, etc.
+  - Distributed K/V storage: TiKV is supported, and Apple FoundationDB is planned to be supported.
+  - Self-developed engine: used by the JuiceFS fully managed service on the public cloud.
+- Data: supports over 30 [object stores](../reference/how_to_setup_object_storage.md) on the public cloud, and can also be used with MinIO, Ceph RADOS, Ceph RGW, etc.
+- Clients: user-space mount on Unix-like systems, Windows mount, Java SDK with full HDFS semantics compatibility, [Python SDK](https://github.com/megvii-research/juicefs-python) and a built-in S3 gateway.
+
+### Features
+
+|                          | CephFS | JuiceFS |
+| ----------------------- | ---------- | ------------- |
+| File chunking [1] | ✓ | ✓ |
+| Metadata transactions | ✓ | ✓ |
+| Strong consistency | ✓ | ✓ |
+| Kubernetes CSI Driver | ✓ | ✓ |
+| Hadoop-compatible | ✓ | ✓ |
+| Data compression [2] | ✓ | ✓ |
+| Data encryption [3] | ✓ | ✓ |
+| Snapshot | ✓ | ✕ |
+| Client data caching | ✕ | ✓ |
+| Hadoop data locality | ✕ | ✓ |
+| S3-compatible | ✕ | ✓ |
+| Quota | Directory level quota | Volume level quota |
+| Languages | C++ | Go |
+| License | LGPLv2.1 & LGPLv3 | Apache License 2.0 |
+
+#### [1] File Chunking
+
+CephFS splits files by [`object_size`](https://docs.ceph.com/en/latest/cephfs/file-layouts/#reading-layouts-with-getfattr) (default 4MiB), and each chunk corresponds to a RADOS object. JuiceFS splits files into 64MiB chunks, and each chunk is further split into one or more logical slices according to the actual write pattern. Each slice is then split into one or more logical blocks when writing to the object store, and each block corresponds to one object in the object store. When handling overwrites, CephFS needs to modify the corresponding objects directly, which is a complicated process; especially when the redundancy policy is EC or data compression is enabled, it often needs to read part of the object content first, modify it in memory, and then write it back, which brings a large performance overhead. When overwriting, JuiceFS writes the updated data as new objects and only modifies the metadata, which greatly improves performance; any redundant data produced during the process is garbage collected asynchronously.
+
+#### [2] Data Compression
+
+Strictly speaking, CephFS does not provide data compression itself; it relies on BlueStore compression at the RADOS layer. JuiceFS, on the other hand, can compress data once before a block is uploaded to the object store, in order to reduce the capacity used in the object storage. In other words, if you use JuiceFS on top of RADOS, a block can be compressed both before and after it enters RADOS. Also, as mentioned in **File Chunking**, CephFS does not normally enable BlueStore compression, in order to guarantee overwrite performance.
+
+#### [3] Data Encryption
+
+Ceph **Messenger v2** supports data encryption at the network transport layer, while at the storage layer it is similar to compression, relying on the encryption configured at OSD creation.
+
+JuiceFS performs encryption before uploading objects and decryption after downloading them, and is completely transparent to the object storage side.
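+
+As a minimal sketch of what enabling this looks like on the JuiceFS side (the key file name, Redis address and volume name below are placeholders, and the flags should be checked against `juicefs format -h` for your client version):
+
+```shell
+# Generate a passphrase-protected RSA private key for the volume (file name is arbitrary)
+openssl genrsa -out my-priv-key.pem -aes256 2048
+
+# Create a volume whose blocks are encrypted by the client before upload
+juicefs format --encrypt-rsa-key my-priv-key.pem redis://127.0.0.1:6379/1 encrypted-vol
+```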
diff --git a/docs/en/comparison/juicefs_vs_s3ql.md b/docs/en/comparison/juicefs_vs_s3ql.md new file mode 100644 index 0000000..ced2e2e --- /dev/null +++ b/docs/en/comparison/juicefs_vs_s3ql.md @@ -0,0 +1,113 @@ +# JuiceFS vs. S3QL
+
+Similar to JuiceFS, [S3QL](https://github.com/s3ql/s3ql) is also an open source network file system driven by object storage and a database. All data is split into blocks and stored in object storage services such as Amazon S3, Backblaze B2, or OpenStack Swift, while the corresponding metadata is stored in the database.
+
+## Similarities
+
+- Both provide the standard POSIX file system interface through the FUSE module, so that massive cloud storage can be mounted locally and used like local storage.
+- Both provide standard file system features: hard links, symbolic links, extended attributes, file permissions.
+- Both support data compression and encryption, although the algorithms used differ.
+
+## Differences
+
+- S3QL only supports SQLite, while JuiceFS supports more databases, such as Redis, TiKV, MySQL, PostgreSQL, and SQLite.
+- S3QL has no distributed capability and **does not** support multi-host shared mounting. JuiceFS is a typical distributed file system: when using a network-based database, it supports distributed mounting with reads and writes from multiple hosts.
+- S3QL commits a data block to S3 only after it has not been accessed for a few seconds. After a file is closed, or even fsynced, its data is only guaranteed to be in system memory, which may lead to data loss if the node fails. JuiceFS ensures high data durability by uploading all blocks synchronously when a file is closed.
+- S3QL provides data deduplication, so only one copy of identical data is stored. This reduces storage usage, but it also increases the performance overhead of the system. JuiceFS pays more attention to performance; deduplication is too expensive on large-scale data, so this feature is not provided for now.
+- S3QL provides remote backup of metadata: the SQLite database holding the metadata is backed up asynchronously to the object storage. JuiceFS mainly uses network databases such as Redis and MySQL and does not directly provide a SQLite synchronization backup feature, but it does support metadata import and export as well as synchronization across storage backends, so users can easily back up metadata to object storage and migrate between different databases (a sketch follows this list).
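+
+As a rough sketch of the metadata export/import mentioned in the last point (the Redis addresses and file name are placeholders; see `juicefs dump --help` for the exact options of your version):
+
+```shell
+# Export all metadata from the current metadata engine to a JSON file
+juicefs dump redis://192.168.1.6:6379/1 meta-dump.json
+
+# Import the metadata into another (empty) metadata engine
+juicefs load redis://192.168.1.7:6379/1 meta-dump.json
+```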
+ +| | **S3QL** | **JuiceFS** | +| :------------------------ | :-------------------- | :---------------------------- | +| Metadata engine | SQLite | Redis, MySQL, SQLite, TiKV | +| Storage engine | Object Storage, Local | Object Storage, WebDAV, Local | +| Operating system | Unix-like | Linux, macOS, Windows | +| Compression algorithm | LZMA, bzip2, gzip | lz4, zstd | +| Encryption algorithm | AES-256 | AES-GCM, RSA | +| POSIX compatible | ✓ | ✓ | +| Hard link | ✓ | ✓ | +| Symbolic link | ✓ | ✓ | +| Extended attributes | ✓ | ✓ | +| Standard Unix permissions | ✓ | ✓ | +| Data block | ✓ | ✓ | +| Local cache | ✓ | ✓ | +| Elastic storage | ✓ | ✓ | +| Metadata backup | ✓ | ✓ | +| Data deduplication | ✓ | ✕ | +| Immutable trees | ✓ | ✕ | +| Snapshots | ✓ | ✕ | +| Share mount | ✕ | ✓ | +| Hadoop SDK | ✕ | ✓ | +| Kubernetes CSI Driver | ✕ | ✓ | +| S3 gateway | ✕ | ✓ | +| Language | Python | Go | +| Open source license | GPLv3 | Apache License 2.0 | +| Open source date | 2011 | 2021.1 | + +## Usage + +This part mainly evaluates the ease of installation and use of the two products. + +### Installation + +During the installation process, we use Rocky Linux 8.4 operating system (kernel version 4.18.0-305.12.1.el8_4.x86_64). + +#### S3QL + +S3QL is developed in Python and requires python-devel 3.7 and above to be installed. In addition, at least the following dependencies must be met: fuse3-devel, gcc, pyfuse3, sqlite-devel, cryptography, defusedxml, apsw, dugong. In addition, you need to pay special attention to Python's package dependencies and location issues. + +S3QL will install 12 binary programs in the system, and each program provides an independent function, as shown in the figure below. + +![](../images/s3ql-bin.jpg) + +#### JuiceFS + +JuiceFS is developed in Go and can be used directly by downloading the pre-compiled binary file. The JuiceFS client has only one binary program `juicefs`, just copy it to any executable path of the system, for example: `/usr/local/bin`. + +### Create and Mount a file system + +Both S3QL and JuiceFS use database to store metadata. S3QL only supports SQLite databases, and JuiceFS supports databases such as Redis, TiKV, MySQL, MariaDB, PostgreSQL, and SQLite. + +Here we use Minio object storage created locally and use them to create a file system separately: + +#### S3QL + +S3QL uses `mkfs.s3ql` to create a file system: + +```shell +$ mkfs.s3ql --plain --backend-options no-ssl -L s3ql s3c://127.0.0.1:9000/s3ql/ +``` + +Mount a file system using `mount.s3ql`: + +```shell +$ mount.s3ql --compress none --backend-options no-ssl s3c://127.0.0.1:9000/s3ql/ mnt-s3ql +``` + +S3QL needs to interactively provide the access key of the object storage API through the command line when creating and mounting a file system. + +#### JuiceFS + +JuiceFS uses the `format` subcommand to create a file system: + +```shell +$ juicefs format --storage minio \ + --bucket http://127.0.0.1:9000/myjfs \ + --access-key minioadmin \ + --secret-key minioadmin \ + sqlite3://myjfs.db \ + myjfs +``` + +Mount a file system using `mount` subcommand: + +```shell +$ sudo juicefs mount -d sqlite3://myjfs.db mnt-juicefs +``` + +JuiceFS only sets the object storage API access key when creating a file system, and the relevant information will be written into the metadata engine. After created, there is no need to repeatedly provide the object storage url, access key and other information. 
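+
+A quick way to confirm that the mount works, assuming the example above (the exact output depends on your environment):
+
+```shell
+# Write and read back a test file, then check the mounted file system
+echo "hello juicefs" > mnt-juicefs/hello.txt
+cat mnt-juicefs/hello.txt
+df -h mnt-juicefs
+```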
+
+## Summary
+
+**S3QL** adopts an object storage + SQLite storage structure. Storing data in blocks not only improves file read and write efficiency, but also reduces the resource overhead when a file is modified. Advanced features such as snapshots, data deduplication, and data retention, together with data compression and data encryption enabled by default, make S3QL well suited for individuals who want to store files in cloud storage at lower cost and more securely.
+
+**JuiceFS** supports object storage, HDFS, WebDAV, and local disks as data storage engines, and supports popular databases such as Redis, TiKV, MySQL, MariaDB, PostgreSQL, and SQLite as metadata storage engines. It provides a standard POSIX file system interface through FUSE, as well as a Java API that can directly replace HDFS to provide storage for Hadoop. It also provides a [Kubernetes CSI Driver](https://github.com/juicedata/juicefs-csi-driver), so it can be used as the storage layer of Kubernetes for persistent data. JuiceFS is a file system designed for enterprise-level distributed data storage scenarios, and it is widely used in scenarios such as big data analytics, machine learning, container shared storage, data sharing, and backup.
diff --git a/docs/en/deployment/_share_via_nfs.md b/docs/en/deployment/_share_via_nfs.md new file mode 100644 index 0000000..ff2d980 --- /dev/null +++ b/docs/en/deployment/_share_via_nfs.md @@ -0,0 +1,5 @@ +--- +sidebar_label: Deploy JuiceFS with NFS +sidebar_position: 5 +--- +# Deploy JuiceFS with NFS \ No newline at end of file diff --git a/docs/en/deployment/_share_via_smb.md b/docs/en/deployment/_share_via_smb.md new file mode 100644 index 0000000..13a6812 --- /dev/null +++ b/docs/en/deployment/_share_via_smb.md @@ -0,0 +1,5 @@ +--- +sidebar_label: Deploy JuiceFS with SMB +sidebar_position: 6 +--- +# Deploy JuiceFS with SMB \ No newline at end of file diff --git a/docs/en/deployment/hadoop_java_sdk.md b/docs/en/deployment/hadoop_java_sdk.md new file mode 100644 index 0000000..7e17b7c --- /dev/null +++ b/docs/en/deployment/hadoop_java_sdk.md @@ -0,0 +1,589 @@ +--- +sidebar_label: Use JuiceFS on Hadoop Ecosystem +sidebar_position: 3 +slug: /hadoop_java_sdk +--- +# Use JuiceFS on Hadoop Ecosystem
+
+JuiceFS provides a [Hadoop-compatible FileSystem](https://hadoop.apache.org/docs/current/hadoop-project-dist/hadoop-common/filesystem/introduction.html) through its Hadoop Java SDK. Various applications in the Hadoop ecosystem can smoothly use JuiceFS to store data without changing their code.
+
+## Requirements
+
+### 1. Hadoop and related components
+
+The JuiceFS Hadoop Java SDK is compatible with Hadoop 2.x and Hadoop 3.x, as well as a variety of components in the Hadoop ecosystem.
+
+### 2. User permissions
+
+JuiceFS uses a local mapping of `user` and `UID`. Therefore, you should [sync all the needed users and their UIDs](../administration/sync_accounts_between_multiple_hosts.md) across the whole Hadoop cluster to avoid permission errors. You can also specify a global user list and user group file; please refer to the [relevant configurations](#other-configurations).
+
+### 3. File system
+
+You should first create at least one JuiceFS file system to provide storage for components in the Hadoop ecosystem through the JuiceFS Java SDK. When deploying the Java SDK, specify the metadata engine address of the created file system in the configuration file.
+
+To create a file system, please refer to the [JuiceFS Quick Start Guide](../getting-started/for_local.md).
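+
+For illustration, creating such a volume follows the same `juicefs format` pattern used elsewhere in this documentation; the Redis address, bucket and credentials below are placeholders to replace with your own:
+
+```shell
+juicefs format \
+    --storage s3 \
+    --bucket https://mybucket.s3.us-east-1.amazonaws.com \
+    --access-key <your-access-key> \
+    --secret-key <your-secret-key> \
+    redis://192.168.1.6:6379/1 \
+    myjfs
+```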
+ +:::note +If you want to use JuiceFS in a distributed environment, when creating a file system, please plan the object storage and database to be used reasonably to ensure that they can be accessed by each node in the cluster. +::: + +### 4. Memory + +JuiceFS Hadoop Java SDK need extra 4 * [`juicefs.memory-size`](#io-configurations) off-heap memory at most. By default, up to 1.2 GB of additional memory is required (depends on write load). + +## Client compilation + +:::note +No matter which system environment the client is compiled for, the compiled JAR file has the same name and can only be deployed in the matching system environment. For example, when compiled in Linux, it can only be used in the Linux environment. In addition, since the compiled package depends on glibc, it is recommended to compile with a lower version system to ensure better compatibility. +::: + +Compilation depends on the following tools: + +- [Go](https://golang.org/) 1.16+ +- JDK 8+ +- [Maven](https://maven.apache.org/) 3.3+ +- git +- make +- GCC 5.4+ + +### Linux and macOS + +Clone the repository: + +```shell +$ git clone https://github.com/juicedata/juicefs.git +``` + +Enter the directory and compile: + +:::note +If Ceph RADOS is used to store data, you need to install `librados-dev` first and [build `libjfs.so`](https://github.com/juicedata/juicefs/blob/main/sdk/java/libjfs/Makefile#L22) with `-tags ceph`. +::: + +```shell +$ cd juicefs/sdk/java +$ make +``` + +After the compilation, you can find the compiled `JAR` file in the `sdk/java/target` directory, including two versions: + +- Contains third-party dependent packages: `juicefs-hadoop-X.Y.Z.jar` +- Does not include third-party dependent packages: `original-juicefs-hadoop-X.Y.Z.jar` + +It is recommended to use a version that includes third-party dependencies. + +### Windows + +The client used in the Windows environment needs to be obtained through cross-compilation on Linux or macOS. The compilation depends on [mingw-w64](https://www.mingw-w64.org/), which needs to be installed first. + +The steps are the same as compiling on Linux or macOS. For example, on the Ubuntu system, install the `mingw-w64` package first to solve the dependency problem: + +```shell +$ sudo apt install mingw-w64 +``` + +Clone and enter the JuiceFS source code directory, execute the following code to compile: + +```shell +$ cd juicefs/sdk/java +$ make win +``` + +## Deploy the client + +To enable each component of the Hadoop ecosystem to correctly identify JuiceFS, the following configurations are required: + +1. Place the compiled JAR file and `$JAVA_HOME/lib/tools.jar` into the `classpath` of the component. The installation paths of common big data platforms and components are shown in the table below. +2. Put JuiceFS configurations into the configuration file of each Hadoop ecosystem component (usually `core-site.xml`), see [Client Configurations](#client-configurations) for details. + +It is recommended to place the JAR file in a fixed location, and the other locations are called it through symbolic links. + +### Big Data Platforms + +| Name | Installing Paths | +| ----------------- | ------------------------------------------------------------ | +| CDH | `/opt/cloudera/parcels/CDH/lib/hadoop/lib`

`/opt/cloudera/parcels/CDH/spark/jars`

`/var/lib/impala` | +| HDP | `/usr/hdp/current/hadoop-client/lib`

`/usr/hdp/current/hive-client/auxlib`

`/usr/hdp/current/spark2-client/jars` | +| Amazon EMR | `/usr/lib/hadoop/lib`

`/usr/lib/spark/jars`

`/usr/lib/hive/auxlib` | +| Alibaba Cloud EMR | `/opt/apps/ecm/service/hadoop/*/package/hadoop*/share/hadoop/common/lib`

`/opt/apps/ecm/service/spark/*/package/spark*/jars`

`/opt/apps/ecm/service/presto/*/package/presto*/plugin/hive-hadoop2`

`/opt/apps/ecm/service/hive/*/package/apache-hive*/lib`

`/opt/apps/ecm/service/impala/*/package/impala*/lib` | +| Tencent Cloud EMR | `/usr/local/service/hadoop/share/hadoop/common/lib`

`/usr/local/service/presto/plugin/hive-hadoop2`

`/usr/local/service/spark/jars`

`/usr/local/service/hive/auxlib` | +| UCloud UHadoop | `/home/hadoop/share/hadoop/common/lib`

`/home/hadoop/hive/auxlib`

`/home/hadoop/spark/jars`

`/home/hadoop/presto/plugin/hive-hadoop2` | +| Baidu Cloud EMR | `/opt/bmr/hadoop/share/hadoop/common/lib`

`/opt/bmr/hive/auxlib`

`/opt/bmr/spark2/jars` | + +### Community Components + +| Name | Installing Paths | +| ------ | ------------------------------------ | +| Spark | `${SPARK_HOME}/jars` | +| Presto | `${PRESTO_HOME}/plugin/hive-hadoop2` | +| Flink | `${FLINK_HOME}/lib` | + +### Client Configurations + +Please refer to the following table to set the relevant parameters of the JuiceFS file system and write it into the configuration file, which is generally `core-site.xml`. + +#### Core Configurations + +| Configuration | Default Value | Description | +| -------------------------------- | ---------------------------- | ------------------------------------------------------------ | +| `fs.jfs.impl` | `io.juicefs.JuiceFileSystem` | Specify the storage implementation to be used. By default, `jfs://` scheme is used. If you want to use different scheme (e.g. `cfs://`), just modify it to `fs.cfs.impl`. No matter what sheme you use, it is always access the data in JuiceFS. | +| `fs.AbstractFileSystem.jfs.impl` | `io.juicefs.JuiceFS` | Specify the storage implementation to be used. By default, `jfs://` scheme is used. If you want to use different scheme (e.g. `cfs://`), just modify it to `fs.AbstractFileSystem.cfs.impl`. No matter what sheme you use, it is always access the data in JuiceFS. | +| `juicefs.meta` | | Specify the metadata engine address of the pre-created JuiceFS file system. You can configure multiple file systems for the client at the same time through the format of `juicefs.{vol_name}.meta`. Refer to ["Multiple file systems configuration"](#multiple-file-systems-configuration). | + +#### Cache Configurations + +| Configuration | Default Value | Description | +| ---------------------------- | ------------- | ------------------------------------------------------------ | +| `juicefs.cache-dir` | | Directory paths of local cache. Use colon to separate multiple paths. Also support wildcard in path. **It's recommended create these directories manually and set `0777` permission so that different applications could share the cache data.** | +| `juicefs.cache-size` | 0 | Maximum size of local cache in MiB. The default value is 0, which means that caching is disabled. It's the total size when set multiple cache directories. | +| `juicefs.cache-full-block` | `true` | Whether cache every read blocks, `false` means only cache random/small read blocks. | +| `juicefs.free-space` | 0.1 | Min free space ratio of cache directory | +| `juicefs.attr-cache` | 0 | Expire of attributes cache in seconds | +| `juicefs.entry-cache` | 0 | Expire of file entry cache in seconds | +| `juicefs.dir-entry-cache` | 0 | Expire of directory entry cache in seconds | +| `juicefs.discover-nodes-url` | | The URL to discover cluster nodes, refresh every 10 minutes.

YARN: `yarn`
Spark Standalone: `http://spark-master:web-ui-port/json/`
Spark ThriftServer: `http://thrift-server:4040/api/v1/applications/`
Presto: `http://coordinator:discovery-uri-port/v1/service/presto/` | + +#### I/O Configurations + +| Configuration | Default Value | Description | +| ------------------------ | ------------- | ----------------------------------------------- | +| `juicefs.max-uploads` | 20 | The max number of connections to upload | +| `juicefs.max-deletes` | 2 | The max number of connections to delete | +| `juicefs.get-timeout` | 5 | The max number of seconds to download an object | +| `juicefs.put-timeout` | 60 | The max number of seconds to upload an object | +| `juicefs.memory-size` | 300 | Total read/write buffering in MiB | +| `juicefs.prefetch` | 1 | Prefetch N blocks in parallel | +| `juicefs.upload-limit` | 0 | Bandwidth limit for upload in Mbps | +| `juicefs.download-limit` | 0 | Bandwidth limit for download in Mbps | + +#### Other Configurations + +| Configuration | Default Value | Description | +| ------------------------- | ------------- | ------------------------------------------------------------ | +| `juicefs.bucket` | | Specify a different endpoint for object storage | +| `juicefs.debug` | `false` | Whether enable debug log | +| `juicefs.access-log` | | Access log path. Ensure Hadoop application has write permission, e.g. `/tmp/juicefs.access.log`. The log file will rotate automatically to keep at most 7 files. | +| `juicefs.superuser` | `hdfs` | The super user | +| `juicefs.users` | `null` | The path of username and UID list file, e.g. `jfs://name/etc/users`. The file format is `:`, one user per line. | +| `juicefs.groups` | `null` | The path of group name, GID and group members list file, e.g. `jfs://name/etc/groups`. The file format is `::,`, one group per line. | +| `juicefs.umask` | `null` | The umask used when creating files and directories (e.g. `0022`), default value is `fs.permissions.umask-mode`. | +| `juicefs.push-gateway` | | [Prometheus Pushgateway](https://github.com/prometheus/pushgateway) address, format is `:`. | +| `juicefs.push-interval` | 10 | Prometheus push interval in seconds | +| `juicefs.push-auth` | | [Prometheus basic auth](https://prometheus.io/docs/guides/basic-auth) information, format is `:`. | +| `juicefs.fast-resolve` | `true` | Whether enable faster metadata lookup using Redis Lua script | +| `juicefs.no-usage-report` | `false` | Whether disable usage reporting. JuiceFS only collects anonymous usage data (e.g. version number), no user or any sensitive data will be collected. | + +#### Multiple file systems configuration + +When multiple JuiceFS file systems need to be used at the same time, all the above configuration items can be specified for a specific file system. You only need to put the file system name in the middle of the configuration item, such as `jfs1` and `jfs2` in the following example: + +```xml + + juicefs.jfs1.meta + redis://jfs1.host:port/1 + + + juicefs.jfs2.meta + redis://jfs2.host:port/1 + +``` + +#### Configuration Example + +The following is a commonly used configuration example. Please replace the `{HOST}`, `{PORT}` and `{DB}` variables in the `juicefs.meta` configuration with actual values. 
+ +```xml + + fs.jfs.impl + io.juicefs.JuiceFileSystem + + + fs.AbstractFileSystem.jfs.impl + io.juicefs.JuiceFS + + + juicefs.meta + redis://{HOST}:{PORT}/{DB} + + + juicefs.cache-dir + /data*/jfs + + + juicefs.cache-size + 1024 + + + juicefs.access-log + /tmp/juicefs.access.log + +``` + +## Configuration in Hadoop + +Please refer to the aforementioned configuration tables and add configuration parameters to the Hadoop configuration file `core-site.xml`. + +### CDH6 + +If you are using CDH 6, in addition to modifying `core-site`, you also need to modify `mapreduce.application.classpath` through the YARN service interface, adding: + +```shell +$HADOOP_COMMON_HOME/lib/juicefs-hadoop.jar +``` + +### HDP + +In addition to modifying `core-site`, you also need to modify the configuration `mapreduce.application.classpath` through the MapReduce2 service interface and add it at the end (variables do not need to be replaced): + +```shell +/usr/hdp/${hdp.version}/hadoop/lib/juicefs-hadoop.jar +``` + +### Flink + +Add configuration parameters to `conf/flink-conf.yaml`. If you only use JuiceFS in Flink, you don't need to configure JuiceFS in the Hadoop environment, you only need to configure the Flink client. + +### Hudi + +:::note +The latest version of Hudi (v0.10.0) does not yet support JuiceFS, you need to compile the latest master branch yourself. +::: + +Please refer to ["Hudi Official Documentation"](https://hudi.apache.org/docs/next/jfs_hoodie) to learn how to configure JuiceFS. + +### Restart Services + +When the following components need to access JuiceFS, they should be restarted. + +:::note +Before restart, you need to confirm JuiceFS related configuration has been written to the configuration file of each component, usually you can find them in `core-site.xml` on the machine where the service of the component was deployed. +::: + +| Components | Services | +| ---------- | -------------------------- | +| Hive | HiveServer
Metastore | +| Spark | ThriftServer | +| Presto | Coordinator
Worker | +| Impala | Catalog Server
Daemon | +| HBase | Master
RegionServer | + +HDFS, Hue, ZooKeeper and other services don't need to be restarted. + +When `Class io.juicefs.JuiceFileSystem not found` or `No FilesSystem for scheme: jfs` exceptions was occurred after restart, reference [FAQ](#faq). + +## Environmental Verification + +After the deployment of the JuiceFS Java SDK, the following methods can be used to verify the success of the deployment. + +### Hadoop + +```bash +$ hadoop fs -ls jfs://{JFS_NAME}/ +``` + +:::info +The `JFS_NAME` is the volume name when you format JuiceFS file system. +::: + +### Hive + +```sql +CREATE TABLE IF NOT EXISTS person +( + name STRING, + age INT +) LOCATION 'jfs://{JFS_NAME}/tmp/person'; +``` + +## Monitoring metrics collection + +Please see the ["Monitoring"](../administration/monitoring.md) documentation to learn how to collect and display JuiceFS monitoring metrics. + +## Benchmark + +Here are a series of methods to use the built-in stress testing tool of the JuiceFS client to test the performance of the client environment that has been successfully deployed. + + +### 1. Local Benchmark + +#### Metadata + +- **create** + + ```shell + hadoop jar juicefs-hadoop.jar nnbench create -files 10000 -baseDir jfs://{JFS_NAME}/tmp/benchmarks/NNBench -local + ``` + + This command will create 10000 empty files + +- **open** + + ```shell + hadoop jar juicefs-hadoop.jar nnbench open -files 10000 -baseDir jfs://{JFS_NAME}/tmp/benchmarks/NNBench -local + ``` + + This command will open 10000 files without reading data + +- **rename** + + ```shell + hadoop jar juicefs-hadoop.jar nnbench rename -files 10000 -baseDir jfs://{JFS_NAME}/tmp/benchmarks/NNBench -local + ``` + +- **delete** + + ```shell + hadoop jar juicefs-hadoop.jar nnbench delete -files 10000 -baseDir jfs://{JFS_NAME}/tmp/benchmarks/NNBench -local + ``` + +- **For reference** + + | Operation | TPS | Latency (ms) | + | --------- | ---- | ------------ | + | create | 644 | 1.55 | + | open | 3467 | 0.29 | + | rename | 483 | 2.07 | + | delete | 506 | 1.97 | + +#### I/O Performance + +- **sequential write** + + ```shell + hadoop jar juicefs-hadoop.jar dfsio -write -size 20000 -baseDir jfs://{JFS_NAME}/tmp/benchmarks/DFSIO -local + ``` + +- **sequential read** + + ```shell + hadoop jar juicefs-hadoop.jar dfsio -read -size 20000 -baseDir jfs://{JFS_NAME}/tmp/benchmarks/DFSIO -local + ``` + + When run the cmd for the second time, the result may be much better than the first run. It's because the data was cached in memory, just clean the local disk cache. + +- **For reference** + + | Operation | Throughput (MB/s) | + | --------- | ----------------- | + | write | 647 | + | read | 111 | + +If the network bandwidth of the machine is relatively low, it can generally reach the network bandwidth bottleneck. + +### 2. Distributed Benchmark + +The following command will start the MapReduce distributed task to test the metadata and IO performance. During the test, it is necessary to ensure that the cluster has sufficient resources to start the required map tasks. + +Computing resources used in this test: + +- **Server**: 4 cores and 32 GB memory, burst bandwidth 5Gbit/s x 3 +- **Database**: Alibaba Cloud Redis 5.0 Community 4G Master-Slave Edition + +#### Metadata + +- **create** + + ```shell + hadoop jar juicefs-hadoop.jar nnbench create -maps 10 -threads 10 -files 1000 -baseDir jfs://{JFS_NAME}/tmp/benchmarks/NNBench + ``` + + 10 map task, each has 10 threads, each thread create 1000 empty file. 
100000 files in total + +- **open** + + ```shell + hadoop jar juicefs-hadoop.jar nnbench open -maps 10 -threads 10 -files 1000 -baseDir jfs://{JFS_NAME}/tmp/benchmarks/NNBench + ``` + + 10 map task, each has 10 threads, each thread open 1000 file. 100000 files in total + +- **rename** + + ```shell + hadoop jar juicefs-hadoop.jar nnbench rename -maps 10 -threads 10 -files 1000 -baseDir jfs://{JFS_NAME}/tmp/benchmarks/NNBench + ``` + + 10 map task, each has 10 threads, each thread rename 1000 file. 100000 files in total + +- **delete** + + ```shell + hadoop jar juicefs-hadoop.jar nnbench delete -maps 10 -threads 10 -files 1000 -baseDir jfs://{JFS_NAME}/tmp/benchmarks/NNBench + ``` + + 10 map task, each has 10 threads, each thread delete 1000 file. 100000 files in total + +- **For reference** + + - 10 threads + + | Operation | IOPS | Latency (ms) | + | --------- | ---- | ------------ | + | create | 4178 | 2.2 | + | open | 9407 | 0.8 | + | rename | 3197 | 2.9 | + | delete | 3060 | 3.0 | + + - 100 threads + + | Operation | IOPS | Latency (ms) | + | --------- | ---- | ------------ | + | create | 11773 | 7.9 | + | open | 34083 | 2.4 | + | rename | 8995 | 10.8 | + | delete | 7191 | 13.6 | + +#### I/O Performance + +- **sequential write** + + ```shell + hadoop jar juicefs-hadoop.jar dfsio -write -maps 10 -size 10000 -baseDir jfs://{JFS_NAME}/tmp/benchmarks/DFSIO + ``` + + 10 map task, each task write 10000MB random data sequentially + +- **sequential read** + + ```shell + hadoop jar juicefs-hadoop.jar dfsio -read -maps 10 -size 10000 -baseDir jfs://{JFS_NAME}/tmp/benchmarks/DFSIO + ``` + + 10 map task, each task read 10000MB random data sequentially + + +- **For reference** + + | Operation | Average throughput (MB/s) | Total Throughput (MB/s) | + | --------- | ------------------------- | ----------------------- | + | write | 198 | 1835 | + | read | 124 | 1234 | + +### 3. TPC-DS + +The test dataset is 100GB in size, and both Parquet and ORC file formats are tested. + +This test only tests the first 10 queries. + +Spark Thrift JDBC/ODBC Server is used to start the Spark resident process and then submit the task via Beeline connection. + +#### Test Hardware + +| Node Category | Instance Type | CPU | Memory | Disk | Number | +| ------------- | ------------- | --- | ------ | ---- | ------ | +| Master | Alibaba Cloud ecs.r6.xlarge | 4 | 32GiB | System Disk: 100GiB | 1 | +| Core | Alibaba Cloud ecs.r6.xlarge | 4 | 32GiB | System Disk: 100GiB
Data Disk: 500GiB Ultra Disk x 2 | 3 | + +#### Software Configuration + +##### Spark Thrift JDBC/ODBC Server + +```shell +${SPARK_HOME}/sbin/start-thriftserver.sh \ + --master yarn \ + --driver-memory 8g \ + --executor-memory 10g \ + --executor-cores 3 \ + --num-executors 3 \ + --conf spark.locality.wait=100 \ + --conf spark.sql.crossJoin.enabled=true \ + --hiveconf hive.server2.thrift.port=10001 +``` + +##### JuiceFS Cache Configurations + +The 2 data disks of Core node are mounted in the `/data01` and `/data02` directories, and `core-site.xml` is configured as follows: + +```xml + + juicefs.cache-size + 200000 + + + juicefs.cache-dir + /data*/jfscache + + + juicefs.cache-full-block + false + + + juicefs.discover-nodes-url + yarn + + + juicefs.attr-cache + 3 + + + juicefs.entry-cache + 3 + + + juicefs.dir-entry-cache + 3 + +``` + +#### Test + +The task submission command is as follows: + +```shell +${SPARK_HOME}/bin/beeline -u jdbc:hive2://localhost:10001/${DATABASE} \ + -n hadoop \ + -f query{i}.sql +``` + +#### Results + +JuiceFS can use local disk as a cache to accelerate data access, the following data is the result (in seconds) after 4 runs using Redis and TiKV as the metadata engine of JuiceFS respectively. + +##### ORC + +| Queries | JuiceFS (Redis) | JuiceFS (TiKV) | HDFS | +| ------- | --------------- | -------------- | ---- | +| q1 | 20 | 20 | 20 | +| q2 | 28 | 33 | 26 | +| q3 | 24 | 27 | 28 | +| q4 | 300 | 309 | 290 | +| q5 | 116 | 117 | 91 | +| q6 | 37 | 42 | 41 | +| q7 | 24 | 28 | 23 | +| q8 | 13 | 15 | 16 | +| q9 | 87 | 112 | 89 | +| q10 | 23 | 24 | 22 | + +![orc](../images/spark_ql_orc.png) + +##### Parquet + +| Queries | JuiceFS (Redis) | JuiceFS (TiKV) | HDFS | +| ------- | --------------- | -------------- | ---- | +| q1 | 33 | 35 | 39 | +| q2 | 28 | 32 | 31 | +| q3 | 23 | 25 | 24 | +| q4 | 273 | 284 | 266 | +| q5 | 96 | 107 | 94 | +| q6 | 36 | 35 | 42 | +| q7 | 28 | 30 | 24 | +| q8 | 11 | 12 | 14 | +| q9 | 85 | 97 | 77 | +| q10 | 24 | 28 | 38 | + +![parquet](../images/spark_sql_parquet.png) + + +## FAQ + +### 1. `Class io.juicefs.JuiceFileSystem not found` exception + +It means JAR file was not loaded, you can verify it by `lsof -p {pid} | grep juicefs`. + +You should check whether the JAR file was located properly, or other users have the read permission. + +Some Hadoop distribution also need to modify `mapred-site.xml` and put the JAR file location path to the end of the parameter `mapreduce.application.classpath`. + +### 2. `No FilesSystem for scheme: jfs` exception + +It means JuiceFS Hadoop Java SDK was not configured properly, you need to check whether there is JuiceFS related configuration in the `core-site.xml` of the component configuration. diff --git a/docs/en/deployment/how_to_use_on_kubernetes.md b/docs/en/deployment/how_to_use_on_kubernetes.md new file mode 100644 index 0000000..0f72964 --- /dev/null +++ b/docs/en/deployment/how_to_use_on_kubernetes.md @@ -0,0 +1,370 @@ +--- +sidebar_label: Use JuiceFS on Kubernetes +sidebar_position: 2 +slug: /how_to_use_on_kubernetes +--- +# Use JuiceFS on Kubernetes + +JuiceFS is very suitable as a storage layer for Kubernetes clusters, and there are currently two common usages. 
+ +## JuiceFS CSI Driver + +[JuiceFS CSI Driver](https://github.com/juicedata/juicefs-csi-driver) follows [CSI](https://github.com/container-storage-interface/spec/blob/master/spec.md) specification, which implements the interface between the container orchestration system and the JuiceFS file system, and supports the dynamic configuration of JuiceFS volumes for use by Pod. + +### Prerequisites + +- Kubernetes 1.14+ + +### Installation + +JuiceFS CSI Driver has the following two installation methods. + +#### Install with Helm + +Helm is the package manager of Kubernetes, and Chart is the package managed by Helm. You can think of it as the equivalent of Homebrew formula, APT dpkg, or YUM RPM in Kubernetes. + +This installation method requires Helm **3.1.0** and above. For the specific installation method, please refer to ["Helm Installation Guide"](https://github.com/helm/helm#install). + +1. Prepare a configuration file for setting the basic information of the storage class, for example: `values.yaml`, copy and complete the following configuration information. Among them, the `backend` part is JuiceFS file system related information, you can refer to [JuiceFS Quick Start Guide](../getting-started/for_local.md) for related content. If you are using a JuiceFS volume that has been created in advance, you only need to fill in the two items `name` and `metaurl`. The `mountPod` part can set the resource configuration of CPU and memory for the Pod using this driver. Unneeded items can be deleted, or its value can be left blank. + +```yaml +storageClasses: +- name: juicefs-sc + enabled: true + reclaimPolicy: Retain + backend: + name: "test" + metaurl: "redis://juicefs.afyq4z.0001.use1.cache.amazonaws.com/3" + storage: "s3" + accessKey: "" + secretKey: "" + bucket: "https://juicefs-test.s3.us-east-1.amazonaws.com" + mountPod: + resources: + limits: + cpu: "" + memory: "" + requests: + cpu: "" + memory: "" +``` + +On cloud platforms that support "role management", you can assign "service role" to Kubernetes nodes to achieve key-free access to the object storage API. In this case, there is no need to set the `accessKey` and `secretKey` in the configuration file. + +2. Execute the following three commands in sequence to deploy JuiceFS CSI Driver through Helm. + +```shell +$ helm repo add juicefs-csi-driver https://juicedata.github.io/juicefs-csi-driver/ +$ helm repo update +$ helm install juicefs-csi-driver juicefs-csi-driver/juicefs-csi-driver -n kube-system -f ./values.yaml +``` + +3. Check the deployment + +- **Check Pods**: the deployment will launch a `StatefulSet` named `juicefs-csi-controller` with replica `1` and a `DaemonSet` named `juicefs-csi-node`, so run `kubectl -n kube-system get pods -l app.kubernetes.io/name=juicefs-csi-driver` should see `n+1` (where `n` is the number of worker nodes of the Kubernetes cluster) pods is running. 
For example: + +```sh +$ kubectl -n kube-system get pods -l app.kubernetes.io/name=juicefs-csi-driver +NAME READY STATUS RESTARTS AGE +juicefs-csi-controller-0 3/3 Running 0 22m +juicefs-csi-node-v9tzb 3/3 Running 0 14m +``` + +- **Check secret**: `kubectl -n kube-system describe secret juicefs-sc-secret` will show the secret with above `backend` fields in `values.yaml`: + +```sh +$ kubectl -n kube-system describe secret juicefs-sc-secret +Name: juicefs-sc-secret +Namespace: kube-system +Labels: app.kubernetes.io/instance=juicefs-csi-driver + app.kubernetes.io/managed-by=Helm + app.kubernetes.io/name=juicefs-csi-driver + app.kubernetes.io/version=0.7.0 + helm.sh/chart=juicefs-csi-driver-0.1.0 +Annotations: meta.helm.sh/release-name: juicefs-csi-driver + meta.helm.sh/release-namespace: default + +Type: Opaque + +Data +==== +access-key: 0 bytes +bucket: 47 bytes +metaurl: 54 bytes +name: 4 bytes +secret-key: 0 bytes +storage: 2 bytes +``` + +- **Check storage class**: `kubectl get sc juicefs-sc` will show the storage class like this: + +```sh +$ kubectl get sc juicefs-sc +NAME PROVISIONER RECLAIMPOLICY VOLUMEBINDINGMODE ALLOWVOLUMEEXPANSION AGE +juicefs-sc csi.juicefs.com Retain Immediate false 69m +``` + +#### Install with kubectl + +Since Kubernetes will discard some old APIs during the version change process, you need to select the applicable deployment file according to the version of Kubernetes you are using: + +**Kubernetes v1.18 and above** + +```shell +$ kubectl apply -f https://raw.githubusercontent.com/juicedata/juicefs-csi-driver/master/deploy/k8s.yaml +``` + +**Version below Kubernetes v1.18** + +```shell +$ kubectl apply -f https://raw.githubusercontent.com/juicedata/juicefs-csi-driver/master/deploy/k8s_before_v1_18.yaml +``` + +**Create storage class** + +Create a configuration file with reference to the following content, for example: `juicefs-sc.yaml`, fill in the configuration information of the JuiceFS file system in the `stringData` section: + +```yaml +apiVersion: v1 +kind: Secret +metadata: + name: juicefs-sc-secret + namespace: kube-system +type: Opaque +stringData: + name: "test" + metaurl: "redis://juicefs.afyq4z.0001.use1.cache.amazonaws.com/3" + storage: "s3" + bucket: "https://juicefs-test.s3.us-east-1.amazonaws.com" + access-key: "" + secret-key: "" +--- +apiVersion: storage.k8s.io/v1 +kind: StorageClass +metadata: + name: juicefs-sc +provisioner: csi.juicefs.com +reclaimPolicy: Retain +volumeBindingMode: Immediate +parameters: + csi.storage.k8s.io/node-publish-secret-name: juicefs-sc-secret + csi.storage.k8s.io/node-publish-secret-namespace: kube-system + csi.storage.k8s.io/provisioner-secret-name: juicefs-sc-secret + csi.storage.k8s.io/provisioner-secret-namespace: kube-system +``` + +Execute the command to deploy the storage class: + +```shell +$ kubectl apply -f ./juicefs-sc.yaml +``` + +In addition, you can also extract the Secret part of the above configuration file and create it on the command line through `kubectl`: + +```bash +$ kubectl -n kube-system create secret generic juicefs-sc-secret \ + --from-literal=name=test \ + --from-literal=metaurl=redis://juicefs.afyq4z.0001.use1.cache.amazonaws.com/3 \ + --from-literal=storage=s3 \ + --from-literal=bucket=https://juicefs-test.s3.us-east-1.amazonaws.com \ + --from-literal=access-key="" \ + --from-literal=secret-key="" +``` + +In this way, the storage class configuration file `juicefs-sc.yaml` should look like the following: + +```yaml +apiVersion: storage.k8s.io/v1 +kind: StorageClass +metadata: + name: 
juicefs-sc +provisioner: csi.juicefs.com +reclaimPolicy: Retain +parameters: + csi.storage.k8s.io/node-publish-secret-name: juicefs-sc-secret + csi.storage.k8s.io/node-publish-secret-namespace: kube-system + csi.storage.k8s.io/provisioner-secret-name: juicefs-sc-secret + csi.storage.k8s.io/provisioner-secret-namespace: kube-system +``` + +Then deploy the storage class through `kubectl apply`: + +```shell +$ kubectl apply -f ./juicefs-sc.yaml +``` + +### Use JuiceFS + +JuiceFS CSI Driver supports both static and dynamic PV. You can either manually assign the PV created in advance to Pods, or you can dynamically create volumes through PVC when Pods are deployed. + +For example, you can use the following configuration to create a configuration file named `development.yaml`, which creates a persistent volume for the Nginx container through PVC and mounts it to the container's `/config` directory: + +```yaml +apiVersion: v1 +kind: PersistentVolumeClaim +metadata: + name: web-pvc +spec: + accessModes: + - ReadWriteMany + resources: + requests: + storage: 10Pi + storageClassName: juicefs-sc +--- +apiVersion: apps/v1 +kind: Deployment +metadata: + name: nginx-run +spec: + selector: + matchLabels: + app: nginx + template: + metadata: + labels: + app: nginx + spec: + containers: + - name: nginx + image: linuxserver/nginx + ports: + - containerPort: 80 + volumeMounts: + - mountPath: /config + name: web-data + volumes: + - name: web-data + persistentVolumeClaim: + claimName: web-pvc +``` + +Deploy Pods via `kubectl apply`: + +``` +$ kubectl apply -f ./development.yaml +``` + +After the deployment is successful, check the pods status: + +```shell +$ kubectl get pods +NAME READY STATUS RESTARTS AGE +nginx-run-7d6fb7d6df-cfsvp 1/1 Running 0 21m +``` + +We can simply use the `kubectl exec` command to view the file system mount status in the container: + +```shell +$ kubectl exec nginx-run-7d6fb7d6df-cfsvp -- df -Th +Filesystem Type Size Used Avail Use% Mounted on +overlay overlay 40G 7.0G 34G 18% / +tmpfs tmpfs 64M 0 64M 0% /dev +tmpfs tmpfs 3.8G 0 3.8G 0% /sys/fs/cgroup +JuiceFS:jfs fuse.juicefs 1.0P 180M 1.0P 1% /config +... +``` + +From the results returned from the container, we can see that it is in full compliance with expectations, and the JuiceFS volume has been mounted to the `/config` directory we specified. + +When a PV is dynamically created through PVC as above, JuiceFS will create a directory with the same name as the PV in the root directory of the file system and mount it to the container. Execute the following command to view all PVs in the cluster: + +```shell +$ kubectl get pv -A +NAME CAPACITY ACCESS MODES RECLAIM POLICY STATUS CLAIM STORAGECLASS REASON AGE +pvc-b670c8a1-2962-497c-afa2-33bc8b8bb05d 10Pi RWX Retain Bound default/web-pvc juicefs-sc 34m +``` + +Mount the same JuiceFS storage through an external host, and you can see the PVs currently in use and the PVs that have been created. + +![](../images/pv-on-juicefs.png) + +For more details about JuiceFS CSI Driver please refer to [project homepage](https://github.com/juicedata/juicefs-csi-driver). + +### Create more JuiceFS storage classes + +You can repeat the previous steps according to actual needs to create any number of storage classes through JuiceFS CSI Driver. But pay attention to modifying the name of the storage class and the configuration information of the JuiceFS file system to avoid conflicts with the created storage class. 
For example, when using Helm, you can create a configuration file named `jfs2.yaml`: + +```yaml +storageClasses: +- name: jfs-sc2 + enabled: true + reclaimPolicy: Retain + backend: + name: "jfs-2" + metaurl: "redis://example.abc.0001.use1.cache.amazonaws.com/3" + storage: "s3" + accessKey: "" + secretKey: "" + bucket: "https://jfs2.s3.us-east-1.amazonaws.com" +``` + +Execute the Helm command to deploy: + +```shell +$ helm repo add juicefs-csi-driver https://juicedata.github.io/juicefs-csi-driver/ +$ helm repo update +$ helm upgrade juicefs-csi-driver juicefs-csi-driver/juicefs-csi-driver --install -f ./jfs2.yaml +``` + +View the storage classes in the cluster: + +```shell +$ kubectl get sc +NAME PROVISIONER RECLAIMPOLICY VOLUMEBINDINGMODE ALLOWVOLUMEEXPANSION AGE +juicefs-sc csi.juicefs.com Retain Immediate false 88m +juicefs-sc2 csi.juicefs.com Retain Immediate false 13m +standard (default) k8s.io/minikube-hostpath Delete Immediate false 128m +``` + +### Monitoring + +Please see the ["Monitoring"](../administration/monitoring.md) documentation to learn how to collect and display JuiceFS monitoring metrics. + +## Mount JuiceFS in the container + +In some cases, you may need to mount JuiceFS volume directly in the container, which requires the use of the JuiceFS client in the container. You can refer to the following `Dockerfile` sample to integrate the JuiceFS client into the application image: + +```dockerfile +FROM alpine:latest +LABEL maintainer="Juicedata " + +# Install JuiceFS client +RUN apk add --no-cache curl && \ + JFS_LATEST_TAG=$(curl -s https://api.github.com/repos/juicedata/juicefs/releases/latest | grep 'tag_name' | cut -d '"' -f 4 | tr -d 'v') && \ + wget "https://github.com/juicedata/juicefs/releases/download/v${JFS_LATEST_TAG}/juicefs-${JFS_LATEST_TAG}-linux-amd64.tar.gz" && \ + tar -zxf "juicefs-${JFS_LATEST_TAG}-linux-amd64.tar.gz" && \ + install juicefs /usr/bin && \ + rm juicefs "juicefs-${JFS_LATEST_TAG}-linux-amd64.tar.gz" && \ + rm -rf /var/cache/apk/* && \ + apk del curl + +ENTRYPOINT ["/usr/bin/juicefs", "mount"] +``` + +Since JuiceFS needs to use the FUSE device to mount the file system, it is necessary to allow the container to run in a privileged mode when creating a Pod: + +```yaml +apiVersion: apps/v1 +kind: Deployment +metadata: + name: nginx-run +spec: + selector: + matchLabels: + app: nginx + template: + metadata: + labels: + app: nginx + spec: + containers: + - name: nginx + image: linuxserver/nginx + ports: + - containerPort: 80 + securityContext: + privileged: true +``` + +> ⚠️ **Risk Warning**: After enabling the privileged mode of `privileged: true`, the container has access to all devices of the host, that is, it has full control of the host's kernel. Improper use will bring serious safety hazards, please conduct a sufficient safety assessment before using this method. diff --git a/docs/en/deployment/juicefs_on_docker.md b/docs/en/deployment/juicefs_on_docker.md new file mode 100644 index 0000000..da13af9 --- /dev/null +++ b/docs/en/deployment/juicefs_on_docker.md @@ -0,0 +1,107 @@ +--- +sidebar_label: Use JuiceFS on Docker +sidebar_position: 1 +slug: /juicefs_on_docker +--- +# Use JuiceFS on Docker + +There are three ways to use JuiceFS on Docker: + +## 1. Volume Mapping + +This method is to map the directories in the JuiceFS mount point to the Docker container. For example, the JuiceFS storage is mounted in the `/mnt/jfs` directory. 
When creating a container, you can map JuiceFS storage to the Docker container as follows: + +```shell +$ sudo docker run -d --name nginx \ + -v /mnt/jfs/html:/usr/share/nginx/html \ + -p 8080:80 \ + nginx +``` + +By default, only the user who mounts the JuiceFS storage has access permissions for the storage. If you need to map the JuiceFS storage to a Docker container and you are not mounting the JuiceFS storage as root, you need to enable the FUSE `user_allow_other` option first, and then re-mount JuiceFS with the `-o allow_other` option. + +> **Note**: JuiceFS storage mounted with the root user identity or `sudo` will automatically add the `allow_other` option, no manual setting is required. + +### FUSE Setting + +By default, the `allow_other` option is only allowed to be used by the root user. In order to allow other users to use this mount option, the FUSE configuration file needs to be modified. + +#### Change the configuration file + +Edit the configuration file of FUSE, usually `/etc/fuse.conf`: + +```sh +$ sudo nano /etc/fuse.conf +``` + +Delete the `# ` symbol in front of `user_allow_other` in the configuration file, and modify it as follows: + +```conf +# /etc/fuse.conf - Configuration file for Filesystem in Userspace (FUSE) + +# Set the maximum number of FUSE mounts allowed to non-root users. +# The default is 1000. +#mount_max = 1000 + +# Allow non-root users to specify the allow_other or allow_root mount options. +user_allow_other +``` + +#### Re-mount JuiceFS + +After the `allow_other` option of FUSE is enabled, you need to re-mount the JuiceFS file system with the `allow_other` option, for example: + +```sh +$ juicefs mount -d -o allow_other redis://:6379/1 /mnt/jfs +``` + +## 2. Docker Volume Plugin + +We can also use the [volume plugin](https://docs.docker.com/engine/extend/) to access JuiceFS. + +```sh +$ docker plugin install juicedata/juicefs +Plugin "juicedata/juicefs" is requesting the following privileges: + - network: [host] + - device: [/dev/fuse] + - capabilities: [CAP_SYS_ADMIN] +Do you grant the above permissions? [y/N] + +$ docker volume create -d juicedata/juicefs:latest -o name={{VOLUME_NAME}} -o metaurl={{META_URL}} -o access-key={{ACCESS_KEY}} -o secret-key={{SECRET_KEY}} jfsvolume +$ docker run -it -v jfsvolume:/opt busybox ls /opt +``` + +Replace `{{VOLUME_NAME}}`, `{{META_URL}}`, `{{ACCESS_KEY}}` and `{{SECRET_KEY}}` above with your own volume settings. For more details about the JuiceFS volume plugin, refer to the [juicedata/docker-volume-juicefs](https://github.com/juicedata/docker-volume-juicefs) repository. + +## 3. Mount JuiceFS in a Container + +This method mounts and uses the JuiceFS storage directly inside the Docker container. Compared with the first method, mounting JuiceFS directly in the container reduces the chance of accidental file operations and makes container management clearer and more intuitive. + +Since mounting the file system inside the container requires the JuiceFS client to be copied into the container, the steps of downloading (or copying) the JuiceFS client and mounting the file system need to be written into the Dockerfile, and the image then rebuilt. For example, you can refer to the following Dockerfile to package the JuiceFS client into the Alpine image.
+ +```dockerfile +FROM alpine:latest +LABEL maintainer="Juicedata " + +# Install JuiceFS client +RUN apk add --no-cache curl && \ + JFS_LATEST_TAG=$(curl -s https://api.github.com/repos/juicedata/juicefs/releases/latest | grep 'tag_name' | cut -d '"' -f 4 | tr -d 'v') && \ + wget "https://github.com/juicedata/juicefs/releases/download/v${JFS_LATEST_TAG}/juicefs-${JFS_LATEST_TAG}-linux-amd64.tar.gz" && \ + tar -zxf "juicefs-${JFS_LATEST_TAG}-linux-amd64.tar.gz" && \ + install juicefs /usr/bin && \ + rm juicefs "juicefs-${JFS_LATEST_TAG}-linux-amd64.tar.gz" && \ + rm -rf /var/cache/apk/* && \ + apk del curl + +ENTRYPOINT ["/usr/bin/juicefs", "mount"] +``` + +In addition, since the use of FUSE in the container requires corresponding permissions, when creating the container, you need to specify the `--privileged=true` option, for example: + +```shell +$ sudo docker run -d --name nginx \ + -v /mnt/jfs/html:/usr/share/nginx/html \ + -p 8080:80 \ + --privileged=true \ + nginx-with-jfs +``` diff --git a/docs/en/deployment/s3_gateway.md b/docs/en/deployment/s3_gateway.md new file mode 100644 index 0000000..0d62dbe --- /dev/null +++ b/docs/en/deployment/s3_gateway.md @@ -0,0 +1,247 @@ +--- +sidebar_label: Deploy JuiceFS S3 Gateway +sidebar_position: 4 +slug: /s3_gateway +--- +# Deploy JuiceFS S3 Gateway + +JuiceFS has introduced S3 gateway since v0.11, a feature implemented through the [MinIO S3 Gateway](https://docs.min.io/docs/minio-gateway-for-s3.html). It provides an S3-compatible RESTful API for files on JuiceFS, enabling the management of files stored on JuiceFS with tools such as s3cmd, AWS CLI, MinIO Client (mc), etc. in cases where mounting is not convenient. In addition, S3 gateway also provides a web-based file manager that allows users to use a browser to manage the files on JuiceFS. + +Because JuiceFS stores files in chunks, the files cannot be accessed directly using the interface of the underlying object storage. The S3 gateway provides similar access to the underlying object storage, with the following architecture diagram. + +![](../images/juicefs-s3-gateway-arch.png) + +## Prerequisites + +The S3 gateway is a feature built on top of the JuiceFS file system. If you do not have a JuiceFS file system, please refer to the [quick start guide](../getting-started/for_local.md) to create one first. + +JuiceFS S3 gateway is a feature introduced in v0.11, please make sure you have the latest version of JuiceFS. + +## Quickstart + +The S3 gateway can be enabled on the current host using the `gateway` subcommand of JuiceFS. Before enabling the feature, you need to set `MINIO_ROOT_USER` and `MINIO_ROOT_PASSWORD` environment variables, which are the Access Key and Secret Key used to authenticate when accessing the S3 API, and can be simply considered as the username and password of the S3 gateway. For example. + +```shell +$ export MINIO_ROOT_USER=admin +$ export MINIO_ROOT_PASSWORD=12345678 +$ juicefs gateway redis://localhost:6379 localhost:9000 +``` + +Among the above three commands, the first two commands are used to set environment variables. Note that the length of `MINIO_ROOT_USER` is at least 3 characters, and the length of `MINIO_ROOT_PASSWORD` is at least 8 characters (Windows users should set the environment variable with the `set` command instead, e.g., `set MINIO_ROOT_USER=admin`). + +The last command is used to enable the S3 gateway. 
The `gateway` subcommand requires at least two parameters, the first is the URL of the database where the metadata is stored, and the second is the address and port on which the S3 gateway is listening. You can add [other options](../reference/command_reference.md#juicefs-gateway) to the `gateway` subcommand to optimize the S3 gateway as needed, for example, to set the default local cache to 20 GiB. + +```shell +$ juicefs gateway --cache-size 20480 redis://localhost:6379 localhost:9000 +``` + +In this example, we assume that the JuiceFS file system is using a local Redis database. When the S3 gateway is enabled, the administrative interface of the S3 gateway can be accessed on the **current host** using the address `http://localhost:9000`. + +![](../images/s3-gateway-file-manager.jpg) + +If you want to access the S3 gateway through other hosts on the LAN or the Internet, you need to adjust the listening address, e.g. + +```shell +$ juicefs gateway redis://localhost:6379 0.0.0.0:9000 +``` + +In this way, the S3 gateway will accept all network requests by default. S3 clients in different locations can access the S3 gateway using different addresses, e.g. + +- Third-party clients in the host where the S3 gateway is located can use `http://127.0.0.1:9000` or `http://localhost:9000` for access. +- A third-party client on the same LAN as the host where the S3 gateway is located can access it using `http://192.168.1.8:9000` (assuming the intranet IP address of the S3 gateway-enabled host is 192.168.1.8). +- The S3 gateway can be accessed via the Internet using `http://110.220.110.220:9000` (assuming that the public IP address of the S3 gateway-enabled host is 110.220.110.220). + +## Accessing S3 gateway + +The JuiceFS S3 gateway can be accessed by various clients, desktop applications, web applications, etc. that support the S3 API. Please note the address and port that the S3 gateway listens on when using it. + +:::tip +The following examples are for using a third-party client to access the S3 gateway running on the local host. In specific scenarios, please adjust the address to access the S3 gateway according to the actual situation. +::: + +### Using the AWS CLI + +```bash +$ aws configure +AWS Access Key ID [None]: admin +AWS Secret Access Key [None]: 12345678 +Default region name [None]: +Default output format [None]: +``` + +The program will guide you through adding the new configuration interactively, where `Access Key ID` is the same as `MINIO_ROOT_USER` and `Secret Access Key` is the same as `MINIO_ROOT_PASSWORD`, leave the region name and output format blank. + +After that, you can access the JuiceFS storage using the `aws s3` command, for example: + +```bash +# List buckets +$ aws --endpoint-url http://localhost:9000 s3 ls + +# List objects in bucket +$ aws --endpoint-url http://localhost:9000 s3 ls s3:// +``` + +### Using the MinIO client + +First install `mc` by referring to the [MinIO download page](https://min.io/download), then add a new alias: + +```bash +$ mc alias set juicefs http://localhost:9000 admin 12345678 --api S3v4 +``` + +Following the mc command format, the above command creates a configuration with the alias `juicefs`. In particular, note that the API version must be specified in the command, `-api "s3v4"`. + +Then, you can freely manage the copying, moving, adding and deleting of files and folders between your local disk and JuiceFS storage as well as other cloud storage via the mc client. 
+ +```shell +$ mc ls juicefs/jfs +[2021-10-20 11:59:00 CST] 130KiB avatar-2191932_1920.png +[2021-10-20 11:59:00 CST] 4.9KiB box-1297327.svg +[2021-10-20 11:59:00 CST] 21KiB cloud-4273197.svg +[2021-10-20 11:59:05 CST] 17KiB hero.svg +[2021-10-20 11:59:06 CST] 1.7MiB hugo-rocha-qFpnvZ_j9HU-unsplash.jpg +[2021-10-20 11:59:06 CST] 16KiB man-1352025.svg +[2021-10-20 11:59:06 CST] 1.3MiB man-1459246.ai +[2021-10-20 11:59:08 CST] 19KiB sign-up-accent-left.07ab168.svg +[2021-10-20 11:59:10 CST] 11MiB work-4997565.svg +``` + +## Deploy JuiceFS S3 Gateway in Kubernetes + +### Install via kubectl + +Create a secret (take Amazon S3 as an example): + +```shell +export NAMESPACE=default +``` + +```shell +kubectl -n ${NAMESPACE} create secret generic juicefs-secret \ + --from-literal=name= \ + --from-literal=metaurl=redis://[:]@:6379[/] \ + --from-literal=storage=s3 \ + --from-literal=bucket=https://.s3..amazonaws.com \ + --from-literal=access-key= \ + --from-literal=secret-key= +``` + +- `name`: The JuiceFS file system name. +- `metaurl`: Connection URL for metadata engine (e.g. Redis). Read [this document](../reference/how_to_setup_metadata_engine.md) for more information. +- `storage`: Object storage type, such as `s3`, `gs`, `oss`. Read [this document](../reference/how_to_setup_object_storage.md) for the full supported list. +- `bucket`: Bucket URL. Read [this document](../reference/how_to_setup_object_storage.md) to learn how to setup different object storage. +- `access-key`: Access key of object storage. Read [this document](../reference/how_to_setup_object_storage.md) for more information. +- `secret-key`: Secret key of object storage. Read [this document](../reference/how_to_setup_object_storage.md) for more information. + +Then download the S3 gateway [deployment YAML](https://github.com/juicedata/juicefs/blob/main/deploy/juicefs-s3-gateway.yaml) and create the `Deployment` and `Service` resources via `kubectl`. The following points require special attention: + +- Please replace `${NAMESPACE}` in the following command with the Kubernetes namespace of the actual S3 gateway deployment, which defaults to `kube-system`. +- The `replicas` for `Deployment` defaults to 1, please adjust as appropriate. +- The latest version of `juicedata/juicefs-csi-driver` image is used by default, which already integrates the latest version of JuiceFS client, please check [here](https://github.com/juicedata/juicefs-csi-driver/releases) for the specific integrated JuiceFS client version. +- The `initContainers` of `Deployment` will first try to format the JuiceFS file system, if you have already formatted it in advance, this step will not affect the existing JuiceFS file system. +- The default port number that the S3 gateway listens on is 9000 +- The [startup options](../reference/command_reference.md#juicefs-gateway) of S3 gateway are the default values, please adjust them according to your actual needs. +- The value of `MINIO_ROOT_USER` environment variable is `access-key` in Secret, and the value of `MINIO_ROOT_PASSWORD` environment variable is `secret-key` in Secret. 
+ +```shell +curl -sSL https://raw.githubusercontent.com/juicedata/juicefs/main/deploy/juicefs-s3-gateway.yaml | sed "s@kube-system@${NAMESPACE}@g" | kubectl apply -f - +``` + +Check if it's deployed successfully: + +```shell +# kubectl -n $NAMESPACE get po -o wide -l app.kubernetes.io/name=juicefs-s3-gateway +juicefs-s3-gateway-5c7d65c77f-gj69l 1/1 Running 0 37m 10.244.2.238 kube-node-3 +# kubectl -n $NAMESPACE get svc -l app.kubernetes.io/name=juicefs-s3-gateway +NAME TYPE CLUSTER-IP EXTERNAL-IP PORT(S) AGE +juicefs-s3-gateway ClusterIP 10.101.108.42 9000/TCP 142m +``` + +You can use `juicefs-s3-gateway.${NAMESPACE}.svc.cluster.local:9000` or pod IP and port number of `juicefs-s3-gateway` (e.g. `10.244.2.238:9000`) in the application pod to access JuiceFS S3 Gateway. + +If you want to access through Ingress, you need to ensure that the Ingress Controller has been deployed in the cluster. Refer to [Ingress Controller Deployment Document](https://kubernetes.github.io/ingress-nginx/deploy/). Then create an `Ingress` resource: + +```yaml +kubectl apply -f - <` of ingress controller as follows (no need to include the 9000 port number): + +```shell +kubectl get services -n ingress-nginx +``` + +There are some differences between the various versions of Ingress. For more usage methods, please refer to [Ingress Controller Usage Document](https://kubernetes.github.io/ingress-nginx/user-guide/basic-usage/). + +### Install via Helm + +1. Prepare a YAML file + + Create a configuration file, for example: `values.yaml`, copy and complete the following configuration information. Among them, the `secret` part is the information related to the JuiceFS file system, you can refer to [JuiceFS Quick Start Guide](../getting-started/for_local.md) for more information. + + ```yaml + secret: + name: "" + metaurl: "" + storage: "" + accessKey: "" + secretKey: "" + bucket: "" + ``` + + If you want to deploy Ingress, add these in `values.yaml`: + + ```yaml + ingress: + enables: true + ``` + +2. Deploy + + Execute the following three commands in sequence to deploy the JuiceFS S3 gateway via Helm (note that the following example is deployed to the `kube-system` namespace). + + ```sh + helm repo add juicefs-s3-gateway https://juicedata.github.io/charts/ + helm repo update + helm install juicefs-s3-gateway juicefs-s3-gateway/juicefs-s3-gateway -n kube-system -f ./values.yaml + ``` + +3. Check the deployment + + - **Check pods are running**: the deployment will launch a `Deployment` named `juicefs-s3-gateway`, so run `kubectl -n kube-system get po -l app.kubernetes.io/name=juicefs-s3-gateway` should see all running pods. For example: + + ```sh + $ kubectl -n kube-system get po -l app.kubernetes.io/name=juicefs-s3-gateway + NAME READY STATUS RESTARTS AGE + juicefs-s3-gateway-5c69d574cc-t92b6 1/1 Running 0 136m + ``` + + - **Check Service**: run `kubectl -n kube-system get svc -l app.kubernetes.io/name=juicefs-s3-gateway` to check Service: + + ```shell + $ kubectl -n kube-system get svc -l app.kubernetes.io/name=juicefs-s3-gateway + NAME TYPE CLUSTER-IP EXTERNAL-IP PORT(S) AGE + juicefs-s3-gateway ClusterIP 10.101.108.42 9000/TCP 142m + ``` + +## Monitoring + +Please see the ["Monitoring"](../administration/monitoring.md) documentation to learn how to collect and display JuiceFS monitoring metrics. 
diff --git a/docs/en/development/contributing_guide.md b/docs/en/development/contributing_guide.md new file mode 100644 index 0000000..929dada --- /dev/null +++ b/docs/en/development/contributing_guide.md @@ -0,0 +1,8 @@ +--- +sidebar_label: Contributing Guide +sidebar_position: 1 +--- +# Contributing Guide + +:::note +Working in progress. \ No newline at end of file diff --git a/docs/en/development/format.md b/docs/en/development/format.md new file mode 100644 index 0000000..c32de3f --- /dev/null +++ b/docs/en/development/format.md @@ -0,0 +1,8 @@ +--- +sidebar_label: Storage Format +sidebar_position: 3 +--- +# Storage Format + +:::note +Working in progress. \ No newline at end of file diff --git a/docs/en/development/io_processing.md b/docs/en/development/io_processing.md new file mode 100644 index 0000000..a8b553e --- /dev/null +++ b/docs/en/development/io_processing.md @@ -0,0 +1,9 @@ +--- +sidebar_label: Read/Write/Delete Processing Flow +sidebar_position: 2 +slug: /internals/io_processing +--- +# Read and write request processing flow + +Working in progress... + diff --git a/docs/en/development/metadata-design/_kv.md b/docs/en/development/metadata-design/_kv.md new file mode 100644 index 0000000..f13b7d7 --- /dev/null +++ b/docs/en/development/metadata-design/_kv.md @@ -0,0 +1,5 @@ +--- +sidebar_label: Distributed K/V Store +sidebar_position: 3 +--- +# Metadata Design - Distributed K/V Store \ No newline at end of file diff --git a/docs/en/development/metadata-design/_redis.md b/docs/en/development/metadata-design/_redis.md new file mode 100644 index 0000000..09b9482 --- /dev/null +++ b/docs/en/development/metadata-design/_redis.md @@ -0,0 +1,5 @@ +--- +sidebar_label: Redis +sidebar_position: 1 +--- +# Metadata Design - Redis \ No newline at end of file diff --git a/docs/en/development/metadata-design/_sql.md b/docs/en/development/metadata-design/_sql.md new file mode 100644 index 0000000..6b412c3 --- /dev/null +++ b/docs/en/development/metadata-design/_sql.md @@ -0,0 +1,5 @@ +--- +sidebar_label: SQL Engine +sidebar_position: 2 +--- +# Metadata Design - SQL Engine \ No newline at end of file diff --git a/docs/en/faq.md b/docs/en/faq.md new file mode 100644 index 0000000..df77aee --- /dev/null +++ b/docs/en/faq.md @@ -0,0 +1,110 @@ +# FAQ + +## Why doesn't JuiceFS support XXX object storage? + +JuiceFS already supported many object storage, please check [the list](reference/how_to_setup_object_storage.md#supported-object-storage) first. If this object storage is compatible with S3, you could treat it as S3. Otherwise, try reporting issue. + +## Can I use Redis cluster? + +The simple answer is no. JuiceFS uses [transaction](https://redis.io/topics/transactions) to guarantee the atomicity of metadata operations, which is not well supported in cluster mode. Sentinal or other HA solution for Redis are needed. + +See ["Redis Best Practices"](administration/metadata/redis_best_practices.md) for more information. + +## What's the difference between JuiceFS and XXX? + +See ["Comparison with Others"](comparison/juicefs_vs_alluxio.md) for more information. + +## How is the performance of JuiceFS? + +JuiceFS is a distributed file system, the latency of metedata is determined by 1 (reading) or 2 (writing) round trip(s) between client and metadata service (usually 1-3 ms). The latency of first byte is determined by the performance of underlying object storage (usually 20-100 ms). 
Throughput of sequential read/write could be 50MB/s - 2800MiB/s (see [fio benchmark](benchmark/fio.md) for more information), depending on network bandwidth and how well the data can be compressed. + +JuiceFS is built with multiple layers of caching (invalidated automatically); once the cache is warmed up, the latency and throughput of JuiceFS can be close to a local file system (plus the overhead of FUSE). + +## Does JuiceFS support random read/write? + +Yes, including random reads/writes issued through mmap. Currently JuiceFS is optimized for sequential reading/writing, while optimization for random reading/writing is still a work in progress. If you want better random read performance, it's recommended to turn off compression ([`--compress none`](reference/command_reference.md#juicefs-format)). + +## When will my updates be visible to other clients? + +All metadata updates are immediately visible to all other clients. JuiceFS guarantees close-to-open consistency, see ["Consistency"](administration/cache_management.md#consistency) for more information. + +New data written by `write()` will be buffered in the kernel or the client; it is visible to other processes on the same machine, but not to other machines. + +Either call `fsync()`, `fdatasync()` or `close()` to force upload the data to the object storage and update the metadata, or wait a few seconds for the automatic refresh, after which other clients can see the updates. This is also the strategy adopted by the vast majority of distributed file systems. + +See ["Write Cache in Client"](administration/cache_management.md#write-cache-in-client) for more information. + +## How to copy a large number of small files into JuiceFS quickly? + +You could mount JuiceFS with the [`--writeback` option](reference/command_reference.md#juicefs-mount), which will write the small files to local disks first and then upload them to the object storage in the background; this speeds up copying many small files into JuiceFS. + +See ["Write Cache in Client"](administration/cache_management.md#write-cache-in-client) for more information. + +## Can I mount JuiceFS without `root`? + +Yes, JuiceFS can be mounted with the `juicefs` command without root. The default directory for caching is `$HOME/.juicefs/cache` (macOS) or `/var/jfsCache` (Linux), so you should change it to a directory where you have write permission. + +See ["Read Cache in Client"](administration/cache_management.md#read-cache-in-client) for more information. + +## How to unmount JuiceFS? + +Use the [`juicefs umount`](reference/command_reference.md#juicefs-umount) command to unmount the volume. + +## How to upgrade JuiceFS client? + +First unmount the JuiceFS volume, then re-mount the volume with the newer version of the client. + +## `docker: Error response from daemon: error while creating mount source path 'XXX': mkdir XXX: file exists.` + +When you use [Docker bind mounts](https://docs.docker.com/storage/bind-mounts) to mount a directory on the host machine into a container, you may encounter this error. The reason is that the `juicefs mount` command was executed by a non-root user, so the Docker daemon doesn't have permission to access the directory. + +There are two solutions to this problem: + +1. Execute the `juicefs mount` command as the root user +2. Modify the FUSE configuration and add the `allow_other` mount option (see [this document](reference/fuse_mount_options.md#allow_other) for more information), as shown in the example below.
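+For instance, here is a minimal sketch of the second solution, assuming the file system is mounted at `/mnt/jfs` with a local Redis metadata engine (adjust the mount point and metadata URL to your own setup):
+
+```sh
+# Allow non-root users to use the allow_other mount option
+# (equivalent to un-commenting user_allow_other in /etc/fuse.conf)
+$ echo user_allow_other | sudo tee -a /etc/fuse.conf
+
+# Re-mount JuiceFS with allow_other so the Docker daemon can access the directory
+$ juicefs umount /mnt/jfs
+$ juicefs mount -d -o allow_other redis://localhost:6379/1 /mnt/jfs
+```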
+ +## `/go/pkg/tool/linux_amd64/link: running gcc failed: exit status 1` or `/go/pkg/tool/linux_amd64/compile: signal: killed` + +This error may caused by GCC version is too low, please try to upgrade your GCC to 5.4+. + +## `format: ERR wrong number of arguments for 'auth' command` + +This error means you use Redis < 6.0.0 and specify username in Redis URL when execute `juicefs format` command. Only Redis >= 6.0.0 supports specify username, so you need omit the username parameter in the URL, e.g. `redis://:password@host:6379/1`. + +## `fuse: fuse: exec: "/bin/fusermount": stat /bin/fusermount: no such file or directory` + +This error means `juicefs mount` command was executed with non-root user, and `fusermount` command cannot found. + +There are two solutions to this problem: + +1. Execute `juicefs mount` command with root user +2. Install `fuse` package (e.g. `apt-get install fuse`, `yum install fuse`) + +## `fuse: fuse: fork/exec /usr/bin/fusermount: permission denied` + +This error means current user doesn't have permission to execute `fusermount` command. For example, check `fusermount` permission with following command: + +```sh +$ ls -l /usr/bin/fusermount +-rwsr-x---. 1 root fuse 27968 Dec 7 2011 /usr/bin/fusermount +``` + +Above example means only root user and `fuse` group user have executable permission. Another example: + +```sh +$ ls -l /usr/bin/fusermount +-rwsr-xr-x 1 root root 32096 Oct 30 2018 /usr/bin/fusermount +``` + +Above example means all users have executable permission. + +## Why the same user on host X has permission to access a file in JuiceFS while has no permission to it on host Y? + +The same user has different UID or GID on host X and host Y. Use `id` command to show the UID and GID: + +```bash +$ id alice +uid=1201(alice) gid=500(staff) groups=500(staff) +``` + +Read ["Sync Accounts between Multiple Hosts"](administration/sync_accounts_between_multiple_hosts.md) to resolve this problem. diff --git a/docs/en/getting-started/_choose_metadata_engine.md b/docs/en/getting-started/_choose_metadata_engine.md new file mode 100644 index 0000000..0f262cc --- /dev/null +++ b/docs/en/getting-started/_choose_metadata_engine.md @@ -0,0 +1,6 @@ +--- +sidebar_label: How to Choose Metadata Engine +sidebar_position: 4 +--- + +# How to Choose Metadata Engine \ No newline at end of file diff --git a/docs/en/getting-started/_prerequisites.md b/docs/en/getting-started/_prerequisites.md new file mode 100644 index 0000000..1fc42ad --- /dev/null +++ b/docs/en/getting-started/_prerequisites.md @@ -0,0 +1,5 @@ +--- +sidebar_label: Prerequisites +sidebar_position: 1 +--- +# Prerequisites diff --git a/docs/en/getting-started/_quick_start_guide.md b/docs/en/getting-started/_quick_start_guide.md new file mode 100644 index 0000000..29e5048 --- /dev/null +++ b/docs/en/getting-started/_quick_start_guide.md @@ -0,0 +1,243 @@ +--- +sidebar_label: Quick Start Guide +sidebar_position: 2 +slug: /quick_start_guide +--- + +# JuiceFS Quick Start Guide + +To create a JuiceFS file system, you need the following 3 preparations: + +1. Redis database for metadata storage +2. Object storage is used to store data blocks +3. JuiceFS Client + +:::tip +Don’t know JuiceFS? You can check first [What is JuiceFS?](../introduction/introduction.md) +::: + +## 1. 
Redis Database + +You can easily buy cloud Redis databases in various configurations on the cloud computing platform, but if you just want to quickly evaluate JuiceFS, you can use Docker to quickly run a Redis database instance on your local computer: + +```shell +$ sudo docker run -d --name redis \ + -v redis-data:/data \ + -p 6379:6379 \ + --restart unless-stopped \ + redis redis-server --appendonly yes +``` + +After the container is successfully created, you can use `redis://127.0.0.1:6379` to access the Redis database. + +:::info +The above command persists Redis data in the `redis-data` data volume of docker, and you can modify the storage location of data persistence as needed. +::: + +:::caution +The Redis database instance created by the above command does not enable authentication and exposes the host's `6379` port. If you want to access this database via the Internet, it is strongly recommended to refer to [Redis official documentation](https://redis.io/topics/security) to enable protected mode. +::: + +For more information about Redis database, [click here to view](../reference/how_to_setup_metadata_engine.md#redis). + +## 2. Object Storage + +Like Redis databases, almost all public cloud computing platforms provide object storage services. Because JuiceFS supports object storage services on almost all platforms, you can choose freely according to your personal preferences. You can check our [Object Storage Support List and Setting Guide](../reference/how_to_setup_object_storage.md), which lists all the object storage services currently supported by JuiceFS and how to use them. + +Of course, if you just want to quickly evaluate JuiceFS, you can use Docker to quickly run a MinIO object storage instance on your local computer: + +```shell +$ sudo docker run -d --name minio \ + -p 9000:9000 \ + -p 9900:9900 \ + -v $PWD/minio-data:/data \ + --restart unless-stopped \ + minio/minio server /data --console-address ":9900" +``` + +Then, access the service: + +- **MinIO Web Console**:http://127.0.0.1:9900 +- **MinIO API**:http://127.0.0.1:9000 + +The initial Access Key and Secret Key of the root user are both `minioadmin`. + +After the container is successfully created, use `http://127.0.0.1:9000` to access the MinIO management interface. The initial Access Key and Secret Key of the root user are both `minioadmin`. + +:::info +The latest MinIO includes a new web console, the above command sets and maps port `9900` through `--console-address ":9900"` option. In addtion, it maps the data path in the MinIO container to the `minio-data` folder in the current directory. You can modify these options as needed. +::: + +## 3. JuiceFS Client + +JuiceFS supports Linux, Windows, macOS and other operating systems and various processor architectures. You can download the latest pre-compiled binary program from [here](https://github.com/juicedata/juicefs/releases/latest), please refer to [this document](installation.md#install-the-pre-compiled-client) to select the corresponding version according to the actual system and processor architecture used. 
+ +Take the x86-based Linux system as an example, download the compressed package containing `linux-amd64` in the file name: + +```shell +$ JFS_LATEST_TAG=$(curl -s https://api.github.com/repos/juicedata/juicefs/releases/latest | grep 'tag_name' | cut -d '"' -f 4 | tr -d 'v') +$ wget "https://github.com/juicedata/juicefs/releases/download/v${JFS_LATEST_TAG}/juicefs-${JFS_LATEST_TAG}-linux-amd64.tar.gz" +``` + +Unzip and install: + +```shell +$ tar -zxf "juicefs-${JFS_LATEST_TAG}-linux-amd64.tar.gz" +$ sudo install juicefs /usr/local/bin +``` + +:::tip +You can also compile the JuiceFS client manually from the source code, please refer to [document](installation.md#manually-compiling). +::: + +## 4. Create JuiceFS file system + +When creating a JuiceFS file system, you need to specify both the Redis database used to store metadata and the object storage used to store actual data. + +The following command will create a JuiceFS file system named `pics`, use the database `1` in Redis to store metadata, and use the `pics` bucket created in MinIO to store actual data: + +```shell +$ juicefs format \ + --storage minio \ + --bucket http://127.0.0.1:9000/pics \ + --access-key minioadmin \ + --secret-key minioadmin \ + redis://127.0.0.1:6379/1 \ + pics +``` + +After executing the command, you will see output similar to the following, indicating that the JuiceFS file system was created successfully. + +```shell +2021/04/29 23:01:18.352256 juicefs[34223] : Meta address: redis://127.0.0.1:6379/1 +2021/04/29 23:01:18.354252 juicefs[34223] : Ping redis: 132.185µs +2021/04/29 23:01:18.354758 juicefs[34223] : Data use minio://127.0.0.1:9000/pics/pics/ +2021/04/29 23:01:18.361674 juicefs[34223] : Volume is formatted as {Name:pics UUID:9c0fab76-efd0-43fd-a81e-ae0916e2fc90 Storage:minio Bucket:http://127.0.0.1:9000/pics AccessKey:minioadmin SecretKey:removed BlockSize:4096 Compression:none Partitions:0 EncryptKey:} +``` + +:::info +You can create as many JuiceFS file systems as you need. But it should be noted that only one file system can be created in each Redis database. For example, when you want to create another file system named `memory`, you have to use another database in Redis, such as No.2, which is `redis://127.0.0.1:6379/2`. +::: + +:::info +If you don't specify `--storage` option, the JuiceFS client will use the local disk as data storage. When using local storage, JuiceFS can only be used on a local stand-alone machine and cannot be mounted by other clients in the network. [Click here](../reference/how_to_setup_object_storage.md#local-disk) for details. +::: + +## 5. Mount JuiceFS file system + +After the JuiceFS file system is created, you can mount it on the operating system and use it. The following command mounts the `pics` file system to the `/mnt/jfs` directory. + +```shell +$ sudo juicefs mount -d redis://127.0.0.1:6379/1 /mnt/jfs +``` + +:::tip +When mounting the JuiceFS file system, there is no need to explicitly specify the name of the file system, just fill in the correct Redis server address and database number. +::: + +After executing the command, you will see output similar to the following, indicating that the JuiceFS file system has been successfully mounted on the system. 
+ +```shell +2021/04/29 23:22:25.838419 juicefs[37999] : Meta address: redis://127.0.0.1:6379/1 +2021/04/29 23:22:25.839184 juicefs[37999] : Ping redis: 67.625µs +2021/04/29 23:22:25.839399 juicefs[37999] : Data use minio://127.0.0.1:9000/pics/pics/ +2021/04/29 23:22:25.839554 juicefs[37999] : Cache: /var/jfsCache/9c0fab76-efd0-43fd-a81e-ae0916e2fc90 capacity: 1024 MB +2021/04/29 23:22:26.340509 juicefs[37999] : OK, pics is ready at /mnt/jfs +``` + +After the mounting is complete, you can access files in the `/mnt/jfs` directory. You can execute the `df` command to view the JuiceFS file system's mounting status: + +```shell +$ df -Th +Filesystem Type Size Used Avail Use% Mounted on +JuiceFS:pics fuse.juicefs 1.0P 64K 1.0P 1% /mnt/jfs +``` + +:::info +By default, the cache of JuiceFS is located in the `/var/jfsCache` directory. In order to obtain the read and write permissions of this directory, the `sudo` command is used here to mount the JuiceFS file system with administrator privileges. When ordinary users read and write `/mnt/jfs`, please assign them the appropriate permissions. +::: + +## 6. Automatically mount JuiceFS on boot + +Rename the `juicefs` client to `mount.juicefs` and copy it to the `/sbin/` directory: + +```shell +$ sudo cp /usr/local/bin/juicefs /sbin/mount.juicefs +``` + +:::info +Before executing the above command, we assume that the `juicefs` client program is already in the `/usr/local/bin` directory. You can also unzip a copy of the `juicefs` program directly from the downloaded compression package, rename it according to the above requirements, and copy it to the `/sbin/` directory. +::: + +Edit the `/etc/fstab` configuration file, start a new line, and add a record according to the following format: + +``` + juicefs _netdev[,] 0 0 +``` + +- Please replace `` with the actual Redis database address in the format of `redis://:@:/`, for example: `redis ://localhost:6379/1`. +- Please replace `` with the actual mount point of the file system, for example: `/jfs`. +- If necessary, please replace `[,]` with the actual [mount options](../reference/command_reference.md#juicefs-mount) to be set, and multiple options are separated by commas. + +For example: + +``` +redis://localhost:6379/1 /jfs juicefs _netdev,max-uploads=50,writeback,cache-size=2048 0 0 +``` + +:::caution +By default, CentOS 6 will not mount the network file system when the system starts. You need to execute the command to enable the automatic mounting support of the network file system: +::: + +```bash +$ sudo chkconfig --add netfs +``` + +## 7. Unmount JuiceFS + +If you need to unmount a JuiceFS file system, you can first execute the `df` command to view the information of the mounted file systems: + +```shell +$ sudo df -Th + +File system type capacity used usable used% mount point +... +JuiceFS:pics fuse.juicefs 1.0P 1.1G 1.0P 1% /mnt/jfs +``` + +You can see that the mount point of the file system `pics` is `/mnt/jfs`, execute the `umount` subcommand: + +```shell +$ sudo juicefs umount /mnt/jfs +``` + +:::tip +Execute the `juicefs umount -h` command to obtain detailed help information for the unmount command. +::: + +### Unmount failed + +If a file system fails to be unmounted after executing the command, it will prompt `Device or resource busy`: + +```shell +2021-05-09 22:42:55.757097 I | fusermount: failed to unmount /mnt/jfs: Device or resource busy +exit status 1 +``` + +This can happen because some programs are reading and writing files in the file system. 
To ensure data security, you should first check which programs are interacting with files in the file system (e.g. through the `lsof` command), try to end the interaction between them, and then execute the unmount command again. + +:::caution +The commands contained in the following content may cause file damage or loss, please be cautious! +::: + +Of course, you can also add the `--force` or `-f` parameter to the unmount command to force the file system to be unmounted, but you have to bear the possibly catastrophic consequences: + +```shell +$ sudo juicefs umount --force /mnt/jfs +``` + +You can also use the `fusermount` command to unmount the file system: + +```shell +$ sudo fusermount -u /mnt/jfs +``` diff --git a/docs/en/getting-started/for_distributed.md b/docs/en/getting-started/for_distributed.md new file mode 100644 index 0000000..8897ae7 --- /dev/null +++ b/docs/en/getting-started/for_distributed.md @@ -0,0 +1,180 @@ +--- +sidebar_label: Quick Start (Distributed Mode) +sidebar_position: 3 +--- + +# JuiceFS Quick Start Guide for Distributed Mode + +The previous document ["JuiceFS Quick Start Guide for Standalone Mode"](for_local.md) created a file system that can be mounted on any host by using a combination of an "object storage" and a "SQLite" database. Since the object storage is accessible by any computer on the network with proper credentials, we can access the same JuiceFS file system on different computers simply by copying the SQLite database file to any computer that needs to access the storage. + +Obviously, sharing the file system by copying the SQLite database between computers is feasible, but the real-time availability of the files is not guaranteed. Since SQLite is a single-file database that cannot be accessed by multiple computers at the same time, we need to use a database that supports network access, such as Redis, PostgreSQL, MySQL, etc., in order to allow a file system to be mounted and read by multiple computers in a distributed environment. + +In this document, building on the previous one, we further replace the database from the single-user "SQLite" with a multi-user "cloud database", thus realizing a distributed file system that can be mounted on any computer on the network for reading and writing. + +## Network Database + +"Network Database" here refers to a database that allows multiple users to access it simultaneously over the network. From this perspective, databases can be simply divided into: + +1. **Standalone Database**: such databases are single-file and usually only accessible on a single machine, such as SQLite, Microsoft Access, etc. +2. **Network Database**: such databases are usually complex multi-file structures that provide network-based access interfaces and support simultaneous multi-user access, such as Redis, PostgreSQL, etc. + +JuiceFS currently supports the following network-based databases. + +- **Key-Value Database**: Redis, TiKV +- **Relational Database**: PostgreSQL, MySQL, MariaDB + +Different databases differ in performance and stability. For example, Redis is an in-memory key-value database with excellent performance but relatively weak reliability, while PostgreSQL is a relational database with lower performance than an in-memory database but higher reliability. + +We will write a special document about database selection.
+ +## Cloud Database + +Cloud computing platforms usually have a wide variety of cloud database offerings, such as Amazon RDS for various relational database versions and Amazon ElastiCache for Redis-compatible in-memory database products. A multi-copy, highly available database cluster can be created with a simple initial setup. + +Of course, you can build your own database on the server if you wish. + +For simplicity, here is an example of the Amazon ElastiCache for Redis. For a network database, the most basic information is the following 2 items. + +1. **Database Address**: the access address of the database, the cloud platform may provide different links for internal and external networks. +2. **Username and Password**: authentication information used to access the database. + +## Hands-on Practice + +### 1. Install Client + +Install the JuiceFS client on all computers that need to mount the file system, refer to [Installation & Upgrade](installation.md) for details. + +### 2. Preparing Object Storage + +Here is a pseudo-sample with Amazon S3 as an example, you can switch to other object storage, refer to [JuiceFS Supported Storage](../reference/how_to_setup_object_storage.md#supported-object-storage) for details. + +- **Bucket Endpoint**: `https://myjfs.s3.us-west-1.amazonaws.com` +- **Access Key ID**: `ABCDEFGHIJKLMNopqXYZ` +- **Access Key Secret**: `ZYXwvutsrqpoNMLkJiHgfeDCBA` + +### 3. Preparing Database + +The following is a pseudo-sample of the Amazon ElastiCache for Redis as an example, you can switch to other types of databases, refer to [JuiceFS Supported Databases](../reference/how_to_setup_metadata_engine.md) for details. + +- **Database Address**: `myjfs-sh-abc.apse1.cache.amazonaws.com:6379` +- **Database Username**: `tom` +- **Database Password**: `mypassword` + +The format for using a Redis database in JuiceFS is as follows. + +``` +redis://:@:6379/1 +``` + +:::tip +Redis versions prior to 6.0 do not have username, omit the `` part of the URL, e.g. `redis://:mypassword@myjfs-sh-abc.apse1.cache.amazonaws.com:6379/1` (please note that the colon in front of the password is a separator and needs to be preserved). +::: + +### 4. Creating a file system + +The following command creates a file system that supports cross-network, multi-machine simultaneous mounts, and shared reads and writes using a combination of "Object Storage" and "Redis" database. + +```shell +juicefs format \ + --storage s3 \ + --bucket https://myjfs.s3.us-west-1.amazonaws.com \ + --access-key ABCDEFGHIJKLMNopqXYZ \ + --secret-key ZYXwvutsrqpoNMLkJiHgfeDCBA \ + redis://tom:mypassword@myjfs-sh-abc.apse1.cache.amazonaws.com:6379/1 \ + myjfs +``` + +Once the file system is created, the terminal will output something like the following. + +```shell +2021/12/16 16:37:14.264445 juicefs[22290] : Meta address: redis://@myjfs-sh-abc.apse1.cache.amazonaws.com:6379/1 +2021/12/16 16:37:14.277632 juicefs[22290] : maxmemory_policy is "volatile-lru", please set it to 'noeviction'. 
+2021/12/16 16:37:14.281432 juicefs[22290] : Ping redis: 3.609453ms +2021/12/16 16:37:14.527879 juicefs[22290] : Data uses s3://myjfs/myjfs/ +2021/12/16 16:37:14.593450 juicefs[22290] : Volume is formatted as {Name:myjfs UUID:4ad0bb86-6ef5-4861-9ce2-a16ac5dea81b Storage:s3 Bucket:https://myjfs AccessKey:ABCDEFGHIJKLMNopqXYZ SecretKey:removed BlockSize:4096 Compression:none Shards:0 Partitions:0 Capacity:0 Inodes:0 EncryptKey:} +``` + +:::info +Once a file system is created, the relevant information including name, object storage, access keys, etc. are recorded in the database. In the current example, the file system information is recorded in the Redis database, so any computer with the database address, username, and password information can mount and read the file system. +::: + +### 5. Mounting the file system + +Since the "data" and "metadata" of this file system are stored in cloud services, it can be mounted on any computer with a JuiceFS client installed for shared reads and writes at the same time. For example: + +```shell +juicefs mount redis://tom:mypassword@myjfs-sh-abc.apse1.cache.amazonaws.com:6379/1 mnt +``` + +#### Strong data consistency guarantee + +JuiceFS provides a "close-to-open" consistency guarantee, which means that when two or more clients read and write the same file at the same time, the changes made by client A may not be immediately visible to client B. However, once the file is closed by client A, any client re-opened it afterwards is guaranteed to see the latest data, no matter it is on the same node with A or not. + +#### Increase cache size to improve performance + +Since Object Storage is a network-based storage service, it will inevitably encounter access latency. To solve this problem, JuiceFS provides and enables caching mechanism by default, i.e. allocating a part of local storage as a buffer layer between data and object storage, and caching data to local storage asynchronously when reading files, please refer to ["Cache"](../administration/cache_management.md) for more details. + +By default, JuiceFS will set 100GiB cache in `$HOME/.juicefs/cache` or `/var/jfsCache` directory. Setting a larger cache space on a faster SSD can effectively improve JuiceFS's read and write performance. + +You can use `--cache-dir` to adjust the location of the cache directory and `--cache-size` to adjust the size of the cache space, e.g.: + +```shell +juicefs mount + --background \ + --cache-dir /mycache \ + --cache-size 512000 \ + redis://tom:mypassword@myjfs-sh-abc.apse1.cache.amazonaws.com:6379/1 mnt +``` + +:::note +The JuiceFS process needs permission to read and write to the `--cache-dir` directory. +::: + +The above command sets the cache directory in the `/mycache` directory and specifies the cache space as 500GiB. + +#### Auto-mount on boot + +Take a Linux system as an example and assume that the client is located in the `/usr/local/bin` directory. Rename the JuiceFS client to `mount.juicefs` and copy it to the `/sbin` directory. + +```shell +sudo cp /usr/local/bin/juicefs /sbin/mount.juicefs +``` + +Edit the `/etc/fstab` configuration file and add a new record following the rules of fstab. 
+ +``` +redis://tom:mypassword@myjfs-sh-abc.apse1.cache.amazonaws.com:6379/1 /mnt/myjfs juicefs _netdev,max-uploads=50,writeback,cache-size=512000 0 0 +``` + +:::note +By default, CentOS 6 does not mount the network file system at boot time, you need to run the command to enable automatic mounting support for the network file system: `sudo chkconfig --add netfs` +::: + +### 6. Unmounting the file system + +You can unmount the JuiceFS file system (assuming the mount point path is `mnt`) with the `juicefs umount` command. + +```shell +juicefs umount mnt +``` + +#### Unmounting failure + +If the command fails to unmount the file system after execution, the prompt is `Device or resource busy`. + +```shell +2021-05-09 22:42:55.757097 I | fusermount: failed to unmount mnt: Device or resource busy +exit status 1 +``` + +This may happen because some programs are reading and writing files in the file system. To ensure data security, you should first troubleshoot which programs are interacting with files on the file system (e.g. via the `lsof` command) and try to end the interaction between them before re-executing the unmount command. + +:::caution +The following commands may result in file corruption and loss, so be careful! +::: + +While you can ensure data security, you can add the `--force` or `-f` parameter to the unmount command to force the file system to be unmounted. + +```shell +juicefs umount --force mnt +``` diff --git a/docs/en/getting-started/for_local.md b/docs/en/getting-started/for_local.md new file mode 100644 index 0000000..3a54609 --- /dev/null +++ b/docs/en/getting-started/for_local.md @@ -0,0 +1,175 @@ +--- +sidebar_label: Quick Start (Standalone Mode) +sidebar_position: 2 +slug: /quick_start_guide +--- + +# JuiceFS Quick Start Guide for Standalone Mode + +The JuiceFS file system consists of ["Object Storage"](../reference/how_to_setup_object_storage.md) and ["Database"](../reference/how_to_setup_object_storage.md) are jointly driven. In addition to object storage, it also supports the use of local disk, WebDAV and HDFS, and so on as the underlying storage. Therefore, you can quickly create a standalone file system using local disks and SQLite database to understand and experience JuiceFS. + +## Install Client + +For details, please refer to [Installation & Upgrade](installation.md)。 + +Regardless of the operating system you are using, when you execute `juicefs` in the terminal and it returns the help message of the program, it means that you have successfully installed the JuiceFS client. + +## Creating a File System + +### Basic Concept + +To create a file system use the [`format`](../reference/command_reference.md#juicefs-format) command provided by the client, generally in the following format. + +```shell +juicefs format [command options] META-URL NAME +``` + +As you can see, there are 3 types of information required to format a file system. + +1. **[command options]**: Sets the storage media for the file system, if nothing is specified then **default to local disk** as the storage media, default path is `"$HOME/.juicefs/local"` or `"/var/jfs"`. +2. **META-URL**: used to set the metadata engine, usually the URL or file path to the database. +3. **NAME**: the name of the file system. 
+
+:::tip
+JuiceFS supports a wide range of storage media and metadata storage engines, see [JuiceFS supported storage media](../reference/how_to_setup_object_storage.md) and [JuiceFS supported metadata storage engines](../reference/how_to_setup_metadata_engine.md).
+:::
+
+### Hands-on Practice
+
+On a Linux system, for example, the following command creates a file system named `myjfs`.
+
+```shell
+juicefs format sqlite3://myjfs.db myjfs
+```
+
+When the creation completes, output similar to the following will be returned:
+
+```shell {1,4}
+2021/12/14 18:26:37.666618 juicefs[40362] : Meta address: sqlite3://myjfs.db
+[xorm] [info] 2021/12/14 18:26:37.667504 PING DATABASE sqlite3
+2021/12/14 18:26:37.674147 juicefs[40362] : The latency to database is too high: 7.257333ms
+2021/12/14 18:26:37.675713 juicefs[40362] : Data use file:///Users/herald/.juicefs/local/myjfs/
+2021/12/14 18:26:37.689683 juicefs[40362] : Volume is formatted as {Name:myjfs UUID:d5bdf7ea-472c-4640-98a6-6f56aea13982 Storage:file Bucket:/Users/herald/.juicefs/local/ AccessKey: SecretKey: BlockSize:4096 Compression:none Shards:0 Partitions:0 Capacity:0 Inodes:0 EncryptKey:}
+```
+
+As you can see from the output, the file system uses SQLite as the metadata storage engine. The database file is located in the current directory, with the file name `myjfs.db`, and it stores all the information of the `myjfs` file system: a complete table structure has been created in it to hold all the metadata of the file system.
+
+![](../images/sqlite-info.png)
+
+Since no storage-related options are specified, the client uses the local disk as the storage medium by default. According to the output, the file system storage path is `file:///Users/herald/.juicefs/local/myjfs/`.
+
+## Mounting the File System
+
+### Basic Concept
+
+To mount a file system, use the client-provided [`mount`](../reference/command_reference.md#juicefs-mount) command, generally in the following format:
+
+```shell
+juicefs mount [command options] META-URL MOUNTPOINT
+```
+
+Similar to the command to create a file system, the following information is required to mount a file system:
+
+1. **[command options]**: used to specify file system-related options, e.g. `-d` enables mounting in the background.
+2. **META-URL**: used to set up metadata storage, usually the URL or file path to the database.
+3. **MOUNTPOINT**: the mount point of the file system, i.e. the directory where the file system will be mounted.
+
+:::tip
+The mount point (MOUNTPOINT) on Windows systems should use a drive letter that is not yet occupied, e.g. `Z:`, `Y:`.
+:::
+
+### Hands-on Practice
+
+:::note
+As SQLite is a single-file database, pay attention to the path of the database file when mounting it; JuiceFS supports both relative and absolute paths.
+:::
+
+The following command mounts the `myjfs` file system to the `mnt` folder in the current directory.
+
+```shell
+juicefs mount sqlite3://myjfs.db mnt
+```
+
+![](../images/sqlite-mount-local.png)
+
+By default, the client mounts the file system in the foreground. As you can see in the image above, the program keeps running in the current terminal process, and the file system is unmounted when you press the Ctrl + C key combination or close the terminal window.
+
+To keep the file system mounted in the background, you can specify the `-d` or `--background` option when mounting, i.e. let the client mount the file system as a daemon:
+
+```shell
+juicefs mount sqlite3://myjfs.db mnt -d
+```
+
+Next, any files stored at the mount point `mnt` will be split into blocks according to [How JuiceFS Stores Files](../introduction/architecture.md#how-juicefs-stores-files) and stored in the `$HOME/.juicefs/local/myjfs` directory, with the corresponding metadata stored in the `myjfs.db` database.
+
+Finally, the mount point `mnt` can be unmounted by executing the following command.
+
+```shell
+juicefs umount mnt
+```
+
+## Go Further
+
+The previous content is only meant to help you quickly experience and understand how JuiceFS works. Let's take it a step further: still use SQLite to store metadata, but replace the local storage with "object storage" for a more practical setup.
+
+### Object Storage
+
+Object storage is a web storage service based on the HTTP protocol that offers a simple API for access. It has a flat structure, is easy to scale and relatively inexpensive, and is ideal for storing large amounts of unstructured data. Almost all major cloud computing platforms provide object storage services, such as Amazon S3, Alibaba Cloud OSS, Backblaze B2, etc.
+
+JuiceFS supports almost all object storage services, see [JuiceFS supported storage media](../reference/how_to_setup_object_storage.md).
+
+In general, creating an object store requires only 2 steps:
+
+1. Create a `Bucket` and get the Endpoint address.
+2. Create the `Access Key ID` and `Access Key Secret`, the access keys for the Object Storage API.
+
+Using AWS S3 as an example, a created resource would look something like the following:
+
+- **Bucket Endpoint**: `https://myjfs.s3.us-west-1.amazonaws.com`
+- **Access Key ID**: `ABCDEFGHIJKLMNopqXYZ`
+- **Access Key Secret**: `ZYXwvutsrqpoNMLkJiHgfeDCBA`
+
+:::note
+The process of creating an object store may vary slightly from platform to platform, so it is recommended to check the help manual of the cloud platform. In addition, some platforms may provide different Endpoint addresses for internal and external networks; since this article accesses the object store from a local machine, please choose the address for external network access.
+:::
+
+### Hands-on Practice
+
+Next, create a JuiceFS file system using SQLite and Amazon S3 object storage.
+
+:::note
+If the `myjfs.db` file already exists, delete it first and then execute the following command.
+:::
+
+```shell
+juicefs format --storage s3 \
+    --bucket https://myjfs.s3.us-west-1.amazonaws.com \
+    --access-key ABCDEFGHIJKLMNopqXYZ \
+    --secret-key ZYXwvutsrqpoNMLkJiHgfeDCBA \
+    sqlite3://myjfs.db myjfs
+```
+
+In the above command, the database and file system names remain the same, and the following object-storage-related options are added:
+
+- `--storage`: Used to set the storage type, e.g. oss, s3, etc.
+- `--bucket`: Used to set the Endpoint address of the object store.
+- `--access-key`: Used to set the Object Storage Access Key ID.
+- `--secret-key`: Used to set the Object Storage Access Key Secret.
+
+:::note
+Please replace the information in the above command with your own object storage information.
+:::
+
+Once created, you can mount it:
+
+```shell
+juicefs mount sqlite3://myjfs.db mnt
+```
+
+As you can see, the mount command is exactly the same as when using local storage, because JuiceFS has already written the information about the object storage to the `myjfs.db` database, so there is no need to provide it again when mounting.
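+
+If you want to double-check that the new file system works end to end, a minimal sanity check like the following can be run after mounting; the test file name is arbitrary:
+
+```shell
+# Mount in the background, write and read back a small test file, then unmount.
+juicefs mount sqlite3://myjfs.db mnt -d
+echo "hello JuiceFS" > mnt/hello.txt
+cat mnt/hello.txt
+df -h mnt        # the mount point shows up as a JuiceFS file system
+juicefs umount mnt
+```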
+
+The combination of SQLite and object storage is more practical than using local disks. From an application perspective, this approach is equivalent to plugging object storage of almost unlimited capacity into your local computer, allowing you to use cloud storage as if it were a local disk.
+
+Further, all the data of the file system is stored in cloud-based object storage, so the `myjfs.db` database can be copied to other computers where JuiceFS clients are installed for mounting, reading, and writing. That is, any computer that can read the database holding the metadata can mount and read/write the file system.
+
+Obviously, it is difficult for a single-file database like SQLite to be accessed by multiple computers at the same time. If SQLite is replaced by Redis, PostgreSQL, MySQL, etc., which can be accessed over the network by multiple computers at once, distributed reads and writes on the JuiceFS file system become possible.
diff --git a/docs/en/getting-started/installation.md b/docs/en/getting-started/installation.md
new file mode 100644
index 0000000..9b86276
--- /dev/null
+++ b/docs/en/getting-started/installation.md
@@ -0,0 +1,281 @@
+---
+sidebar_label: Installation & Upgrade
+sidebar_position: 1
+slug: /installation
+---
+
+# Installation & Upgrade
+
+JuiceFS has good cross-platform capability and runs on almost all major architectures and operating systems, including but not limited to Linux, macOS, Windows, and BSD.
+
+The JuiceFS client has only one binary file. You can either download the pre-compiled version, unzip it, and use it directly, or compile it manually from the source code.
+
+## Install The Pre-compiled Client
+
+You can find the latest version of the client for download at [GitHub](https://github.com/juicedata/juicefs/releases). Pre-compiled versions for different CPU architectures and operating systems are available in the download list of each release, so please choose the one that matches your system, e.g.:
+
+| File Name | Description |
+| ------------------------------------ | ---------------------------- |
+| `juicefs-x.x.x-darwin-amd64.tar.gz` | For macOS systems with Intel chips |
+| `juicefs-x.x.x-linux-amd64.tar.gz` | For Linux distributions on the x86 architecture |
+| `juicefs-x.x.x-linux-arm64.tar.gz` | For Linux distributions on the ARM architecture |
+| `juicefs-x.x.x-windows-amd64.tar.gz` | For Windows on the x86 architecture |
+| `juicefs-hadoop-x.x.x-linux-amd64.jar` | Hadoop Java SDK for Linux distributions on the x86 architecture |
+
+:::tip
+For macOS on M1 series chips, you can use the `darwin-amd64` version of the client, which depends on [Rosetta 2](https://support.apple.com/zh-cn/HT211861), or you can refer to [Manually Compiling](#manually-compiling) to build a native version.
+:::
+
+### Linux
+
+For Linux systems on the x86 architecture, download the file whose name contains `linux-amd64` and execute the following commands in the terminal.
+
+1. Get the latest version number
+
+   ```shell
+   JFS_LATEST_TAG=$(curl -s https://api.github.com/repos/juicedata/juicefs/releases/latest | grep 'tag_name' | cut -d '"' -f 4 | tr -d 'v')
+   ```
+
+2. Download the client to the current directory
+
+   ```shell
+   wget "https://github.com/juicedata/juicefs/releases/download/v${JFS_LATEST_TAG}/juicefs-${JFS_LATEST_TAG}-linux-amd64.tar.gz"
+   ```
+
+3. Unzip the installation package
+
+   ```shell
+   tar -zxf "juicefs-${JFS_LATEST_TAG}-linux-amd64.tar.gz"
+   ```
+
+4. Install the client
+
+   ```shell
+   sudo install juicefs /usr/local/bin
+   ```
+
+After completing the above 4 steps, execute the `juicefs` command in the terminal; if the help message is returned, the client has been installed successfully.
+
+:::info
+If the terminal prompts `command not found`, it may be that `/usr/local/bin` is not in your system's `PATH` environment variable. You can run `echo $PATH` to see which executable paths are set, select an appropriate path based on the result, then adjust and re-execute the installation command in step 4.
+:::
+
+### Windows
+
+There are two ways to use JuiceFS on Windows systems.
+
+1. [Using Pre-compiled Windows client](#pre-compiled-windows-client)
+2. [Using the Linux client in WSL](#using-the-linux-client-in-wsl)
+
+#### Pre-compiled Windows Client
+
+The Windows client of JuiceFS is also a standalone binary that can be downloaded and unpacked to run directly.
+
+1. Install dependencies
+
+   Since Windows does not natively support the FUSE interface, you first need to download and install [WinFsp](http://www.secfs.net/winfsp/) in order to implement FUSE support.
+
+   :::tip
+   **[WinFsp](https://github.com/billziss-gh/winfsp)** is an open source Windows file system agent that provides a FUSE emulation layer, which allows JuiceFS clients to mount file systems for use on Windows systems.
+   :::
+
+2. Install the client
+
+   Take Windows 10 as an example: download the file whose name contains `windows-amd64` and unzip it to get `juicefs.exe`, which is the JuiceFS client binary.
+
+   To make it easier to use, you can create a folder named `juicefs` in the root directory of the `C:\` drive and extract `juicefs.exe` into that folder. Then add `C:\juicefs` to the environment variables of your system and restart the system to let the settings take effect; after that, you can run `juicefs` commands directly in the `Command Prompt` or `PowerShell` terminal that comes with your system.
+
+   ![Windows ENV path](../images/windows-path-en.png)
+
+#### Using the Linux client in WSL
+
+[WSL](https://docs.microsoft.com/en-us/windows/wsl/about) stands for Windows Subsystem for Linux, which is supported from Windows 10 version 2004 onwards or Windows 11. It allows you to run most of the command-line tools, utilities, and applications of GNU/Linux natively on a Windows system without incurring the overhead of a traditional virtual machine or dual-boot setup.
+
+For details, see "[Using JuiceFS on WSL](../tutorials/juicefs_on_wsl.md)".
+
+### macOS
+
+Since macOS does not support the FUSE interface by default, you need to install [macFUSE](https://osxfuse.github.io/) first to add FUSE support.
+
+:::tip
+[macFUSE](https://github.com/osxfuse/osxfuse) is an open source file system enhancement tool that allows macOS to mount third-party file systems, enabling JuiceFS clients to mount file systems for use on macOS systems.
+:::
+
+#### Homebrew
+
+If you have the [Homebrew](https://brew.sh/) package manager installed on your system, you can install the JuiceFS client by executing the following command.
+
+```shell
+brew tap juicedata/homebrew-tap
+brew install juicefs
+```
+
+#### Pre-compiled Binary
+
+You can also download the binary whose name contains `darwin-amd64`, unzip it, and install the program to any executable path on your system using the `install` command, e.g.
+
+```shell
+sudo install juicefs /usr/local/bin
+```
+
+### Docker
+
+For cases where you want to use JuiceFS in a Docker container, here is a `Dockerfile` for building a JuiceFS client image. It can be used as a base to build a JuiceFS client image alone, or to package the client together with other applications.
+
+```dockerfile
+FROM ubuntu:20.04
+
+RUN apt update && apt install -y curl fuse && \
+    apt-get autoremove && \
+    apt-get clean && \
+    rm -rf \
+    /tmp/* \
+    /var/lib/apt/lists/* \
+    /var/tmp/*
+
+RUN set -x && \
+    mkdir /juicefs && \
+    cd /juicefs && \
+    JFS_LATEST_TAG=$(curl -s https://api.github.com/repos/juicedata/juicefs/releases/latest | grep 'tag_name' | cut -d '"' -f 4 | tr -d 'v') && \
+    curl -s -L "https://github.com/juicedata/juicefs/releases/download/v${JFS_LATEST_TAG}/juicefs-${JFS_LATEST_TAG}-linux-amd64.tar.gz" \
+    | tar -zx && \
+    install juicefs /usr/bin && \
+    cd .. && \
+    rm -rf /juicefs
+
+CMD [ "juicefs" ]
+```
+
+## Manually Compiling
+
+If the pre-compiled client does not cover your platform, such as FreeBSD or macOS on the M1 chip, you can compile the JuiceFS client manually.
+
+In addition, manually compiling the client gives you early access to new features under development in JuiceFS, but it requires some basic knowledge of software compilation.
+
+### Unix-like Client
+
+Compiling clients for Linux, macOS, BSD and other Unix-like systems requires the following dependencies:
+
+- [Go](https://golang.org) 1.16+
+- GCC 5.4+
+
+1. Clone the source code
+
+   ```shell
+   git clone https://github.com/juicedata/juicefs.git
+   ```
+
+2. Enter the source code directory
+
+   ```shell
+   cd juicefs
+   ```
+
+3. Switch the branch
+
+   The source code uses the `main` branch by default, and you can switch to any official release, for example to `v0.17.4`.
+
+   ```shell
+   git checkout v0.17.4
+   ```
+
+   :::caution
+   The development branch often involves large changes, so please do not use clients compiled from the "development branch" in a production environment.
+   :::
+
+4. Compile
+
+   ```shell
+   make
+   ```
+
+   The compiled `juicefs` binary is located in the current directory.
+
+### Compiling on Windows
+
+To compile the JuiceFS client on Windows, you need to install [Go](https://golang.org) 1.16+ and GCC 5.4+.
+
+Since GCC does not provide a native Windows build, you need to use a version provided by a third party, either [MinGW-w64](https://sourceforge.net/projects/mingw-w64/) or [Cygwin](https://www.cygwin.com/). Here MinGW-w64 is used as the example.
+
+Download MinGW-w64 and add its `bin` directory to the system environment variables.
+
+1. Clone and enter the project directory:
+
+   ```shell
+   git clone https://github.com/juicedata/juicefs.git && cd juicefs
+   ```
+
+2. Copy WinFsp headers
+
+   ```shell
+   mkdir "C:\WinFsp\inc\fuse"
+   ```
+
+   ```shell
+   copy .\hack\winfsp_headers\* C:\WinFsp\inc\fuse\
+   ```
+
+   ```shell
+   dir "C:\WinFsp\inc\fuse"
+   ```
+
+   ```shell
+   set CGO_CFLAGS=-IC:/WinFsp/inc/fuse
+   ```
+
+   ```shell
+   go env -w CGO_CFLAGS=-IC:/WinFsp/inc/fuse
+   ```
+
+3. Compile the client
+
+   ```shell
+   go build -ldflags="-s -w" -o juicefs.exe ./cmd
+   ```
+
+### Cross-compiling Windows clients on Linux
+
+Compiling a specific version of the client for Windows is essentially the same as for the [Unix-like Client](#unix-like-client) and can be done directly on a Linux system. However, in addition to `go` and `gcc`, which must be installed, you also need to install:
+
+- [mingw-w64](https://www.mingw-w64.org/downloads/)
+
+Simply install the latest version provided by your Linux distribution's package manager; for example, on Ubuntu 20.04+ it can be installed directly as follows.
+
+```shell
+sudo apt install mingw-w64
+```
+
+Compile the Windows client:
+
+```shell
+make juicefs.exe
+```
+
+The compiled client is a binary file named `juicefs.exe`, located in the current directory.
+
+## Upgrade
+
+The JuiceFS client has only one binary file, so to upgrade to a new version you only need to replace the old binary with the new one.
+
+- **Pre-compiled client**: refer to the installation method for your system in this document, download the latest client, and overwrite the old one.
+- **Manually compiled client**: pull the latest source code, recompile it, and overwrite the old version of the client.
+
+:::caution
+For a file system that was mounted using an old version of the JuiceFS client, you need to [unmount the file system](for_distributed.md#6-unmounting-the-file-system) first, and then re-mount it with the new version of the client.
+:::
+
+## Uninstall
+
+The JuiceFS client has only one binary file, so it can be uninstalled by simply deleting it from wherever it is installed. For example, for the client installed on a Linux system as described in this document, execute the following command to uninstall it.
+
+```shell
+sudo rm /usr/local/bin/juicefs
+```
+
+You can also check where the program is located by using the `which` command.
+
+```shell
+which juicefs
+```
+
+The path returned by the command is the location where the JuiceFS client is installed on your system. Uninstallation on other operating systems follows the same pattern.
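+
+If you prefer a one-liner, the two steps above can be combined; this is just a convenience and assumes the binary sits in a root-owned directory such as `/usr/local/bin`:
+
+```shell
+# Locate the binary with `which` and remove it in one step.
+sudo rm "$(which juicefs)"
+```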
diff --git a/docs/en/grafana_template.json b/docs/en/grafana_template.json new file mode 100644 index 0000000..ed134bc --- /dev/null +++ b/docs/en/grafana_template.json @@ -0,0 +1,2215 @@ +{ + "annotations": { + "list": [ + { + "builtIn": 1, + "datasource": "-- Grafana --", + "enable": true, + "hide": true, + "iconColor": "rgba(0, 211, 255, 1)", + "name": "Annotations & Alerts", + "type": "dashboard" + } + ] + }, + "editable": true, + "gnetId": null, + "graphTooltip": 0, + "id": 16, + "iteration": 1640078675219, + "links": [], + "panels": [ + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "description": "", + "fieldConfig": { + "defaults": { + "unit": "bytes" + }, + "overrides": [] + }, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 6, + "w": 8, + "x": 0, + "y": 0 + }, + "hiddenSeries": false, + "id": 2, + "legend": { + "avg": false, + "current": true, + "max": false, + "min": false, + "show": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "8.0.6", + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "exemplar": true, + "expr": "avg(juicefs_used_space{vol_name=\"$name\"})", + "format": "time_series", + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "Data Size", + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Data Size", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "bytes", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "description": "", + "fieldConfig": { + "defaults": { + "unit": "none" + }, + "overrides": [] + }, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 6, + "w": 8, + "x": 8, + "y": 0 + }, + "hiddenSeries": false, + "id": 4, + "legend": { + "avg": false, + "current": true, + "max": false, + "min": false, + "show": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "8.0.6", + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "exemplar": true, + "expr": "avg(juicefs_used_inodes{vol_name=\"$name\"})", + "format": "time_series", + "interval": "", + "intervalFactor": 1, + "legendFormat": "Files", + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Files", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "none", + "label": null, + "logBase": 1, + 
"max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "unit": "none" + }, + "overrides": [] + }, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 6, + "w": 8, + "x": 16, + "y": 0 + }, + "hiddenSeries": false, + "id": 5, + "legend": { + "avg": false, + "current": true, + "max": false, + "min": false, + "show": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "8.0.6", + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "count(juicefs_uptime{vol_name=\"$name\"})", + "format": "time_series", + "interval": "", + "intervalFactor": 1, + "legendFormat": "Sessions", + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Client Sessions", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "none", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "description": "", + "fieldConfig": { + "defaults": { + "unit": "short" + }, + "overrides": [] + }, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 6, + "w": 8, + "x": 0, + "y": 6 + }, + "hiddenSeries": false, + "id": 8, + "legend": { + "avg": false, + "current": true, + "max": false, + "min": false, + "show": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "8.0.6", + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "exemplar": true, + "expr": "sum(rate(juicefs_fuse_ops_durations_histogram_seconds_count{vol_name=\"$name\"}[1m]) < 5000000000) by (instance)", + "format": "time_series", + "interval": "", + "intervalFactor": 1, + "legendFormat": "Ops {{instance}}", + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Operations", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + 
"dashes": false, + "datasource": "$datasource", + "description": "", + "fieldConfig": { + "defaults": { + "unit": "binBps" + }, + "overrides": [] + }, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 6, + "w": 8, + "x": 8, + "y": 6 + }, + "hiddenSeries": false, + "id": 7, + "legend": { + "avg": false, + "current": true, + "max": false, + "min": false, + "show": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "8.0.6", + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "exemplar": true, + "expr": "sum(rate(juicefs_fuse_written_size_bytes_sum{vol_name=\"$name\"}[1m]) < 5000000000) by (instance)", + "format": "time_series", + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "Write {{instance}}", + "refId": "A" + }, + { + "expr": "sum(rate(juicefs_fuse_read_size_bytes_sum{vol_name=\"$name\"}[1m]) < 5000000000) by (instance)", + "format": "time_series", + "hide": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "Read {{instance}}", + "refId": "B" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "IO Throughput", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "binBps", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "decimals": null, + "format": "Bps", + "label": "", + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "unit": "µs" + }, + "overrides": [] + }, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 6, + "w": 8, + "x": 16, + "y": 6 + }, + "hiddenSeries": false, + "id": 18, + "legend": { + "avg": false, + "current": true, + "max": false, + "min": false, + "show": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "8.0.6", + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(rate(juicefs_fuse_ops_durations_histogram_seconds_sum{vol_name=\"$name\"}[1m])) by (instance,mp) * 1000000 / sum(rate(juicefs_fuse_ops_durations_histogram_seconds_count{vol_name=\"$name\"}[1m])) by (instance,mp)", + "format": "time_series", + "hide": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "{{instance}}:{{mp}}", + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "IO Latency", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "µs", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + 
"max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "unit": "short" + }, + "overrides": [] + }, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 6, + "w": 8, + "x": 0, + "y": 12 + }, + "hiddenSeries": false, + "id": 13, + "legend": { + "avg": false, + "current": true, + "max": false, + "min": false, + "show": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "8.0.6", + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(rate(juicefs_transaction_durations_histogram_seconds_count{vol_name=\"$name\"}[1m])) by (instance)", + "format": "time_series", + "interval": "", + "intervalFactor": 1, + "legendFormat": "{{instance}}", + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Transcations", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "unit": "µs" + }, + "overrides": [] + }, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 6, + "w": 8, + "x": 8, + "y": 12 + }, + "hiddenSeries": false, + "id": 14, + "legend": { + "avg": false, + "current": true, + "max": false, + "min": false, + "show": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "8.0.6", + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(rate(juicefs_transaction_durations_histogram_seconds_sum{vol_name=\"$name\"}[1m])) by (instance,mp) * 1000000 / sum(rate(juicefs_transaction_durations_histogram_seconds_count{vol_name=\"$name\"}[1m])) by (instance,mp)", + "format": "time_series", + "interval": "", + "intervalFactor": 1, + "legendFormat": "{{instance}}:{{mp}}", + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Transcation Latency", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "µs", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + 
"dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 6, + "w": 8, + "x": 16, + "y": 12 + }, + "hiddenSeries": false, + "id": 20, + "legend": { + "avg": false, + "current": true, + "max": false, + "min": false, + "show": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "8.0.6", + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(rate(juicefs_transaction_restart{vol_name=~\"$name\"}[1m])) by (instance)", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "Restarts {{instance}}", + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Transaction Restarts", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "unit": "short" + }, + "overrides": [] + }, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 6, + "w": 8, + "x": 0, + "y": 18 + }, + "hiddenSeries": false, + "id": 15, + "legend": { + "avg": false, + "current": true, + "max": false, + "min": false, + "show": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "8.0.6", + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(rate(juicefs_object_request_durations_histogram_seconds_count{vol_name=\"$name\"}[1m])) by (method)", + "format": "time_series", + "interval": "", + "intervalFactor": 1, + "legendFormat": "{{method}}", + "refId": "A" + }, + { + "exemplar": true, + "expr": "sum(rate(juicefs_object_request_errors{vol_name=\"$name\"}[1m])) ", + "format": "time_series", + "interval": "", + "intervalFactor": 1, + "legendFormat": "errors", + "refId": "B" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Objects Requests", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 6, + "w": 8, + "x": 8, + "y": 
18 + }, + "hiddenSeries": false, + "id": 17, + "legend": { + "avg": false, + "current": true, + "max": false, + "min": false, + "show": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "8.0.6", + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "exemplar": true, + "expr": "sum(rate(juicefs_object_request_data_bytes{method=\"PUT\",vol_name=\"$name\"}[1m])) by (instance,method)", + "format": "time_series", + "interval": "", + "intervalFactor": 1, + "legendFormat": "{{method}} {{instance}}", + "refId": "A" + }, + { + "exemplar": true, + "expr": "sum(rate(juicefs_object_request_data_bytes{method=\"GET\",vol_name=\"$name\"}[1m])) by (instance,method)", + "format": "time_series", + "hide": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "{{method}} {{instance}}", + "refId": "B" + }, + { + "exemplar": true, + "expr": "sum(rate(juicefs_object_request_data_bytes{method=\"GET\",vol_name=\"$name\"}[1m]))", + "hide": false, + "interval": "", + "legendFormat": "Total", + "refId": "C" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Objects Throughput", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "$$hashKey": "object:145", + "format": "Bps", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "$$hashKey": "object:146", + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "unit": "µs" + }, + "overrides": [] + }, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 6, + "w": 8, + "x": 16, + "y": 18 + }, + "hiddenSeries": false, + "id": 16, + "legend": { + "avg": false, + "current": true, + "max": false, + "min": false, + "show": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "8.0.6", + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(rate(juicefs_object_request_durations_histogram_seconds_sum{vol_name=\"$name\"}[1m])) by (instance) * 1000000 / sum(rate(juicefs_object_request_durations_histogram_seconds_count{vol_name=\"$name\"}[1m])) by (instance)", + "format": "time_series", + "interval": "", + "intervalFactor": 1, + "legendFormat": "{{instance}}", + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Objects Latency", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "µs", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + 
"label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "unit": "percent" + }, + "overrides": [] + }, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 6, + "w": 8, + "x": 0, + "y": 24 + }, + "hiddenSeries": false, + "id": 10, + "legend": { + "avg": false, + "current": true, + "max": false, + "min": false, + "show": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "8.0.6", + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "exemplar": true, + "expr": "sum(rate(juicefs_cpu_usage{vol_name=\"$name\"}[1m])*100 < 1000) by (instance,mp)", + "format": "time_series", + "interval": "", + "intervalFactor": 1, + "legendFormat": "{{instance}}:{{mp}}", + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Client CPU Usage", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "$$hashKey": "object:137", + "format": "percent", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "$$hashKey": "object:138", + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "unit": "bytes" + }, + "overrides": [] + }, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 6, + "w": 8, + "x": 8, + "y": 24 + }, + "hiddenSeries": false, + "id": 11, + "legend": { + "avg": false, + "current": true, + "max": false, + "min": false, + "show": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "8.0.6", + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(juicefs_memory{vol_name=\"$name\"}) by (instance,mp)", + "format": "time_series", + "interval": "", + "intervalFactor": 1, + "legendFormat": "{{instance}}:{{mp}}", + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Client Memory Usage", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "bytes", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + 
"dashes": false, + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "unit": "none" + }, + "overrides": [] + }, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 6, + "w": 8, + "x": 16, + "y": 24 + }, + "hiddenSeries": false, + "id": 21, + "legend": { + "avg": false, + "current": true, + "max": false, + "min": false, + "show": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "8.0.6", + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(go_goroutines) by (instance,mp)", + "format": "time_series", + "interval": "", + "intervalFactor": 1, + "legendFormat": "{{instance}}:{{mp}}", + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Go threads", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "none", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "description": "", + "fieldConfig": { + "defaults": { + "unit": "bytes" + }, + "overrides": [] + }, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 6, + "w": 8, + "x": 0, + "y": 30 + }, + "hiddenSeries": false, + "id": 22, + "legend": { + "avg": false, + "current": true, + "max": false, + "min": false, + "show": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "8.0.6", + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "exemplar": true, + "expr": "sum(juicefs_blockcache_bytes{vol_name=\"$name\"}) by (instance,mp)", + "format": "time_series", + "interval": "", + "intervalFactor": 1, + "legendFormat": "{{instance}}:{{mp}}", + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Block Cache Size", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "bytes", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "description": "", + "fieldConfig": { + "defaults": { + "unit": "none" + }, + "overrides": [] + }, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 6, + "w": 8, + "x": 8, + "y": 30 + }, + "hiddenSeries": false, + "id": 23, + "legend": { + "avg": 
false, + "current": true, + "max": false, + "min": false, + "show": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "8.0.6", + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "exemplar": true, + "expr": "sum(juicefs_blockcache_blocks{vol_name=\"$name\"}) by (instance,mp)", + "format": "time_series", + "interval": "", + "intervalFactor": 1, + "legendFormat": "{{instance}}:{{mp}}", + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Block Cache Count", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "none", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "description": "", + "fieldConfig": { + "defaults": { + "unit": "percent" + }, + "overrides": [] + }, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 6, + "w": 8, + "x": 16, + "y": 30 + }, + "hiddenSeries": false, + "id": 24, + "legend": { + "avg": false, + "current": true, + "max": false, + "min": false, + "show": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "8.0.6", + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "exemplar": true, + "expr": "sum(rate(juicefs_blockcache_hits{vol_name=\"$name\"}[1m])) by (instance,mp) *100 / (sum(rate(juicefs_blockcache_hits{vol_name=\"$name\"}[1m])) by (instance,mp) + sum(rate(juicefs_blockcache_miss{vol_name=\"$name\"}[1m])) by (instance,mp))", + "format": "time_series", + "interval": "", + "intervalFactor": 1, + "legendFormat": "Hits {{instance}}:{{mp}}", + "refId": "A" + }, + { + "exemplar": true, + "expr": "sum(rate(juicefs_blockcache_hit_bytes{vol_name=\"$name\"}[1m])) by (instance,mp) *100 / (sum(rate(juicefs_blockcache_hit_bytes{vol_name=\"$name\"}[1m])) by (instance,mp) + sum(rate(juicefs_blockcache_miss_bytes{vol_name=\"$name\"}[1m])) by (instance,mp))", + "format": "time_series", + "interval": "", + "intervalFactor": 1, + "legendFormat": "HitBytes {{instance}}:{{mp}}", + "refId": "B" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Block Cache Hit Rate", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "percent", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + 
"alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "description": "", + "fieldConfig": { + "defaults": { + "unit": "percent" + }, + "overrides": [] + }, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 6, + "w": 8, + "x": 0, + "y": 36 + }, + "hiddenSeries": false, + "id": 25, + "legend": { + "avg": false, + "current": true, + "max": false, + "min": false, + "show": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "8.0.6", + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "exemplar": true, + "expr": "sum(rate(juicefs_compact_size_histogram_bytes_count{vol_name=\"$name\"}[1m])) by (instance,mp)", + "format": "time_series", + "interval": "", + "intervalFactor": 1, + "legendFormat": "{{instance}}:{{mp}}", + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Compaction", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "$$hashKey": "object:1080", + "format": "percent", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "$$hashKey": "object:1081", + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "unit": "percent" + }, + "overrides": [] + }, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 6, + "w": 8, + "x": 8, + "y": 36 + }, + "hiddenSeries": false, + "id": 26, + "legend": { + "avg": false, + "current": true, + "max": false, + "min": false, + "show": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "8.0.6", + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "exemplar": true, + "expr": "sum(rate(juicefs_compact_size_histogram_bytes_sum{vol_name=\"$name\"}[1m])) by (instance,mp)", + "format": "time_series", + "interval": "", + "intervalFactor": 1, + "legendFormat": "{{instance}}:{{mp}}", + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Compacted Data", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "percent", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": 
"$datasource", + "description": "", + "fieldConfig": { + "defaults": { + "unit": "short" + }, + "overrides": [] + }, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 6, + "w": 8, + "x": 16, + "y": 36 + }, + "hiddenSeries": false, + "id": 27, + "legend": { + "avg": false, + "current": true, + "max": false, + "min": false, + "show": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "8.0.6", + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "exemplar": true, + "expr": "sum(juicefs_fuse_open_handlers{vol_name=\"$name\"}) by (instance,mp)", + "format": "time_series", + "interval": "", + "intervalFactor": 1, + "legendFormat": "{{instance}}:{{mp}}", + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Open File Handlers", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "$$hashKey": "object:921", + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "$$hashKey": "object:922", + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + } + ], + "refresh": "", + "schemaVersion": 30, + "style": "dark", + "tags": [], + "templating": { + "list": [ + { + "current": { + "selected": true, + "text": "juicefs", + "value": "juicefs" + }, + "hide": 0, + "label": null, + "multi": false, + "name": "datasource", + "options": [], + "query": "prometheus", + "refresh": 1, + "regex": "", + "skipUrlSync": false, + "type": "datasource" + }, + { + "allValue": null, + "current": {}, + "datasource": "${datasource}", + "definition": "label_values(juicefs_uptime, vol_name)", + "description": null, + "error": null, + "hide": 0, + "includeAll": false, + "label": null, + "multi": false, + "name": "name", + "options": [], + "query": { + "query": "label_values(juicefs_uptime, vol_name)", + "refId": "StandardVariableQuery" + }, + "refresh": 1, + "regex": "", + "skipUrlSync": false, + "sort": 0, + "type": "query" + } + ] + }, + "time": { + "from": "now-1h", + "to": "now" + }, + "timepicker": { + "refresh_intervals": [ + "30s", + "1m", + "5m", + "15m", + "30m", + "1h", + "2h", + "1d" + ], + "time_options": [ + "5m", + "15m", + "1h", + "6h", + "12h", + "24h", + "2d", + "7d", + "30d" + ] + }, + "timezone": "", + "title": "JuiceFS Dashboard", + "uid": "-hm07csGk", + "version": 3 +} diff --git a/docs/en/grafana_template_k8s.json b/docs/en/grafana_template_k8s.json new file mode 100644 index 0000000..fea21d0 --- /dev/null +++ b/docs/en/grafana_template_k8s.json @@ -0,0 +1,2215 @@ +{ + "annotations": { + "list": [ + { + "builtIn": 1, + "datasource": "-- Grafana --", + "enable": true, + "hide": true, + "iconColor": "rgba(0, 211, 255, 1)", + "name": "Annotations & Alerts", + "type": "dashboard" + } + ] + }, + "editable": true, + "gnetId": null, + "graphTooltip": 0, + "id": 16, + "iteration": 1640078675219, + "links": [], + "panels": [ + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "description": 
"", + "fieldConfig": { + "defaults": { + "unit": "bytes" + }, + "overrides": [] + }, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 6, + "w": 8, + "x": 0, + "y": 0 + }, + "hiddenSeries": false, + "id": 2, + "legend": { + "avg": false, + "current": true, + "max": false, + "min": false, + "show": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "8.0.6", + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "exemplar": true, + "expr": "avg(juicefs_used_space{vol_name=\"$name\"})", + "format": "time_series", + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "Data Size", + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Data Size", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "bytes", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "description": "", + "fieldConfig": { + "defaults": { + "unit": "none" + }, + "overrides": [] + }, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 6, + "w": 8, + "x": 8, + "y": 0 + }, + "hiddenSeries": false, + "id": 4, + "legend": { + "avg": false, + "current": true, + "max": false, + "min": false, + "show": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "8.0.6", + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "exemplar": true, + "expr": "avg(juicefs_used_inodes{vol_name=\"$name\"})", + "format": "time_series", + "interval": "", + "intervalFactor": 1, + "legendFormat": "Files", + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Files", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "none", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "unit": "none" + }, + "overrides": [] + }, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 6, + "w": 8, + "x": 16, + "y": 0 + }, + "hiddenSeries": false, + "id": 5, + "legend": { + "avg": false, + "current": true, + "max": false, + "min": false, + "show": true, + "total": false, + "values": true + 
}, + "lines": true, + "linewidth": 1, + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "8.0.6", + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "count(juicefs_uptime{vol_name=\"$name\"})", + "format": "time_series", + "interval": "", + "intervalFactor": 1, + "legendFormat": "Sessions", + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Client Sessions", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "none", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "description": "", + "fieldConfig": { + "defaults": { + "unit": "short" + }, + "overrides": [] + }, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 6, + "w": 8, + "x": 0, + "y": 6 + }, + "hiddenSeries": false, + "id": 8, + "legend": { + "avg": false, + "current": true, + "max": false, + "min": false, + "show": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "8.0.6", + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "exemplar": true, + "expr": "sum(rate(juicefs_fuse_ops_durations_histogram_seconds_count{vol_name=\"$name\"}[1m]) < 5000000000) by (node)", + "format": "time_series", + "interval": "", + "intervalFactor": 1, + "legendFormat": "Ops {{node}}", + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Operations", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "description": "", + "fieldConfig": { + "defaults": { + "unit": "binBps" + }, + "overrides": [] + }, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 6, + "w": 8, + "x": 8, + "y": 6 + }, + "hiddenSeries": false, + "id": 7, + "legend": { + "avg": false, + "current": true, + "max": false, + "min": false, + "show": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "8.0.6", + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + 
"stack": false, + "steppedLine": false, + "targets": [ + { + "exemplar": true, + "expr": "sum(rate(juicefs_fuse_written_size_bytes_sum{vol_name=\"$name\"}[1m]) < 5000000000) by (node)", + "format": "time_series", + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "Write {{node}}", + "refId": "A" + }, + { + "expr": "sum(rate(juicefs_fuse_read_size_bytes_sum{vol_name=\"$name\"}[1m]) < 5000000000) by (node)", + "format": "time_series", + "hide": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "Read {{node}}", + "refId": "B" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "IO Throughput", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "binBps", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "decimals": null, + "format": "Bps", + "label": "", + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "unit": "µs" + }, + "overrides": [] + }, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 6, + "w": 8, + "x": 16, + "y": 6 + }, + "hiddenSeries": false, + "id": 18, + "legend": { + "avg": false, + "current": true, + "max": false, + "min": false, + "show": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "8.0.6", + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(rate(juicefs_fuse_ops_durations_histogram_seconds_sum{vol_name=\"$name\"}[1m])) by (node,mp) * 1000000 / sum(rate(juicefs_fuse_ops_durations_histogram_seconds_count{vol_name=\"$name\"}[1m])) by (node,mp)", + "format": "time_series", + "hide": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "{{node}}:{{mp}}", + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "IO Latency", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "µs", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "unit": "short" + }, + "overrides": [] + }, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 6, + "w": 8, + "x": 0, + "y": 12 + }, + "hiddenSeries": false, + "id": 13, + "legend": { + "avg": false, + "current": true, + "max": false, + "min": false, + "show": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "percentage": false, + 
"pluginVersion": "8.0.6", + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(rate(juicefs_transaction_durations_histogram_seconds_count{vol_name=\"$name\"}[1m])) by (node)", + "format": "time_series", + "interval": "", + "intervalFactor": 1, + "legendFormat": "{{node}}", + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Transcations", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "unit": "µs" + }, + "overrides": [] + }, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 6, + "w": 8, + "x": 8, + "y": 12 + }, + "hiddenSeries": false, + "id": 14, + "legend": { + "avg": false, + "current": true, + "max": false, + "min": false, + "show": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "8.0.6", + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(rate(juicefs_transaction_durations_histogram_seconds_sum{vol_name=\"$name\"}[1m])) by (node,mp) * 1000000 / sum(rate(juicefs_transaction_durations_histogram_seconds_count{vol_name=\"$name\"}[1m])) by (node,mp)", + "format": "time_series", + "interval": "", + "intervalFactor": 1, + "legendFormat": "{{node}}:{{mp}}", + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Transcation Latency", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "µs", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 6, + "w": 8, + "x": 16, + "y": 12 + }, + "hiddenSeries": false, + "id": 20, + "legend": { + "avg": false, + "current": true, + "max": false, + "min": false, + "show": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "8.0.6", + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": 
"sum(rate(juicefs_transaction_restart{vol_name=~\"$name\"}[1m])) by (node)", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "Restarts {{node}}", + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Transaction Restarts", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "unit": "short" + }, + "overrides": [] + }, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 6, + "w": 8, + "x": 0, + "y": 18 + }, + "hiddenSeries": false, + "id": 15, + "legend": { + "avg": false, + "current": true, + "max": false, + "min": false, + "show": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "8.0.6", + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(rate(juicefs_object_request_durations_histogram_seconds_count{vol_name=\"$name\"}[1m])) by (method)", + "format": "time_series", + "interval": "", + "intervalFactor": 1, + "legendFormat": "{{method}}", + "refId": "A" + }, + { + "exemplar": true, + "expr": "sum(rate(juicefs_object_request_errors{vol_name=\"$name\"}[1m])) ", + "format": "time_series", + "interval": "", + "intervalFactor": 1, + "legendFormat": "errors", + "refId": "B" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Objects Requests", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 6, + "w": 8, + "x": 8, + "y": 18 + }, + "hiddenSeries": false, + "id": 17, + "legend": { + "avg": false, + "current": true, + "max": false, + "min": false, + "show": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "8.0.6", + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "exemplar": true, + "expr": "sum(rate(juicefs_object_request_data_bytes{method=\"PUT\",vol_name=\"$name\"}[1m])) by (node,method)", + "format": "time_series", + "interval": "", + 
"intervalFactor": 1, + "legendFormat": "{{method}} {{node}}", + "refId": "A" + }, + { + "exemplar": true, + "expr": "sum(rate(juicefs_object_request_data_bytes{method=\"GET\",vol_name=\"$name\"}[1m])) by (node,method)", + "format": "time_series", + "hide": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "{{method}} {{node}}", + "refId": "B" + }, + { + "exemplar": true, + "expr": "sum(rate(juicefs_object_request_data_bytes{method=\"GET\",vol_name=\"$name\"}[1m]))", + "hide": false, + "interval": "", + "legendFormat": "Total", + "refId": "C" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Objects Throughput", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "$$hashKey": "object:145", + "format": "Bps", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "$$hashKey": "object:146", + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "unit": "µs" + }, + "overrides": [] + }, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 6, + "w": 8, + "x": 16, + "y": 18 + }, + "hiddenSeries": false, + "id": 16, + "legend": { + "avg": false, + "current": true, + "max": false, + "min": false, + "show": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "8.0.6", + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(rate(juicefs_object_request_durations_histogram_seconds_sum{vol_name=\"$name\"}[1m])) by (node) * 1000000 / sum(rate(juicefs_object_request_durations_histogram_seconds_count{vol_name=\"$name\"}[1m])) by (node)", + "format": "time_series", + "interval": "", + "intervalFactor": 1, + "legendFormat": "{{node}}", + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Objects Latency", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "µs", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "unit": "percent" + }, + "overrides": [] + }, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 6, + "w": 8, + "x": 0, + "y": 24 + }, + "hiddenSeries": false, + "id": 10, + "legend": { + "avg": false, + "current": true, + "max": false, + "min": false, + "show": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "options": { + "alertThreshold": 
true + }, + "percentage": false, + "pluginVersion": "8.0.6", + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "exemplar": true, + "expr": "sum(rate(juicefs_cpu_usage{vol_name=\"$name\"}[1m])*100 < 1000) by (node,mp)", + "format": "time_series", + "interval": "", + "intervalFactor": 1, + "legendFormat": "{{node}}:{{mp}}", + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Client CPU Usage", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "$$hashKey": "object:137", + "format": "percent", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "$$hashKey": "object:138", + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "unit": "bytes" + }, + "overrides": [] + }, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 6, + "w": 8, + "x": 8, + "y": 24 + }, + "hiddenSeries": false, + "id": 11, + "legend": { + "avg": false, + "current": true, + "max": false, + "min": false, + "show": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "8.0.6", + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(juicefs_memory{vol_name=\"$name\"}) by (node,mp)", + "format": "time_series", + "interval": "", + "intervalFactor": 1, + "legendFormat": "{{node}}:{{mp}}", + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Client Memory Usage", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "bytes", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "unit": "none" + }, + "overrides": [] + }, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 6, + "w": 8, + "x": 16, + "y": 24 + }, + "hiddenSeries": false, + "id": 21, + "legend": { + "avg": false, + "current": true, + "max": false, + "min": false, + "show": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "8.0.6", + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + 
"expr": "sum(go_goroutines) by (node,mp)", + "format": "time_series", + "interval": "", + "intervalFactor": 1, + "legendFormat": "{{node}}:{{mp}}", + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Go threads", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "none", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "description": "", + "fieldConfig": { + "defaults": { + "unit": "bytes" + }, + "overrides": [] + }, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 6, + "w": 8, + "x": 0, + "y": 30 + }, + "hiddenSeries": false, + "id": 22, + "legend": { + "avg": false, + "current": true, + "max": false, + "min": false, + "show": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "8.0.6", + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "exemplar": true, + "expr": "sum(juicefs_blockcache_bytes{vol_name=\"$name\"}) by (node,mp)", + "format": "time_series", + "interval": "", + "intervalFactor": 1, + "legendFormat": "{{node}}:{{mp}}", + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Block Cache Size", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "bytes", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "description": "", + "fieldConfig": { + "defaults": { + "unit": "none" + }, + "overrides": [] + }, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 6, + "w": 8, + "x": 8, + "y": 30 + }, + "hiddenSeries": false, + "id": 23, + "legend": { + "avg": false, + "current": true, + "max": false, + "min": false, + "show": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "8.0.6", + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "exemplar": true, + "expr": "sum(juicefs_blockcache_blocks{vol_name=\"$name\"}) by (node,mp)", + "format": "time_series", + "interval": "", + "intervalFactor": 1, + "legendFormat": "{{node}}:{{mp}}", + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": 
null, + "title": "Block Cache Count", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "none", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "description": "", + "fieldConfig": { + "defaults": { + "unit": "percent" + }, + "overrides": [] + }, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 6, + "w": 8, + "x": 16, + "y": 30 + }, + "hiddenSeries": false, + "id": 24, + "legend": { + "avg": false, + "current": true, + "max": false, + "min": false, + "show": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "8.0.6", + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "exemplar": true, + "expr": "sum(rate(juicefs_blockcache_hits{vol_name=\"$name\"}[1m])) by (node,mp) *100 / (sum(rate(juicefs_blockcache_hits{vol_name=\"$name\"}[1m])) by (node,mp) + sum(rate(juicefs_blockcache_miss{vol_name=\"$name\"}[1m])) by (node,mp))", + "format": "time_series", + "interval": "", + "intervalFactor": 1, + "legendFormat": "Hits {{node}}:{{mp}}", + "refId": "A" + }, + { + "exemplar": true, + "expr": "sum(rate(juicefs_blockcache_hit_bytes{vol_name=\"$name\"}[1m])) by (node,mp) *100 / (sum(rate(juicefs_blockcache_hit_bytes{vol_name=\"$name\"}[1m])) by (node,mp) + sum(rate(juicefs_blockcache_miss_bytes{vol_name=\"$name\"}[1m])) by (node,mp))", + "format": "time_series", + "interval": "", + "intervalFactor": 1, + "legendFormat": "HitBytes {{node}}:{{mp}}", + "refId": "B" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Block Cache Hit Rate", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "percent", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "description": "", + "fieldConfig": { + "defaults": { + "unit": "percent" + }, + "overrides": [] + }, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 6, + "w": 8, + "x": 0, + "y": 36 + }, + "hiddenSeries": false, + "id": 25, + "legend": { + "avg": false, + "current": true, + "max": false, + "min": false, + "show": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "8.0.6", + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + 
"stack": false, + "steppedLine": false, + "targets": [ + { + "exemplar": true, + "expr": "sum(rate(juicefs_compact_size_histogram_bytes_count{vol_name=\"$name\"}[1m])) by (node,mp)", + "format": "time_series", + "interval": "", + "intervalFactor": 1, + "legendFormat": "{{node}}:{{mp}}", + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Compaction", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "$$hashKey": "object:1080", + "format": "percent", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "$$hashKey": "object:1081", + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "unit": "percent" + }, + "overrides": [] + }, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 6, + "w": 8, + "x": 8, + "y": 36 + }, + "hiddenSeries": false, + "id": 26, + "legend": { + "avg": false, + "current": true, + "max": false, + "min": false, + "show": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "8.0.6", + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "exemplar": true, + "expr": "sum(rate(juicefs_compact_size_histogram_bytes_sum{vol_name=\"$name\"}[1m])) by (node,mp)", + "format": "time_series", + "interval": "", + "intervalFactor": 1, + "legendFormat": "{{node}}:{{mp}}", + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Compacted Data", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "percent", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "description": "", + "fieldConfig": { + "defaults": { + "unit": "short" + }, + "overrides": [] + }, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 6, + "w": 8, + "x": 16, + "y": 36 + }, + "hiddenSeries": false, + "id": 27, + "legend": { + "avg": false, + "current": true, + "max": false, + "min": false, + "show": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "8.0.6", + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "exemplar": true, + "expr": "sum(juicefs_fuse_open_handlers{vol_name=\"$name\"}) by 
(node,mp)", + "format": "time_series", + "interval": "", + "intervalFactor": 1, + "legendFormat": "{{node}}:{{mp}}", + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Open File Handlers", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "$$hashKey": "object:921", + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "$$hashKey": "object:922", + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + } + ], + "refresh": "", + "schemaVersion": 30, + "style": "dark", + "tags": [], + "templating": { + "list": [ + { + "current": { + "selected": true, + "text": "juicefs", + "value": "juicefs" + }, + "hide": 0, + "label": null, + "multi": false, + "name": "datasource", + "options": [], + "query": "prometheus", + "refresh": 1, + "regex": "", + "skipUrlSync": false, + "type": "datasource" + }, + { + "allValue": null, + "current": {}, + "datasource": "${datasource}", + "definition": "label_values(juicefs_uptime, vol_name)", + "description": null, + "error": null, + "hide": 0, + "includeAll": false, + "label": null, + "multi": false, + "name": "name", + "options": [], + "query": { + "query": "label_values(juicefs_uptime, vol_name)", + "refId": "StandardVariableQuery" + }, + "refresh": 1, + "regex": "", + "skipUrlSync": false, + "sort": 0, + "type": "query" + } + ] + }, + "time": { + "from": "now-1h", + "to": "now" + }, + "timepicker": { + "refresh_intervals": [ + "30s", + "1m", + "5m", + "15m", + "30m", + "1h", + "2h", + "1d" + ], + "time_options": [ + "5m", + "15m", + "1h", + "6h", + "12h", + "24h", + "2d", + "7d", + "30d" + ] + }, + "timezone": "", + "title": "JuiceFS Dashboard", + "uid": "-hm07csGk", + "version": 3 +} diff --git a/docs/en/images/baiduyun.png b/docs/en/images/baiduyun.png new file mode 100644 index 0000000..a8c4a5e Binary files /dev/null and b/docs/en/images/baiduyun.png differ diff --git a/docs/en/images/baoyinxiaofei.png b/docs/en/images/baoyinxiaofei.png new file mode 100644 index 0000000..8cc54cb Binary files /dev/null and b/docs/en/images/baoyinxiaofei.png differ diff --git a/docs/en/images/bench-guide-bench.png b/docs/en/images/bench-guide-bench.png new file mode 100644 index 0000000..e079ad5 Binary files /dev/null and b/docs/en/images/bench-guide-bench.png differ diff --git a/docs/en/images/bench-guide-profile.png b/docs/en/images/bench-guide-profile.png new file mode 100644 index 0000000..2656bdf Binary files /dev/null and b/docs/en/images/bench-guide-profile.png differ diff --git a/docs/en/images/bench-guide-stats.png b/docs/en/images/bench-guide-stats.png new file mode 100644 index 0000000..2ea225f Binary files /dev/null and b/docs/en/images/bench-guide-stats.png differ diff --git a/docs/en/images/bigo.png b/docs/en/images/bigo.png new file mode 100644 index 0000000..8beea05 Binary files /dev/null and b/docs/en/images/bigo.png differ diff --git a/docs/en/images/cos-bucket-url.png b/docs/en/images/cos-bucket-url.png new file mode 100644 index 0000000..d8298c4 Binary files /dev/null and b/docs/en/images/cos-bucket-url.png differ diff --git a/docs/en/images/digitalocean-redis-guide.png b/docs/en/images/digitalocean-redis-guide.png new file mode 100644 index 0000000..00d410f 
Binary files /dev/null and b/docs/en/images/digitalocean-redis-guide.png differ diff --git a/docs/en/images/digitalocean-redis-url.png b/docs/en/images/digitalocean-redis-url.png new file mode 100644 index 0000000..8c41f67 Binary files /dev/null and b/docs/en/images/digitalocean-redis-url.png differ diff --git a/docs/en/images/dingdong.png b/docs/en/images/dingdong.png new file mode 100644 index 0000000..b362f00 Binary files /dev/null and b/docs/en/images/dingdong.png differ diff --git a/docs/en/images/encryption.png b/docs/en/images/encryption.png new file mode 100644 index 0000000..a285b19 Binary files /dev/null and b/docs/en/images/encryption.png differ diff --git a/docs/en/images/grafana_dashboard.png b/docs/en/images/grafana_dashboard.png new file mode 100644 index 0000000..c1074c1 Binary files /dev/null and b/docs/en/images/grafana_dashboard.png differ diff --git a/docs/en/images/hangtianhongtu.png b/docs/en/images/hangtianhongtu.png new file mode 100644 index 0000000..8294587 Binary files /dev/null and b/docs/en/images/hangtianhongtu.png differ diff --git a/docs/en/images/how-juicefs-stores-files-new.png b/docs/en/images/how-juicefs-stores-files-new.png new file mode 100644 index 0000000..cb09bad Binary files /dev/null and b/docs/en/images/how-juicefs-stores-files-new.png differ diff --git a/docs/en/images/how-juicefs-stores-files-redis.png b/docs/en/images/how-juicefs-stores-files-redis.png new file mode 100644 index 0000000..df29721 Binary files /dev/null and b/docs/en/images/how-juicefs-stores-files-redis.png differ diff --git a/docs/en/images/how-juicefs-stores-files.png b/docs/en/images/how-juicefs-stores-files.png new file mode 100644 index 0000000..12b94c0 Binary files /dev/null and b/docs/en/images/how-juicefs-stores-files.png differ diff --git a/docs/en/images/internals-read.png b/docs/en/images/internals-read.png new file mode 100644 index 0000000..48ca6a5 Binary files /dev/null and b/docs/en/images/internals-read.png differ diff --git a/docs/en/images/internals-stats.png b/docs/en/images/internals-stats.png new file mode 100644 index 0000000..8f026e8 Binary files /dev/null and b/docs/en/images/internals-stats.png differ diff --git a/docs/en/images/internals-write.png b/docs/en/images/internals-write.png new file mode 100644 index 0000000..1936a60 Binary files /dev/null and b/docs/en/images/internals-write.png differ diff --git a/docs/en/images/juicefs-aliyun.png b/docs/en/images/juicefs-aliyun.png new file mode 100644 index 0000000..7d2c340 Binary files /dev/null and b/docs/en/images/juicefs-aliyun.png differ diff --git a/docs/en/images/juicefs-arch-new.png b/docs/en/images/juicefs-arch-new.png new file mode 100644 index 0000000..cf065c0 Binary files /dev/null and b/docs/en/images/juicefs-arch-new.png differ diff --git a/docs/en/images/juicefs-arch.png b/docs/en/images/juicefs-arch.png new file mode 100644 index 0000000..b36611f Binary files /dev/null and b/docs/en/images/juicefs-arch.png differ diff --git a/docs/en/images/juicefs-bench.png b/docs/en/images/juicefs-bench.png new file mode 100644 index 0000000..63739c2 Binary files /dev/null and b/docs/en/images/juicefs-bench.png differ diff --git a/docs/en/images/juicefs-logo.png b/docs/en/images/juicefs-logo.png new file mode 100644 index 0000000..4b769b3 Binary files /dev/null and b/docs/en/images/juicefs-logo.png differ diff --git a/docs/en/images/juicefs-on-windows-new.png b/docs/en/images/juicefs-on-windows-new.png new file mode 100644 index 0000000..e1f1979 Binary files /dev/null and 
b/docs/en/images/juicefs-on-windows-new.png differ diff --git a/docs/en/images/juicefs-on-windows.png b/docs/en/images/juicefs-on-windows.png new file mode 100644 index 0000000..427fe74 Binary files /dev/null and b/docs/en/images/juicefs-on-windows.png differ diff --git a/docs/en/images/juicefs-profiling.gif b/docs/en/images/juicefs-profiling.gif new file mode 100644 index 0000000..3db752f Binary files /dev/null and b/docs/en/images/juicefs-profiling.gif differ diff --git a/docs/en/images/juicefs-qcloud.png b/docs/en/images/juicefs-qcloud.png new file mode 100644 index 0000000..f088029 Binary files /dev/null and b/docs/en/images/juicefs-qcloud.png differ diff --git a/docs/en/images/juicefs-s3-gateway-arch.png b/docs/en/images/juicefs-s3-gateway-arch.png new file mode 100644 index 0000000..36511e5 Binary files /dev/null and b/docs/en/images/juicefs-s3-gateway-arch.png differ diff --git a/docs/en/images/juicefs-storage-format-new.png b/docs/en/images/juicefs-storage-format-new.png new file mode 100644 index 0000000..684d399 Binary files /dev/null and b/docs/en/images/juicefs-storage-format-new.png differ diff --git a/docs/en/images/juicefs-storage-format.png b/docs/en/images/juicefs-storage-format.png new file mode 100644 index 0000000..adfe433 Binary files /dev/null and b/docs/en/images/juicefs-storage-format.png differ diff --git a/docs/en/images/juicefs_stats_watcher.png b/docs/en/images/juicefs_stats_watcher.png new file mode 100644 index 0000000..5b0bb21 Binary files /dev/null and b/docs/en/images/juicefs_stats_watcher.png differ diff --git a/docs/en/images/k3s-nginx-welcome.png b/docs/en/images/k3s-nginx-welcome.png new file mode 100644 index 0000000..15e6ae6 Binary files /dev/null and b/docs/en/images/k3s-nginx-welcome.png differ diff --git a/docs/en/images/kubesphere_app_shop.png b/docs/en/images/kubesphere_app_shop.png new file mode 100644 index 0000000..9796282 Binary files /dev/null and b/docs/en/images/kubesphere_app_shop.png differ diff --git a/docs/en/images/kubesphere_app_shop_en.png b/docs/en/images/kubesphere_app_shop_en.png new file mode 100644 index 0000000..7462d66 Binary files /dev/null and b/docs/en/images/kubesphere_app_shop_en.png differ diff --git a/docs/en/images/kubesphere_app_template.png b/docs/en/images/kubesphere_app_template.png new file mode 100644 index 0000000..69abc48 Binary files /dev/null and b/docs/en/images/kubesphere_app_template.png differ diff --git a/docs/en/images/kubesphere_app_template_en.png b/docs/en/images/kubesphere_app_template_en.png new file mode 100644 index 0000000..64c9360 Binary files /dev/null and b/docs/en/images/kubesphere_app_template_en.png differ diff --git a/docs/en/images/kubesphere_create_minio.png b/docs/en/images/kubesphere_create_minio.png new file mode 100644 index 0000000..ed02a25 Binary files /dev/null and b/docs/en/images/kubesphere_create_minio.png differ diff --git a/docs/en/images/kubesphere_create_minio_en.png b/docs/en/images/kubesphere_create_minio_en.png new file mode 100644 index 0000000..b8f6b9c Binary files /dev/null and b/docs/en/images/kubesphere_create_minio_en.png differ diff --git a/docs/en/images/kubesphere_create_secret.png b/docs/en/images/kubesphere_create_secret.png new file mode 100644 index 0000000..3aa88fd Binary files /dev/null and b/docs/en/images/kubesphere_create_secret.png differ diff --git a/docs/en/images/kubesphere_deployment.png b/docs/en/images/kubesphere_deployment.png new file mode 100644 index 0000000..db07a06 Binary files /dev/null and b/docs/en/images/kubesphere_deployment.png 
differ diff --git a/docs/en/images/kubesphere_deployment_en.png b/docs/en/images/kubesphere_deployment_en.png new file mode 100644 index 0000000..c5e4e92 Binary files /dev/null and b/docs/en/images/kubesphere_deployment_en.png differ diff --git a/docs/en/images/kubesphere_install_csi.png b/docs/en/images/kubesphere_install_csi.png new file mode 100644 index 0000000..6f864fc Binary files /dev/null and b/docs/en/images/kubesphere_install_csi.png differ diff --git a/docs/en/images/kubesphere_install_csi_en.png b/docs/en/images/kubesphere_install_csi_en.png new file mode 100644 index 0000000..cb92805 Binary files /dev/null and b/docs/en/images/kubesphere_install_csi_en.png differ diff --git a/docs/en/images/kubesphere_minio.png b/docs/en/images/kubesphere_minio.png new file mode 100644 index 0000000..4812423 Binary files /dev/null and b/docs/en/images/kubesphere_minio.png differ diff --git a/docs/en/images/kubesphere_minio_en.png b/docs/en/images/kubesphere_minio_en.png new file mode 100644 index 0000000..46c8a48 Binary files /dev/null and b/docs/en/images/kubesphere_minio_en.png differ diff --git a/docs/en/images/kubesphere_org_space.png b/docs/en/images/kubesphere_org_space.png new file mode 100644 index 0000000..fb68089 Binary files /dev/null and b/docs/en/images/kubesphere_org_space.png differ diff --git a/docs/en/images/kubesphere_pod.png b/docs/en/images/kubesphere_pod.png new file mode 100644 index 0000000..030b4b1 Binary files /dev/null and b/docs/en/images/kubesphere_pod.png differ diff --git a/docs/en/images/kubesphere_pod_en.png b/docs/en/images/kubesphere_pod_en.png new file mode 100644 index 0000000..1173b11 Binary files /dev/null and b/docs/en/images/kubesphere_pod_en.png differ diff --git a/docs/en/images/kubesphere_pvc.png b/docs/en/images/kubesphere_pvc.png new file mode 100644 index 0000000..79a1a41 Binary files /dev/null and b/docs/en/images/kubesphere_pvc.png differ diff --git a/docs/en/images/kubesphere_pvc_en.png b/docs/en/images/kubesphere_pvc_en.png new file mode 100644 index 0000000..27502b9 Binary files /dev/null and b/docs/en/images/kubesphere_pvc_en.png differ diff --git a/docs/en/images/kubesphere_redis.png b/docs/en/images/kubesphere_redis.png new file mode 100644 index 0000000..ab39e7c Binary files /dev/null and b/docs/en/images/kubesphere_redis.png differ diff --git a/docs/en/images/kubesphere_redis_en.png b/docs/en/images/kubesphere_redis_en.png new file mode 100644 index 0000000..947dd60 Binary files /dev/null and b/docs/en/images/kubesphere_redis_en.png differ diff --git a/docs/en/images/kubesphere_sc_create.png b/docs/en/images/kubesphere_sc_create.png new file mode 100644 index 0000000..6f725e2 Binary files /dev/null and b/docs/en/images/kubesphere_sc_create.png differ diff --git a/docs/en/images/kubesphere_sc_create_en.png b/docs/en/images/kubesphere_sc_create_en.png new file mode 100644 index 0000000..1b8bc38 Binary files /dev/null and b/docs/en/images/kubesphere_sc_create_en.png differ diff --git a/docs/en/images/kubesphere_sc_update.png b/docs/en/images/kubesphere_sc_update.png new file mode 100644 index 0000000..e05e217 Binary files /dev/null and b/docs/en/images/kubesphere_sc_update.png differ diff --git a/docs/en/images/kubesphere_sc_update_en.png b/docs/en/images/kubesphere_sc_update_en.png new file mode 100644 index 0000000..025d461 Binary files /dev/null and b/docs/en/images/kubesphere_sc_update_en.png differ diff --git a/docs/en/images/kubesphere_secret_en.png b/docs/en/images/kubesphere_secret_en.png new file mode 100644 index 0000000..0a5eacf 
Binary files /dev/null and b/docs/en/images/kubesphere_secret_en.png differ diff --git a/docs/en/images/kubesphere_shop_juicefs_en.png b/docs/en/images/kubesphere_shop_juicefs_en.png new file mode 100644 index 0000000..db5e7a8 Binary files /dev/null and b/docs/en/images/kubesphere_shop_juicefs_en.png differ diff --git a/docs/en/images/kubesphere_update_csi.png b/docs/en/images/kubesphere_update_csi.png new file mode 100644 index 0000000..203c9a4 Binary files /dev/null and b/docs/en/images/kubesphere_update_csi.png differ diff --git a/docs/en/images/kubesphere_update_csi_en.png b/docs/en/images/kubesphere_update_csi_en.png new file mode 100644 index 0000000..8ca15e2 Binary files /dev/null and b/docs/en/images/kubesphere_update_csi_en.png differ diff --git a/docs/en/images/kubesphere_update_secret.png b/docs/en/images/kubesphere_update_secret.png new file mode 100644 index 0000000..c19e630 Binary files /dev/null and b/docs/en/images/kubesphere_update_secret.png differ diff --git a/docs/en/images/kubesphere_update_secret_en.png b/docs/en/images/kubesphere_update_secret_en.png new file mode 100644 index 0000000..02a9bbb Binary files /dev/null and b/docs/en/images/kubesphere_update_secret_en.png differ diff --git a/docs/en/images/kubesphere_workload.png b/docs/en/images/kubesphere_workload.png new file mode 100644 index 0000000..abf6957 Binary files /dev/null and b/docs/en/images/kubesphere_workload.png differ diff --git a/docs/en/images/kubesphere_workload_en.png b/docs/en/images/kubesphere_workload_en.png new file mode 100644 index 0000000..519a7d2 Binary files /dev/null and b/docs/en/images/kubesphere_workload_en.png differ diff --git a/docs/en/images/lixiang.png b/docs/en/images/lixiang.png new file mode 100644 index 0000000..1714573 Binary files /dev/null and b/docs/en/images/lixiang.png differ diff --git a/docs/en/images/meta-auto-backup-list.png b/docs/en/images/meta-auto-backup-list.png new file mode 100644 index 0000000..eeaa384 Binary files /dev/null and b/docs/en/images/meta-auto-backup-list.png differ diff --git a/docs/en/images/metadata-benchmark.svg b/docs/en/images/metadata-benchmark.svg new file mode 100644 index 0000000..83273aa --- /dev/null +++ b/docs/en/images/metadata-benchmark.svg @@ -0,0 +1 @@ + \ No newline at end of file diff --git a/docs/en/images/mi.png b/docs/en/images/mi.png new file mode 100644 index 0000000..d8bc210 Binary files /dev/null and b/docs/en/images/mi.png differ diff --git a/docs/en/images/minio-browser.png b/docs/en/images/minio-browser.png new file mode 100644 index 0000000..11d2dcd Binary files /dev/null and b/docs/en/images/minio-browser.png differ diff --git a/docs/en/images/pv-on-juicefs.png b/docs/en/images/pv-on-juicefs.png new file mode 100644 index 0000000..87d57a0 Binary files /dev/null and b/docs/en/images/pv-on-juicefs.png differ diff --git a/docs/en/images/qcloud-redis-network.png b/docs/en/images/qcloud-redis-network.png new file mode 100644 index 0000000..17e328c Binary files /dev/null and b/docs/en/images/qcloud-redis-network.png differ diff --git a/docs/en/images/qcloud.png b/docs/en/images/qcloud.png new file mode 100644 index 0000000..0605b23 Binary files /dev/null and b/docs/en/images/qcloud.png differ diff --git a/docs/en/images/rancher-chart-info.jpg b/docs/en/images/rancher-chart-info.jpg new file mode 100644 index 0000000..95abb78 Binary files /dev/null and b/docs/en/images/rancher-chart-info.jpg differ diff --git a/docs/en/images/rancher-chart-installed.jpg b/docs/en/images/rancher-chart-installed.jpg new file mode 100644 index 
0000000..63334f4 Binary files /dev/null and b/docs/en/images/rancher-chart-installed.jpg differ diff --git a/docs/en/images/rancher-chart-search.jpg b/docs/en/images/rancher-chart-search.jpg new file mode 100644 index 0000000..aac3ce1 Binary files /dev/null and b/docs/en/images/rancher-chart-search.jpg differ diff --git a/docs/en/images/rancher-cluster-create.jpg b/docs/en/images/rancher-cluster-create.jpg new file mode 100644 index 0000000..ecbcbff Binary files /dev/null and b/docs/en/images/rancher-cluster-create.jpg differ diff --git a/docs/en/images/rancher-cluster-options.jpg b/docs/en/images/rancher-cluster-options.jpg new file mode 100644 index 0000000..864753d Binary files /dev/null and b/docs/en/images/rancher-cluster-options.jpg differ diff --git a/docs/en/images/rancher-clusters.jpg b/docs/en/images/rancher-clusters.jpg new file mode 100644 index 0000000..7a0075d Binary files /dev/null and b/docs/en/images/rancher-clusters.jpg differ diff --git a/docs/en/images/rancher-new-repo.jpg b/docs/en/images/rancher-new-repo.jpg new file mode 100644 index 0000000..6a646ae Binary files /dev/null and b/docs/en/images/rancher-new-repo.jpg differ diff --git a/docs/en/images/rancher-pvc.jpg b/docs/en/images/rancher-pvc.jpg new file mode 100644 index 0000000..6c68d6b Binary files /dev/null and b/docs/en/images/rancher-pvc.jpg differ diff --git a/docs/en/images/rancher-repos.jpg b/docs/en/images/rancher-repos.jpg new file mode 100644 index 0000000..ab126b6 Binary files /dev/null and b/docs/en/images/rancher-repos.jpg differ diff --git a/docs/en/images/rancher-welcome.jpeg b/docs/en/images/rancher-welcome.jpeg new file mode 100644 index 0000000..bd77dd1 Binary files /dev/null and b/docs/en/images/rancher-welcome.jpeg differ diff --git a/docs/en/images/repo-diagram.svg b/docs/en/images/repo-diagram.svg new file mode 100644 index 0000000..9cde737 --- /dev/null +++ b/docs/en/images/repo-diagram.svg @@ -0,0 +1 @@ +sdk/javasdk/javapkgpkghackhackdocsdocscmdcmdsrcsrclibjfslibjfsconfconfwinfspwinfspvfsvfsutilsutilssyncsyncobjectobjectmetametafusefusefsfscompresscompresschunkchunkwinfsp_headerswinfsp_headerszh_cnzh_cnimagesimagesenentesttestmainmainimagesimagesimagesimagesjava/io/juicefsjava/io/juicefsjava/io/juicefsjava/io/juicefscontractcontractutilsutilsbenchbench.builder.cmd.fs.gitignore.go.h.hack.java.json.md.meta.mod.properties.sh.svg.xml.yaml.ymleach dot sized by file size \ No newline at end of file diff --git a/docs/en/images/s3-gateway-file-manager.jpg b/docs/en/images/s3-gateway-file-manager.jpg new file mode 100644 index 0000000..e5b862f Binary files /dev/null and b/docs/en/images/s3-gateway-file-manager.jpg differ diff --git a/docs/en/images/s3ql-bin.jpg b/docs/en/images/s3ql-bin.jpg new file mode 100644 index 0000000..3397f19 Binary files /dev/null and b/docs/en/images/s3ql-bin.jpg differ diff --git a/docs/en/images/sequential-read-write-benchmark.svg b/docs/en/images/sequential-read-write-benchmark.svg new file mode 100644 index 0000000..4826a70 --- /dev/null +++ b/docs/en/images/sequential-read-write-benchmark.svg @@ -0,0 +1 @@ + \ No newline at end of file diff --git a/docs/en/images/sf.png b/docs/en/images/sf.png new file mode 100644 index 0000000..dc7a51f Binary files /dev/null and b/docs/en/images/sf.png differ diff --git a/docs/en/images/shopee.png b/docs/en/images/shopee.png new file mode 100644 index 0000000..51f50e1 Binary files /dev/null and b/docs/en/images/shopee.png differ diff --git a/docs/en/images/spark_ql_orc.png b/docs/en/images/spark_ql_orc.png new file mode 100644 index 
0000000..1af8b40 Binary files /dev/null and b/docs/en/images/spark_ql_orc.png differ diff --git a/docs/en/images/spark_sql_parquet.png b/docs/en/images/spark_sql_parquet.png new file mode 100644 index 0000000..1ebe06f Binary files /dev/null and b/docs/en/images/spark_sql_parquet.png differ diff --git a/docs/en/images/sqlite-info.png b/docs/en/images/sqlite-info.png new file mode 100644 index 0000000..24f8832 Binary files /dev/null and b/docs/en/images/sqlite-info.png differ diff --git a/docs/en/images/sqlite-mount-local.png b/docs/en/images/sqlite-mount-local.png new file mode 100644 index 0000000..90fe649 Binary files /dev/null and b/docs/en/images/sqlite-mount-local.png differ diff --git a/docs/en/images/windows-mount-startup.png b/docs/en/images/windows-mount-startup.png new file mode 100644 index 0000000..5bd57b4 Binary files /dev/null and b/docs/en/images/windows-mount-startup.png differ diff --git a/docs/en/images/windows-path-en.png b/docs/en/images/windows-path-en.png new file mode 100644 index 0000000..ec57747 Binary files /dev/null and b/docs/en/images/windows-path-en.png differ diff --git a/docs/en/images/windows-path.png b/docs/en/images/windows-path.png new file mode 100644 index 0000000..3eef636 Binary files /dev/null and b/docs/en/images/windows-path.png differ diff --git a/docs/en/images/windows-run-startup.png b/docs/en/images/windows-run-startup.png new file mode 100644 index 0000000..e46be46 Binary files /dev/null and b/docs/en/images/windows-run-startup.png differ diff --git a/docs/en/images/wsl/access-jfs-from-win-en.png b/docs/en/images/wsl/access-jfs-from-win-en.png new file mode 100644 index 0000000..ca91998 Binary files /dev/null and b/docs/en/images/wsl/access-jfs-from-win-en.png differ diff --git a/docs/en/images/wsl/access-jfs-from-win.png b/docs/en/images/wsl/access-jfs-from-win.png new file mode 100644 index 0000000..6a55698 Binary files /dev/null and b/docs/en/images/wsl/access-jfs-from-win.png differ diff --git a/docs/en/images/wsl/init.png b/docs/en/images/wsl/init.png new file mode 100644 index 0000000..3c1a862 Binary files /dev/null and b/docs/en/images/wsl/init.png differ diff --git a/docs/en/images/wsl/mount-point.png b/docs/en/images/wsl/mount-point.png new file mode 100644 index 0000000..ff69305 Binary files /dev/null and b/docs/en/images/wsl/mount-point.png differ diff --git a/docs/en/images/wsl/startmenu-en.png b/docs/en/images/wsl/startmenu-en.png new file mode 100644 index 0000000..22c901d Binary files /dev/null and b/docs/en/images/wsl/startmenu-en.png differ diff --git a/docs/en/images/wsl/startmenu.png b/docs/en/images/wsl/startmenu.png new file mode 100644 index 0000000..4831c2d Binary files /dev/null and b/docs/en/images/wsl/startmenu.png differ diff --git a/docs/en/images/wsl/windows-to-linux-en.png b/docs/en/images/wsl/windows-to-linux-en.png new file mode 100644 index 0000000..48cf62f Binary files /dev/null and b/docs/en/images/wsl/windows-to-linux-en.png differ diff --git a/docs/en/images/wsl/windows-to-linux.png b/docs/en/images/wsl/windows-to-linux.png new file mode 100644 index 0000000..6dba6ac Binary files /dev/null and b/docs/en/images/wsl/windows-to-linux.png differ diff --git a/docs/en/images/wsl/winver-en.png b/docs/en/images/wsl/winver-en.png new file mode 100644 index 0000000..2459239 Binary files /dev/null and b/docs/en/images/wsl/winver-en.png differ diff --git a/docs/en/images/wsl/winver.png b/docs/en/images/wsl/winver.png new file mode 100644 index 0000000..0a9a282 Binary files /dev/null and b/docs/en/images/wsl/winver.png 
differ diff --git a/docs/en/images/wsl/zone-identifier-en.png b/docs/en/images/wsl/zone-identifier-en.png new file mode 100644 index 0000000..491625a Binary files /dev/null and b/docs/en/images/wsl/zone-identifier-en.png differ diff --git a/docs/en/images/wsl/zone-identifier.png b/docs/en/images/wsl/zone-identifier.png new file mode 100644 index 0000000..51d1881 Binary files /dev/null and b/docs/en/images/wsl/zone-identifier.png differ diff --git a/docs/en/introduction/_case.md b/docs/en/introduction/_case.md new file mode 100644 index 0000000..56e8f50 --- /dev/null +++ b/docs/en/introduction/_case.md @@ -0,0 +1,39 @@ +--- +sidebar_label: Use Scenarios & Limits +sidebar_position: 2 +slug: /case +--- +# JuiceFS Use Scenarios & Limits + +JuiceFS is widely applicable to various data storage and sharing scenarios. JuiceFS application cases from all over the world are summarized here. All community users are welcome to maintain this case list. + +## Data backup and recovery + +- [JuiceFS for archive NGINX logs](https://juicefs.com/docs/en/archive_nginx_log_in_juicefs.html) +- [JuiceFS for MySQL backup, verification and recovery](https://juicefs.com/docs/en/backup_mysql_in_juicefs.html) +- [Customer Stories: Xiachufang MySQL backup practice on JuiceFS](https://juicefs.com/blog/en/posts/xiachufang-mysql-backup-practice-on-juicefs/) + +## Big Data + +- [How to effectively reduce the load of HDFS cluster for Qutoutiao(NASDAQ:QTT)](https://juicefs.com/blog/en/posts/qutoutiao-big-data-platform-user-case/) +- [How does the Globalegrow data platform achieve both speed and money savings?](https://juicefs.com/blog/en/posts/globalegrow-big-data-platform-user-case/) +- [How to make HBase faster, more stable, and cheaper](https://juicefs.com/blog/en/posts/how-to-make-hbase-faster-more-stable-and-cheaper/) +- [Exploring storage and computing separation for ClickHouse](https://juicefs.com/blog/en/posts/clickhouse-disaggregated-storage-and-compute-practice/) + +## Data sharing + +- [Building a Milvus Cluster Based on JuiceFS](https://juicefs.com/blog/en/posts/build-milvus-distributed-cluster-based-on-juicefs/) + + +## Contribution + +If you want to add JuiceFS application cases to this list, you can apply in the following ways: + +### 1. GitHub contribution + +You can fork this repository on GitHub, add the title and URL of your case page to the corresponding category, create a Pull Request, and wait for review and merging. + +### 2. Social media + +You can join the official JuiceFS [Slack channel](https://juicefs.slack.com/) and contact any staff member about contributing a case. + diff --git a/docs/en/introduction/architecture.md b/docs/en/introduction/architecture.md new file mode 100644 index 0000000..3025ac1 --- /dev/null +++ b/docs/en/introduction/architecture.md @@ -0,0 +1,44 @@ +--- +sidebar_label: Architecture +sidebar_position: 2 +slug: /architecture +--- + +# Architecture + +The JuiceFS file system consists of three parts: + +1. **JuiceFS Client**: coordinating object storage and metadata engine, and implementation of file system interfaces such as POSIX, Hadoop, Kubernetes CSI Driver, S3 Gateway, etc.. +2. **Data Storage**: storage of the data itself, supporting media such as local disk, public or private cloud object storage, HDFS, etc. +3. **Metadata Engine**: stores the metadata corresponding to the data, such as file name, file size, permission group, creation and modification time, and directory structure, supporting Redis, MySQL, TiKV and other engines.
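To make these three parts concrete, the sketch below maps them onto the two basic commands covered later in this documentation; the Redis address, bucket URL and mount point are illustrative placeholders, not required values:

```bash
# Metadata Engine: a Redis database; Data Storage: an S3 bucket
$ juicefs format \
    --storage s3 \
    --bucket https://mybucket.s3.us-east-2.amazonaws.com \
    redis://192.168.1.6:6379/1 \
    myjfs

# JuiceFS Client: mounts the volume through the POSIX (FUSE) interface
$ sudo juicefs mount -d redis://192.168.1.6:6379/1 /mnt/jfs
```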
+ +![image](../images/juicefs-arch-new.png) + +As a file system, JuiceFS handles the data and its corresponding metadata separately, with the data being stored in the object store and the metadata being stored in the metadata engine. + +In terms of **data storage**, JuiceFS supports almost all public cloud object stores, as well as OpenStack Swift, Ceph, MinIO and other open source object stores that support private deployments. + +In terms of **metadata storage**, JuiceFS is designed with multiple engines, and currently supports Redis, TiKV, MySQL/MariaDB, PostgreSQL, SQLite, etc. as metadata service engines, and more engines will be supported over time. You are welcome to [submit an issue](https://github.com/juicedata/juicefs/issues) to tell us about your requirements. + +In terms of **File System Interface** implementation: + +- With **FUSE**, the JuiceFS file system can be mounted to the server in a POSIX-compatible manner to use massive cloud storage directly as local storage. +- With the **Hadoop Java SDK**, the JuiceFS file system can directly replace HDFS and provide low-cost mass storage for Hadoop. +- With the **Kubernetes CSI Driver**, the JuiceFS file system can directly provide mass storage for Kubernetes. +- With the **S3 Gateway**, applications using S3 as the storage layer can directly access the JuiceFS file system and use tools such as AWS CLI, s3cmd, and MinIO client. + +## How JuiceFS Stores Files + +A file system acts as a medium between the user and the hard drive, allowing files to be stored on the drive properly. Commonly used file systems on Windows are FAT32 and NTFS, while Linux commonly uses Ext4, XFS, Btrfs, etc. Each file system has its own way of organizing and managing files, which determines characteristics such as storage capacity and performance. + +As a file system, JuiceFS is no exception. Its strong consistency and high performance are inseparable from its unique file management mode. + +Unlike traditional file systems that can only use local disks to store data and the corresponding metadata, JuiceFS formats data and stores it in object storage (cloud storage), while the metadata corresponding to the data is stored in databases such as Redis. + +Any file stored in JuiceFS will be split into fixed-size **"Chunks"**, with a default upper limit of 64 MiB. Each Chunk is composed of one or more **"Slices"**; the length of a Slice is not fixed and depends on how the file is written. Each Slice is further split into fixed-size **"Blocks"**, 4 MiB by default. Finally, these Blocks are stored in the object storage. At the same time, JuiceFS stores each file and its Chunks, Slices, Blocks and other metadata information in the metadata engine. + +![](../images/juicefs-storage-format-new.png) + +Using JuiceFS, files will eventually be split into Chunks, Slices and Blocks and stored in object storage. Therefore, you will find that the source files stored in JuiceFS cannot be found in the file browser of the object storage platform. There is a chunks directory and a bunch of digitally numbered directories and files in the bucket. Don't panic, this is the secret of the high-performance operation of the JuiceFS file system!
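If you want to check how a particular file in a mounted JuiceFS volume maps to these internal objects, the `juicefs info` command described later in the command reference shows internal information for a path or inode. A minimal sketch (the mount point and file name below are placeholders):

```bash
# show internal information for a single file in a mounted volume
$ juicefs info /mnt/jfs/myfile

# summarize a whole directory recursively (may take a long time for huge trees)
$ juicefs info -r /mnt/jfs/mydir
```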
+ +![How JuiceFS stores your files](../images/how-juicefs-stores-files-new.png) diff --git a/docs/en/introduction/introduction.md b/docs/en/introduction/introduction.md new file mode 100644 index 0000000..a8b31ea --- /dev/null +++ b/docs/en/introduction/introduction.md @@ -0,0 +1,65 @@ +--- +title: What is JuiceFS? +sidebar_label: What is JuiceFS +sidebar_position: 1 +slug: . +--- +# + +![JuiceFS LOGO](../images/juicefs-logo.png) + +**JuiceFS** is a high-performance shared file system designed for cloud-native use and released under the Apache License 2.0. It provides full [POSIX](https://en.wikipedia.org/wiki/POSIX) compatibility, allowing almost all object stores to be used locally as massive local disks, and to be mounted and read on different hosts across platforms and regions at the same time. + +JuiceFS implements the distributed design of file system by separating "data" and "metadata" storage architecture. When using JuiceFS to store data, the data itself is persisted in [object storage](../reference/how_to_setup_object_storage.md#supported-object-storage) (e.g., Amazon S3), and the corresponding metadata can be persisted on-demand in various [databases](../reference/how_to_setup_metadata_engine.md) such as Redis, MySQL, TiKV, SQLite, etc. + +JuiceFS provides rich APIs for various forms of data management, analysis, archiving, and backup, and can seamlessly interface with big data, machine learning, artificial intelligence and other application platforms without modifying code, providing them with massive, elastic, and low-cost high-performance storage. It allows you to focus on business development and improve R&D efficiency without worrying about availability, disaster recovery, monitoring, and expansion. Make it easier for operations and maintenance teams to transform to DevOps teams. + +## Features + +1. **POSIX Compatible**: used like a local file system, seamlessly interfacing with existing applications. +2. **HDFS Compatible**: Full compatibility with the [HDFS API](../deployment/hadoop_java_sdk.md), providing enhanced metadata performance. +3. **S3 Compatible**: Provides [S3 gateway](../deployment/s3_gateway.md) implementing the S3-compatible access interface. +4. **Cloud-Native**: Use JuiceFS in Kubernetes easily via [CSI Driver](../deployment/how_to_use_on_kubernetes.md). +5. **Distributed**: the same file system can be mounted on thousands of servers at the same time, with high performance concurrent reads and writes and shared data. +6. **Strong Consistency**: confirmed file changes are immediately visible on all servers, ensuring strong consistency. +7. **Better Performance**: millisecond latency, nearly unlimited throughput (depending on object storage scale), see [performance test results](../benchmark/benchmark.md). +8. **Data Security**: Supports encryption in transit and encryption at rest, [View Details](../security/encrypt.md). +9. **File lock**: support for BSD lock (flock) and POSIX lock (fcntl). +10. **Data Compression**: Supports [LZ4](https://lz4.github.io/lz4) and [Zstandard](https://facebook.github.io/zstd) compression algorithms to save storage space. + +## Architecture + +The JuiceFS file system consists of three parts: + +1. **JuiceFS Client**: coordinating object storage and metadata engine, and implementation of file system interfaces such as POSIX, Hadoop, Kubernetes CSI Driver, S3 Gateway, etc.. +2. **Data Storage**: storage of the data itself, supporting media such as local disk, public or private cloud object storage, HDFS, etc. +3. 
**Metadata Engine**: storage data corresponding metadata contains file name, file size, permission group, creation and modification time and directory structure, etc., supporting Redis, MySQL, TiKV and other engines. + +![image](../images/juicefs-arch-new.png) + +As a file system, JuiceFS handles the data and its corresponding metadata separately, with the data being stored in the object store and the metadata being stored in the metadata engine. + +In terms of **data storage**, JuiceFS supports almost all public cloud object stores, as well as OpenStack Swift, Ceph, MinIO and other open source object stores that support private deployments. + +In terms of **metadata storage**, JuiceFS is designed with multiple engines, and currently supports Redis, TiKV, MySQL/MariaDB, PostgreSQL, SQLite, etc. as metadata service engines, and will implement more multiple data storage engines one after another. Welcome to [Submit Issue](https://github.com/juicedata/juicefs/issues) to feedback your requirements. + +In terms of **File System Interface** implementation: + +- With **FUSE**, the JuiceFS file system can be mounted to the server in a POSIX-compatible manner to use massive cloud storage directly as local storage. +- With **Hadoop Java SDK**, JuiceFS file system can directly replace HDFS and provide low-cost mass storage for Hadoop. +- With the **Kubernetes CSI Driver**, the JuiceFS file system can directly provide mass storage for Kubernetes. +- With **S3 Gateway**, applications using S3 as the storage layer can directly access the JuiceFS file system and use tools such as AWS CLI, s3cmd, and MinIO client. + +## Scenarios + +JuiceFS is designed for massive data storage and can be used as an alternative to many distributed file systems and network file systems, especially for the following scenarios. + +- **Big Data Analytics**: HDFS-compatible without any special API intrusion into the business; seamless integration with mainstream computing engines (Spark, Presto, Hive, etc.); infinitely scalable storage space; almost 0 operation and maintenance cost; perfect caching mechanism, several times higher than object storage performance. +- **Machine Learning**: POSIX compatible, can support all machine learning, deep learning frameworks; sharing capabilities to improve the efficiency of team management, use of data. +- **Persistent volumes in container clusters**: Kubernetes CSI support; persistent storage and independent from container lifetime; strong consistency to ensure correct data; take over data storage requirements to ensure statelessness of the service. +- **Shared Workspace**: can be mounted on any host; no client concurrent read/write restrictions; POSIX compatible with existing data flow and scripting operations. +- **Data Backup**: Back up all kinds of data in unlimited smoothly scalable storage space; combined with the shared mount feature, you can aggregate multi-host data to one place and do unified backup. + +## Data Privacy + +JuiceFS is open source software, and you can find the full source code at [GitHub](https://github.com/juicedata/juicefs). When using JuiceFS to store data, the data is split into chunks according to certain rules and stored in your own defined object storage or other storage media, and the metadata corresponding to the data is stored in your own defined database. 
diff --git a/docs/en/mount_at_boot.md b/docs/en/mount_at_boot.md new file mode 100644 index 0000000..fc9503a --- /dev/null +++ b/docs/en/mount_at_boot.md @@ -0,0 +1,90 @@ +# Mount JuiceFS at Boot + +This is a guide about how to mount JuiceFS automatically at boot. + +## Linux + +Copy `juicefs` as `/sbin/mount.juicefs`, then edit `/etc/fstab` with following line: + +``` + juicefs _netdev[,] 0 0 +``` + +The format of `` is `redis://:@:/`, e.g. `redis://localhost:6379/1`. And replace `` with specific path you wanna mount JuiceFS to, e.g. `/jfs`. If you need set [mount options](reference/command_reference.md#juicefs-mount), replace `[,]` with comma separated options list. The following line is an example: + +``` +redis://localhost:6379/1 /jfs juicefs _netdev,max-uploads=50,writeback,cache-size=2048 0 0 +``` + +**Note: By default, CentOS 6 will NOT mount network file system after boot, run following command to enable it:** + +```bash +$ sudo chkconfig --add netfs +``` + +## macOS + +Create a file named `io.juicefs..plist` under `~/Library/LaunchAgents`. Replace `` with JuiceFS volume name. Add following contents to the file (again, replace `NAME`, `PATH-TO-JUICEFS`, `META-URL` and `MOUNTPOINT` with appropriate value): + +```xml + + + + + Label + io.juicefs.NAME + ProgramArguments + + PATH-TO-JUICEFS + mount + META-URL + MOUNTPOINT + + RunAtLoad + + + +``` + +Use following commands to load the file created in the previous step and test whether the loading is successful. **Please ensure Redis server is already running.** + +```bash +$ launchctl load ~/Library/LaunchAgents/io.juicefs..plist +$ launchctl start ~/Library/LaunchAgents/io.juicefs. +$ ls +``` + +If mount failed, you can add following configuration to `io.juicefs..plist` file for debug purpose: + +```xml + StandardOutPath + /tmp/juicefs.out + StandardErrorPath + /tmp/juicefs.err +``` + +Use following commands to reload the latest configuration and inspect the output: + +```bash +$ launchctl unload ~/Library/LaunchAgents/io.juicefs..plist +$ launchctl load ~/Library/LaunchAgents/io.juicefs..plist +$ cat /tmp/juicefs.out +$ cat /tmp/juicefs.err +``` + +If you install Redis server by Homebrew, you could use following command to start it at boot: + +```bash +$ brew services start redis +``` + +Then add following configuration to `io.juicefs..plist` file for ensure Redis server is loaded: + +```xml + KeepAlive + + OtherJobEnabled + homebrew.mxcl.redis + +``` + diff --git a/docs/en/reference/command_reference.md b/docs/en/reference/command_reference.md new file mode 100644 index 0000000..48e260f --- /dev/null +++ b/docs/en/reference/command_reference.md @@ -0,0 +1,738 @@ +--- +sidebar_label: Command Reference +sidebar_position: 1 +slug: /command_reference +--- +# Command Reference + +There are many commands to help you manage your file system. This page provides a detailed reference for these commands. + +## Overview + +If you run `juicefs` by itself, it will print all available commands. In addition, you can add `-h/--help` flag after each command to get more information of it. + +```bash +$ juicefs -h +NAME: + juicefs - A POSIX file system built on Redis and object storage. + +USAGE: + juicefs [global options] command [command options] [arguments...] 
+ +VERSION: + 1.0-dev (2021-12-27 3462bdbf) + +COMMANDS: + format format a volume + mount mount a volume + umount unmount a volume + gateway S3-compatible gateway + sync sync between two storage + rmr remove directories recursively + info show internal information for paths or inodes + bench run benchmark to read/write/stat big/small files + gc collect any leaked objects + fsck Check consistency of file system + profile analyze access log + stats show runtime statistics + status show status of JuiceFS + warmup build cache for target directories/files + dump dump metadata into a JSON file + load load metadata from a previously dumped JSON file + config change config of a volume + destroy destroy an existing volume + help, h Shows a list of commands or help for one command + +GLOBAL OPTIONS: + --verbose, --debug, -v enable debug log (default: false) + --quiet, -q only warning and errors (default: false) + --trace enable trace log (default: false) + --no-agent Disable pprof (:6060) and gops (:6070) agent (default: false) + --help, -h show help (default: false) + --version, -V print only the version (default: false) + +COPYRIGHT: + Apache License 2.0 +``` + +:::note +If `juicefs` is not placed in your `$PATH`, you should run the script with the path to the script. For example, if `juicefs` is placed in current directory, you should use `./juicefs`. It is recommended to place `juicefs` in your `$PATH` for convenience. You can refer to [Installation & Upgrade](../getting-started/installation.md) for more information. +::: + +:::note +If the command option is of boolean type, such as `--debug`, there is no need to set any value, just add `--debug` to the command to enable the function, and vice versa to disable it. +::: + +## Auto Completion + +:::note +This feature requires JuiceFS >= 0.15.2. It is implemented based on `github.com/urfave/cli/v2`. You can find more information [here](https://github.com/urfave/cli/blob/master/docs/v2/manual.md#enabling). +::: + +To enable commands completion, simply source the script provided within `hack/autocomplete`. For example: + +Bash: + +```bash +source hack/autocomplete/bash_autocomplete +``` + +Zsh: + +```bash +source hack/autocomplete/zsh_autocomplete +``` + +Please note the auto-completion is only enabled for the current session. If you want it for all new sessions, add the `source` command to `.bashrc` or `.zshrc`: + +```bash +echo "source path/to/bash_autocomplete" >> ~/.bashrc +``` + +or + +```bash +echo "source path/to/zsh_autocomplete" >> ~/.zshrc +``` + +Alternatively, if you are using bash on a Linux system, you may just copy the script to `/etc/bash_completion.d` and rename it to `juicefs`: + +```bash +sudo cp hack/autocomplete/bash_autocomplete /etc/bash_completion.d/juicefs +``` + +```shell +source /etc/bash_completion.d/juicefs +``` + +## Commands + +### juicefs format + +#### Description + +Format a volume. It's the first step for initializing a new file system volume. + +#### Synopsis + +``` +juicefs format [command options] META-URL NAME +``` + +- **META-URL**: Database URL for metadata storage, see "[JuiceFS supported metadata engines](how_to_setup_metadata_engine.md)" for details. +- **NAME**: the name of the file system + +#### Options + +`--block-size value`
+size of block in KiB (default: 4096) + +`--capacity value`
+the limit for space in GiB (default: unlimited) + +`--inodes value`
+the limit for number of inodes (default: unlimited) + +`--compress value`
+compression algorithm (lz4, zstd, none) (default: "none") + +`--shards value`
+store the blocks into N buckets by hash of key (default: 0) + +`--storage value`
+Object storage type (e.g. s3, gcs, oss, cos) (default: "file") + +`--bucket value`
+A bucket URL to store data (default: `"$HOME/.juicefs/local"` or `"/var/jfs"`) + +`--access-key value`
+Access key for object storage (env `ACCESS_KEY`) + +`--secret-key value`
+Secret key for object storage (env `SECRET_KEY`) + +`--encrypt-rsa-key value`
+A path to RSA private key (PEM) + +`--trash-days value`
+number of days after which removed files will be permanently deleted (default: 1) + +`--force`
+overwrite existing format (default: false) + +`--no-update`
+don't update existing volume (default: false) + +### juicefs mount + +#### Description + +Mount a volume. The volume should be formatted first. + +#### Synopsis + +``` +juicefs mount [command options] META-URL MOUNTPOINT +``` + +- **META-URL**: Database URL for metadata storage, see "[JuiceFS supported metadata engines](how_to_setup_metadata_engine.md)" for details. +- **MOUNTPOINT**: file system mount point, e.g. `/mnt/jfs`, `Z:`. + +#### Options + +`--metrics value`
+address to export metrics (default: "127.0.0.1:9567") + +`--consul value`
+consul address to register (default: "127.0.0.1:8500") + +`--no-usage-report`
+do not send usage report (default: false) + +`-d, --background`
+run in background (default: false) + +`--no-syslog`
+disable syslog (default: false) + +`--log value`
+path of log file when running in background (default: `$HOME/.juicefs/juicefs.log` or `/var/log/juicefs.log`) + +`-o value`
+other FUSE options (see [this document](../reference/fuse_mount_options.md) for more information) + +`--attr-cache value`
+attributes cache timeout in seconds (default: 1) + +`--entry-cache value`
+file entry cache timeout in seconds (default: 1) + +`--dir-entry-cache value`
+dir entry cache timeout in seconds (default: 1) + +`--enable-xattr`
+enable extended attributes (xattr) (default: false) + +`--bucket value`
+customized endpoint to access object store + +`--get-timeout value`
+the max number of seconds to download an object (default: 60) + +`--put-timeout value`
+the max number of seconds to upload an object (default: 60) + +`--io-retries value`
+number of retries after network failure (default: 30) + +`--max-uploads value`
+number of connections to upload (default: 20) + +`--max-deletes value`
+number of threads to delete objects (default: 2) + +`--buffer-size value`
+total read/write buffering in MiB (default: 300) + +`--upload-limit value`
+bandwidth limit for upload in Mbps (default: 0) + +`--download-limit value`
+bandwidth limit for download in Mbps (default: 0) + +`--prefetch value`
+prefetch N blocks in parallel (default: 1) + +`--writeback`
+upload objects in background (default: false) + +`--cache-dir value`
+directory paths of local cache, use colon to separate multiple paths (default: `"$HOME/.juicefs/cache"` or `"/var/jfsCache"`) + +`--cache-size value`
+size of cached objects in MiB (default: 102400) + +`--free-space-ratio value`
+min free space (ratio) (default: 0.1) + +`--cache-partial-only`
+cache only random/small read (default: false) + +`--read-only`
+allow lookup/read operations only (default: false) + +`--open-cache value`
+open file cache timeout in seconds (0 means disable this feature) (default: 0) + +`--subdir value`
+mount a sub-directory as root (default: "") + +### juicefs umount + +#### Description + +Unmount a volume. + +#### Synopsis + +``` +juicefs umount [command options] MOUNTPOINT +``` + +#### Options + +`-f, --force`
+unmount a busy mount point by force (default: false) + +### juicefs gateway + +#### Description + +S3-compatible gateway. + +#### Synopsis + +``` +juicefs gateway [command options] META-URL ADDRESS +``` + +- **META-URL**: Database URL for metadata storage, see "[JuiceFS supported metadata engines](how_to_setup_metadata_engine.md)" for details. +- **ADDRESS**: S3 gateway address and listening port, for example: `localhost:9000` + +#### Options + +`--bucket value`
+customized endpoint to access object store + +`--get-timeout value`
+the max number of seconds to download an object (default: 60) + +`--put-timeout value`
+the max number of seconds to upload an object (default: 60) + +`--io-retries value`
+number of retries after network failure (default: 30) + +`--max-uploads value`
+number of connections to upload (default: 20) + +`--max-deletes value`
+number of threads to delete objects (default: 2) + +`--buffer-size value`
+total read/write buffering in MiB (default: 300) + +`--upload-limit value`
+bandwidth limit for upload in Mbps (default: 0) + +`--download-limit value`
+bandwidth limit for download in Mbps (default: 0) + +`--prefetch value`
+prefetch N blocks in parallel (default: 1) + +`--writeback`
+upload objects in background (default: false) + +`--cache-dir value`
+directory paths of local cache, use colon to separate multiple paths (default: `"$HOME/.juicefs/cache"` or `/var/jfsCache`) + +`--cache-size value`
+size of cached objects in MiB (default: 102400) + +`--free-space-ratio value`
+min free space (ratio) (default: 0.1) + +`--cache-partial-only`
+cache only random/small read (default: false) + +`--read-only`
+allow lookup/read operations only (default: false) + +`--open-cache value`
+open file cache timeout in seconds (0 means disable this feature) (default: 0) + +`--subdir value`
+mount a sub-directory as root (default: "") + +`--attr-cache value`
+attributes cache timeout in seconds (default: 1) + +`--entry-cache value`
+file entry cache timeout in seconds (default: 0) + +`--dir-entry-cache value`
+dir entry cache timeout in seconds (default: 1) + +`--access-log value`
+path for JuiceFS access log + +`--metrics value`
+address to export metrics (default: "127.0.0.1:9567") + +`--no-usage-report`
+do not send usage report (default: false) + +`--no-banner`
+disable MinIO startup information (default: false) + +`--multi-buckets`
+use top level of directories as buckets (default: false) + +`--keep-etag`
+Save the ETag for uploaded objects (default: false) + + +### juicefs sync + +#### Description + +Sync data between two storage systems. + +#### Synopsis + +``` +juicefs sync [command options] SRC DST +``` + +- **SRC**: source path +- **DST**: destination path + +The format of both the source and destination paths is `[NAME://][ACCESS_KEY:SECRET_KEY@]BUCKET[.ENDPOINT][/PREFIX]`, where: + +- `NAME`: JuiceFS supported data storage types (e.g. `s3`, `oss`), please refer to [this document](how_to_setup_object_storage.md#supported-object-storage). +- `ACCESS_KEY` and `SECRET_KEY`: The credentials required to access the data storage, please refer to [this document](how_to_setup_object_storage.md#access-key-and-secret-key). +- `BUCKET[.ENDPOINT]`: The access address of the data storage service; the format may differ between storage types, please refer to [this document](how_to_setup_object_storage.md#supported-object-storage). +- `[/PREFIX]`: Optional, a prefix for the source and destination paths that can be used to limit the synchronization to data in certain paths only. + +#### Options + +`--start KEY, -s KEY`
+the first KEY to sync + +`--end KEY, -e KEY`
+the last KEY to sync + +`--threads value, -p value`
+number of concurrent threads (default: 10) + +`--http-port PORT`
+HTTP PORT to listen to (default: 6070) + +`--update, -u`
+update existing file if the source is newer (default: false) + +`--force-update, -f`
+always update existing file (default: false) + +`--perms`
+preserve permissions (default: false) + +`--dirs`
+Sync directories or holders (default: false) + +`--dry`
+don't copy file (default: false) + +`--delete-src, --deleteSrc`
+delete objects from source after synced (default: false) + +`--delete-dst, --deleteDst`
+delete extraneous objects from destination (default: false) + +`--exclude PATTERN`
+exclude keys containing PATTERN (POSIX regular expressions) + +`--include PATTERN`
+only include keys containing PATTERN (POSIX regular expressions) + +`--manager value`
+manager address + +`--worker value`
+hosts (separated by commas) to launch workers + +`--bwlimit value`
+limit bandwidth in Mbps (0 means unlimited) (default: 0) + +`--no-https`
+do not use HTTPS (default: false) + +`--check-all`
+verify integrity of all files in source and destination (default: false) + +`--check-new`
+verify integrity of newly copied files (default: false) + +### juicefs rmr + +#### Description + +Remove all files in directories recursively. + +#### Synopsis + +``` +juicefs rmr PATH ... +``` + +### juicefs info + +#### Description + +Show internal information for given paths or inodes. + +#### Synopsis + +``` +juicefs info [command options] PATH or INODE +``` + +#### Options + +`--inode, -i`
+use inode instead of path (current dir should be inside JuiceFS) (default: false) + +`--recursive, -r`
+get summary of directories recursively (NOTE: it may take a long time for huge trees) (default: false) + +### juicefs bench + +#### Description + +Run benchmark, include read/write/stat big and small files. + +#### Synopsis + +``` +juicefs bench [command options] PATH +``` + +#### Options + +`--block-size value`
+block size in MiB (default: 1) + +`--big-file-size value`
+size of big file in MiB (default: 1024) + +`--small-file-size value`
+size of small file in MiB (default: 0.1) + +`--small-file-count value`
+number of small files (default: 100) + +`--threads value, -p value`
+number of concurrent threads (default: 1) + +### juicefs gc + +#### Description + +Collect any leaked objects. + +#### Synopsis + +``` +juicefs gc [command options] META-URL +``` + +#### Options + +`--delete`
+delete leaked objects (default: false) + +`--compact`
+compact all chunks with more than one slice (default: false) + +`--threads value`
+number of threads to delete leaked objects (default: 10) + +### juicefs fsck + +#### Description + +Check consistency of file system. + +#### Synopsis + +``` +juicefs fsck [command options] META-URL +``` + +### juicefs profile + +#### Description + +Analyze [access log](../administration/fault_diagnosis_and_analysis.md#access-log). + +#### Synopsis + +``` +juicefs profile [command options] MOUNTPOINT/LOGFILE +``` + +#### Options + +`--uid value, -u value`
+track only specified UIDs (separated by commas) + +`--gid value, -g value`
+track only specified GIDs (separated by commas) + +`--pid value, -p value`
+track only specified PIDs (separated by commas) + +`--interval value`
+flush interval in seconds; set it to 0 when replaying a log file to get an immediate result (default: 2) + +### juicefs stats + +#### Description + +Show runtime statistics + +#### Synopsis + +``` +juicefs stats [command options] MOUNTPOINT +``` + +#### Options + +`--schema value`
+schema string that controls the output sections (u: usage, f: fuse, m: meta, c: blockcache, o: object, g: go) (default: "ufmco") + +`--interval value`
+interval in seconds between each update (default: 1) + +`--verbosity value`
+verbosity level, 0 or 1 is enough for most cases (default: 0) + +`--nocolor`
+disable colors (default: false) + +### juicefs status + +#### Description + +Show status of JuiceFS + +#### Synopsis + +``` +juicefs status [command options] META-URL +``` + +#### Options + +`--session value, -s value`
+show detailed information (sustained inodes, locks) of the specified session (sid) (default: 0) + +### juicefs warmup + +#### Description + +Build cache for target directories/files + +#### Synopsis + +``` +juicefs warmup [command options] [PATH ...] +``` + +#### Options + +`--file value, -f value`
+file containing a list of paths + +`--threads value, -p value`
+number of concurrent workers (default: 50) + +`--background, -b`
+run in background (default: false) + +### juicefs dump + +#### Description + +Dump metadata into a JSON file + +#### Synopsis + +``` +juicefs dump [command options] META-URL [FILE] +``` + +When the FILE is not provided, STDOUT will be used instead. + +#### Options + +`--subdir value`
+only dump a sub-directory. + +### juicefs load + +#### Description + +Load metadata from a previously dumped JSON file + +#### Synopsis + +``` +juicefs load [command options] META-URL [FILE] +``` + +When the FILE is not provided, STDIN will be used instead. + +### juicefs config + +#### Description + +Change config of a volume + +#### Synopsis + +``` +juicefs config [command options] META-URL +``` + +#### Options + +`--capacity value`
+the limit for space in GiB + +`--inodes value`
+the limit for number of inodes + +`--bucket value`
+a bucket URL to store data + +`--access-key value`
+access key for object storage + +`--secret-key value`
+secret key for object storage + +`--trash-days value`
+number of days after which removed files will be permanently deleted + +`--force`
+skip sanity check and force update the configurations (default: false) + +### juicefs destroy + +#### Description + +Destroy an existing volume + +#### Synopsis + +``` +juicefs destroy [command options] META-URL UUID +``` + +#### Options + +`--force`
+skip sanity check and force destroy the volume (default: false) diff --git a/docs/en/reference/fuse_mount_options.md b/docs/en/reference/fuse_mount_options.md new file mode 100644 index 0000000..a6fc2b5 --- /dev/null +++ b/docs/en/reference/fuse_mount_options.md @@ -0,0 +1,26 @@ +--- +sidebar_label: FUSE Mount Options +sidebar_position: 6 +slug: /fuse_mount_options +--- +# FUSE Mount Options + +This is a guide that lists important FUSE mount options. These mount options are specified with the `-o` option when executing the [`juicefs mount`](../reference/command_reference.md#juicefs-mount) command (use commas to separate multiple options). For example: + +```bash +$ juicefs mount -d -o allow_other,writeback_cache localhost ~/jfs +``` + +## debug + +Enable debug log + +## allow_other + +This option overrides the security measure restricting file access to the user mounting the file system, so that all users (including root) can access the files. By default only root is allowed to use this option, but the restriction can be removed with the `user_allow_other` configuration option in `/etc/fuse.conf`. + +## writeback_cache + +> **Note**: This mount option requires Linux kernel 3.15 or later. + +FUSE supports ["writeback-cache mode"](https://www.kernel.org/doc/Documentation/filesystems/fuse-io.txt), which means the `write()` syscall can often complete very fast. It's recommended to enable this mount option when writing very small data (e.g. 100 bytes) frequently. diff --git a/docs/en/reference/glossary.md b/docs/en/reference/glossary.md new file mode 100644 index 0000000..740a55e --- /dev/null +++ b/docs/en/reference/glossary.md @@ -0,0 +1,4 @@ +# Glossary + +:::note +Work in progress. \ No newline at end of file diff --git a/docs/en/reference/how_juicefs_store_files.md b/docs/en/reference/how_juicefs_store_files.md new file mode 100644 index 0000000..9ad3b17 --- /dev/null +++ b/docs/en/reference/how_juicefs_store_files.md @@ -0,0 +1,20 @@ +--- +sidebar_label: How JuiceFS Stores Files +sidebar_position: 5 +slug: /how_juicefs_store_files +--- +# How JuiceFS Stores Files + +A file system acts as a medium between the user and the hard drive, allowing files to be stored on the drive properly. Commonly used file systems on Windows are FAT32 and NTFS, while Linux commonly uses Ext4, XFS, Btrfs, etc. Each file system has its own way of organizing and managing files, which determines characteristics such as storage capacity and performance. + +As a file system, JuiceFS is no exception. Its strong consistency and high performance are inseparable from its unique file management mode. + +Unlike traditional file systems that can only use local disks to store data and the corresponding metadata, JuiceFS formats data and stores it in object storage (cloud storage), while the metadata corresponding to the data is stored in databases such as Redis. + +Any file stored in JuiceFS will be split into fixed-size **"Chunks"**, with a default upper limit of 64 MiB. Each Chunk is composed of one or more **"Slices"**; the length of a Slice is not fixed and depends on how the file is written. Each Slice is further split into fixed-size **"Blocks"**, 4 MiB by default. Finally, these Blocks are stored in the object storage. At the same time, JuiceFS stores each file and its Chunks, Slices, Blocks and other metadata information in the metadata engine.
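As a rough illustration of this layout (assuming the default 64 MiB Chunk and 4 MiB Block sizes, and a simple sequential write that produces a single Slice per Chunk): a 100 MiB file is split into two Chunks of 64 MiB and 36 MiB; the first Chunk is stored as 16 Blocks of 4 MiB and the second as 9 Blocks, so the file ends up as 25 objects in the object storage, while the metadata engine records how those objects map back to the original file.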
+ +![](../images/juicefs-storage-format-new.png) + +Using JuiceFS, files will eventually be split into Chunks, Slices and Blocks and stored in object storage. Therefore, you will find that the source files stored in JuiceFS cannot be found in the file browser of the object storage platform. There is a chunks directory and a bunch of digitally numbered directories and files in the bucket. Don't panic, this is the secret of the high-performance operation of the JuiceFS file system! + +![How JuiceFS stores your files](../images/how-juicefs-stores-files-new.png) diff --git a/docs/en/reference/how_to_setup_metadata_engine.md b/docs/en/reference/how_to_setup_metadata_engine.md new file mode 100644 index 0000000..b75c46f --- /dev/null +++ b/docs/en/reference/how_to_setup_metadata_engine.md @@ -0,0 +1,244 @@ +--- +sidebar_label: How to Setup Metadata Engine +sidebar_position: 3 +slug: /databases_for_metadata +--- +# How to Setup Metadata Engine + +By reading [JuiceFS Technical Architecture](../introduction/architecture.md) and [How JuiceFS Store Files](../reference/how_juicefs_store_files.md), you will understand that JuiceFS is designed to store data and metadata independently. Generally , the data is stored in the cloud storage based on object storage, and the metadata corresponding to the data is stored in an independent database. + +## Metadata Storage Engine + +Metadata and data are equally important. The metadata records the detailed information of each file, such as the name, size, permissions, location, and so on. Especially for this kind of file system where data and metadata are stored separately, the read and write performance of metadata directly determines the actual performance of the file system. + +The metadata storage of JuiceFS uses a multi-engine design. In order to create an ultra-high-performance cloud-native file system, JuiceFS first supports [Redis](https://redis.io) a key-value database running in memory, which makes JuiceFS ten times more powerful than Amazon [ EFS](https://aws.amazon.com/efs) and [S3FS](https://github.com/s3fs-fuse/s3fs-fuse) performance, [View test results](../benchmark/benchmark.md) . + +Through active interaction with community users, we found that many application scenarios do not absolutely rely on high performance. Sometimes users just want to temporarily find a convenient tool to reliably migrate data on the cloud, or simply want to mount the object storage locally for a Small-scale use. Therefore, JuiceFS has successively opened up support for more databases such as MySQL/MariaDB and SQLite (some performance comparison are recorded [here](../benchmark/metadata_engines_benchmark.md)). + +**But you need to pay special attention**, in the process of using the JuiceFS file system, no matter which database you choose to store metadata, please **make sure to ensure the security of the metadata**! Once the metadata is damaged or lost, it will directly cause the corresponding data to be completely damaged or lost, and in serious cases may directly cause the entire file system to be damaged. + +:::caution +No matter which database is used to store metadata, **it is important to ensure the security of metadata**. If metadata is corrupted or lost, the corresponding data will be completely corrupted or lost, or even the whole file system will be damaged. 
For production environments, you should always choose a database with high availability, and at the same time, it is recommended to periodically "[backup metadata](../administration/metadata_dump_load.md)" on a regular basis. +::: + +## Redis + +[Redis](https://redis.io/) is an open source (BSD license) memory-based key-value storage system, often used as a database, cache, and message broker. + +### Create a file system + +When using Redis as the metadata storage engine, the following format is usually used to access the database: + +```shell +redis://username:password@host:6379/1 +``` + +`username` was introduced after Redis 6.0. If you don't have a username field you can ignore it, e.g. `redis://:password@host:6379/1` (the `:` colon in front of the password needs to be preserved). + +For example, the following command will create a JuiceFS file system named `pics`, using the database No. `1` in Redis to store metadata: + +```shell +$ juicefs format --storage s3 \ + ... + "redis://:mypassword@192.168.1.6:6379/1" \ + pics +``` + +For security purposes, it is recommended to pass the password using the environment variable `REDIS_PASSWORD`, e.g. + +```shell +export REDIS_PASSWORD=mypassword +``` + +Then there is no need to set a password in the metadata URL. + +```shell +$ juicefs format --storage s3 \ + ... + "redis://192.168.1.6:6379/1" \ + pics +``` + +:::caution +JuiceFS requires at least 4.0 version for redis +::: + +### Mount a file system + +```shell +sudo juicefs mount -d redis://192.168.1.6:6379/1 /mnt/jfs +``` + +:::tip +If you need to share the same file system on multiple servers, you must ensure that each server has access to the database where the metadata is stored. +::: + +If you maintain your own Redis database, we recommend reading [Redis Best Practices](../administration/metadata/redis_best_practices.md). + +## PostgreSQL + +[PostgreSQL](https://www.postgresql.org/) is a powerful open source relational database with a perfect ecosystem and rich application scenarios, and it is well suited as the metadata engine of JuiceFS. + +Many cloud computing platforms offer hosted PostgreSQL database services, or you can deploy one yourself by following the [Usage Wizard](https://www.postgresqltutorial.com/postgresql-getting-started/). + +Other PostgreSQL-compatible databases (such as CockroachDB) can also be used as metadata engine. + +### Create a file system + +When using PostgreSQL as the metadata storage engine, the following format is usually used to access the database: + +```shell +postgres://[:@][:5432]/[?parameters] +``` + +For example: + +```shell +$ juicefs format --storage s3 \ + ... + "postgres://user:password@192.168.1.6:5432/juicefs" \ + pics +``` + +For security purposes, it is recommended to pass the password using an environment variable, e.g. + +```shell +export $PG_PASSWD=mypassword +``` + +Then change the metadata URL to `"postgres://user:$PG_PASSWD@192.168.1.6:5432/juicefs"` + +### Mount a file system + +```shell +sudo juicefs mount -d "postgres://user:$PG_PASSWD@192.168.1.6:5432/juicefs" /mnt/jfs +``` + +### Troubleshooting + +The JuiceFS client connects to PostgreSQL via SSL encryption by default; if an error `pq: SSL is not enabled on the server` is returned, it means that SSL is not enabled on the database; you can enable SSL encryption for PostgreSQL according to your business scenario, or you can disable it by adding a parameter to the metadata URL Validation. + +```shell +$ juicefs format --storage s3 \ + ... 
+ "postgres://user:$PG_PASSWD@192.168.1.6:5432/juicefs?sslmode=disable" \ + pics +``` + +Additional parameters can be appended to the metadata URL, [click here to view](https://pkg.go.dev/github.com/lib/pq#hdr-Connection_String_Parameters). + +## MySQL + +[MySQL](https://www.mysql.com/) is one of the most popular open source relational databases, and is often used as the preferred database for Web applications. + +### Create a file system + +When using MySQL as the metadata storage engine, the following format is usually used to access the database: + +```shell +mysql://:@(:3306)/ +``` + +For example: + +```shell +$ juicefs format --storage s3 \ + ... + "mysql://user:password@(192.168.1.6:3306)/juicefs" \ + pics +``` + +For security purposes, it is recommended to pass the password using an environment variable, e.g. + +```shell +export $MYSQL_PASSWD=mypassword +``` + +Then change the metadata URL to `"mysql://user:$MYSQL_PASSWD@(192.168.1.6:3306)/juicefs"` + +### Mount a file system + +```shell +sudo juicefs mount -d "mysql://user:$MYSQL_PASSWD@(192.168.1.6:3306)/juicefs" /mnt/jfs +``` + +For more examples of MySQL database address format, [click here to view](https://github.com/Go-SQL-Driver/MySQL/#examples). + +## MariaDB + +[MariaDB](https://mariadb.org) is an open source branch of MySQL, maintained by the original developers of MySQL and kept open source. + +Because MariaDB is highly compatible with MySQL, there is no difference in usage, the parameters and settings are exactly the same. + +For example: + +```shell +$ juicefs format --storage s3 \ + ... + "mysql://user:$MYSQL_PASSWD@(192.168.1.6:3306)/juicefs" \ + pics +``` + +## SQLite + +[SQLite](https://sqlite.org) is a widely used small, fast, single-file, reliable, and full-featured SQL database engine. + +The SQLite database has only one file, which is very flexible to create and use. When using it as the JuiceFS metadata storage engine, there is no need to create a database file in advance, and you can directly create a file system: + +```shell +$ juicefs format --storage s3 \ + ... + "sqlite3://my-jfs.db" \ + pics +``` + +Executing the above command will automatically create a database file named `my-jfs.db` in the current directory, **please take good care of this file**! + +Mount the file system: + +```shell +sudo juicefs mount -d "sqlite3://my-jfs.db" +``` + +Please note the location of the database file, if it is not in the current directory, you need to specify the absolute path to the database file, e.g. + +```shell +sudo juicefs mount -d "sqlite3:///home/herald/my-jfs.db" /mnt/jfs/ +``` + +:::note +Since SQLite is a single-file database, usually only the host where the database is located can access it. Therefore, SQLite database is more suitable for stand-alone use. For multiple servers sharing the same file system, it is recommended to use databases such as Redis or MySQL. +::: + +## TiKV + +[TiKV](https://github.com/tikv/tikv) is a distributed transactional key-value database. It is originally developed by [PingCAP](https://pingcap.com) as the storage layer for their flagship product [TiDB](https://github.com/pingcap/tidb). Now TiKV is an independent open source project, and is also a granduated project of [CNCF](https://www.cncf.io/projects). + +With the help of official tool `TiUP`, you can easily build a local playground for testing; refer [here](https://tikv.org/docs/5.1/concepts/tikv-in-5-minutes/) for details. 
In production, usually at least three hosts are required to store three data replicas; refer to the [official document](https://tikv.org/docs/5.1/deploy/install/install/) for all steps. + +### Create a file system + +When using TiKV as the metadata storage engine, specify parameters as the following format: + +```shell +tikv://[,...]/ +``` + +The `prefix` is a user-defined string, which can be used to distinguish multiple file systems or applications when they share the same TiKV cluster. For example: + +```shell +$ juicefs format --storage s3 \ + ... + "tikv://192.168.1.6:2379,192.168.1.7:2379,192.168.1.8:2379/jfs" \ + pics +``` + +### Mount a file system + +```shell +sudo juicefs mount -d "tikv://192.168.1.6:6379,192.168.1.7:6379,192.168.1.8:6379/jfs" /mnt/jfs +``` + +## FoundationDB + +Coming soon... diff --git a/docs/en/reference/how_to_setup_object_storage.md b/docs/en/reference/how_to_setup_object_storage.md new file mode 100644 index 0000000..054264e --- /dev/null +++ b/docs/en/reference/how_to_setup_object_storage.md @@ -0,0 +1,780 @@ +--- +sidebar_label: How to Setup Object Storage +sidebar_position: 4 +slug: /how_to_setup_object_storage +--- + +# How to Setup Object Storage + +As you can learn from [JuiceFS Technical Architecture](../introduction/architecture.md), JuiceFS is a distributed file system with data and metadata separation, using object storage as the main data storage and Redis, PostgreSQL, MySQL and other databases as metadata storage. + +## Storage options + +When creating a JuiceFS file system, setting up the storage generally involves the following options: + +- `--storage`: Specify the type of storage to be used by the file system, e.g. `--storage s3` +- `--bucket`: Specify the storage access address, e.g. `--bucket https://myjuicefs.s3.us-east-2.amazonaws.com` +- `--access-key` and `--secret-key`: Specify the authentication information when accessing the storage + +For example, the following command uses Amazon S3 object storage to create a file system: + +```shell +$ juicefs format --storage s3 \ + --bucket https://myjuicefs.s3.us-east-2.amazonaws.com \ + --access-key abcdefghijklmn \ + --secret-key nmlkjihgfedAcBdEfg \ + redis://192.168.1.6/1 \ + myjfs +``` + +## Access Key and Secret Key + +In general, object storages are authenticated by `Access Key ID` and `Access Key Secret`, which correspond to the `--access-key` and `--secret-key` options (or AK, SK for short) on the JuiceFS file system. + +In addition to explicitly specifying the `--access-key` and `--secret-key` options when creating a filesystem, it is more secure to pass key information via the `ACCESS_KEY` and `SECRET_KEY` environment variables, e.g. + +```shell +$ export ACCESS_KEY=abcdefghijklmn +$ export SECRET_KEY=nmlkjihgfedAcBdEfg +$ juicefs format --storage s3 \ + --bucket https://myjuicefs.s3.us-east-2.amazonaws.com \ + redis://192.168.1.6/1 \ + myjfs +``` + +Public clouds typically allow users to create IAM (Identity and Access Management) roles, such as [AWS IAM role](https://docs.aws.amazon.com/IAM/latest/UserGuide/id_roles.html) or [Alibaba Cloud RAM role](https://www.alibabacloud.com/help/doc-detail/110376.htm), which can be assigned to VM instances. If the cloud server instance already has read and write access to the object storage, there is no need to specify `--access-key` and `--secret-key`. 
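For example, on a cloud server that has already been granted such a role, the key options can simply be left out; a minimal sketch reusing the bucket and metadata addresses from the example above:

```shell
$ juicefs format --storage s3 \
    --bucket https://myjuicefs.s3.us-east-2.amazonaws.com \
    redis://192.168.1.6/1 \
    myjfs
```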
+ +## Using Proxy + +If the network environment where the client is located is affected by firewall policies or other factors that require access to external object storage services through a proxy, the corresponding proxy settings are different for different operating systems, please refer to the corresponding user manual for settings. + +On Linux, for example, the proxy can be set by creating `http_proxy` and `https_proxy` environment variables. + +```shell +$ export http_proxy=http://localhost:8035/ +$ export https_proxy=http://localhost:8035/ +$ juicefs format \ + --storage s3 \ + ... \ + myjfs +``` + +## Supported Object Storage + +If you wish to use a storage type that is not listed, feel free to submit a requirement [issue](https://github.com/juicedata/juicefs/issues). + +| Name | Value | +| --------------------------------------------------------- | ---------- | +| [Amazon S3](#amazon-s3) | `s3` | +| [Google Cloud Storage](#google-cloud-storage) | `gs` | +| [Azure Blob Storage](#azure-blob-storage) | `wasb` | +| [Backblaze B2](#backblaze-b2) | `b2` | +| [IBM Cloud Object Storage](#ibm-cloud-object-storage) | `ibmcos` | +| [Scaleway Object Storage](#scaleway-object-storage) | `scw` | +| [DigitalOcean Spaces](#digitalocean-spaces) | `space` | +| [Wasabi](#wasabi) | `wasabi` | +| [Storj DCS](#storj-dcs) | `s3` | +| [Vultr Object Storage](#vultr-object-storage) | `s3` | +| [Alibaba Cloud OSS](#alibaba-cloud-oss) | `oss` | +| [Tencent Cloud COS](#tencent-cloud-cos) | `cos` | +| [Huawei Cloud OBS](#huawei-cloud-obs) | `obs` | +| [Baidu Object Storage](#baidu-object-storage) | `bos` | +| [Kingsoft KS3](#kingsoft-ks3) | `ks3` | +| [NetEase Object Storage](#netease-object-storage) | `nos` | +| [QingStor](#qingstor) | `qingstor` | +| [Qiniu Object Storage](#qiniu-object-storage) | `qiniu` | +| [Sina Cloud Storage](#sina-cloud-storage) | `scs` | +| [CTYun OOS](#ctyun-oos) | `oos` | +| [ECloud Object Storage](#ecloud-object-storage) | `eos` | +| [UCloud US3](#ucloud-us3) | `ufile` | +| [Ceph RADOS](#ceph-rados) | `ceph` | +| [Ceph RGW](#ceph-rgw) | `s3` | +| [Swift](#swift) | `swift` | +| [MinIO](#minio) | `minio` | +| [WebDAV](#webdav) | `webdav` | +| [HDFS](#hdfs) | `hdfs` | +| [Redis](#redis) | `redis` | +| [TiKV](#tikv) | `tikv` | +| [Local disk](#local-disk) | `file` | + +## Amazon S3 + +S3 supports [two style endpoint URI](https://docs.aws.amazon.com/AmazonS3/latest/dev/VirtualHosting.html): virtual hosted-style and path-style. The difference between them is: + +- Virtual hosted-style: `https://.s3..amazonaws.com` +- Path-style: `https://s3..amazonaws.com/` + +The `` should be replaced with specific region code, e.g. the region code of US East (N. Virginia) is `us-east-1`. You could find all available regions at [here](https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/using-regions-availability-zones.html#concepts-available-regions). + +:::note +For AWS China user, you need add `.cn` to the host, i.e. `amazonaws.com.cn`. And check [this document](https://docs.amazonaws.cn/en_us/aws/latest/userguide/endpoints-arns.html) to know your region code. +::: + +:::note +If the S3 bucket has public access (anonymous access is supported), please set `--access-key` to `anonymous`. +::: + +Versions prior to JuiceFS v0.12 only supported the virtual hosting type, v0.12 and later versions support both styles. Example. + +```bash +# virtual hosted-style +$ juicefs format \ + --storage s3 \ + --bucket https://.s3..amazonaws.com \ + ... 
\ + myjfs +``` + +```bash +# path-style +$ juicefs format \ + --storage s3 \ + --bucket https://s3..amazonaws.com/ \ + ... \ + myjfs +``` + +You can also set `--storage` to `s3` to connect to S3-compatible object storage, e.g. + +```bash +# virtual hosted-style +$ juicefs format \ + --storage s3 \ + --bucket https://. \ + ... \ + myjfs +``` + +```bash +# path-style +$ juicefs format \ + --storage s3 \ + --bucket https:/// \ + ... \ + myjfs +``` + +:::tip +The format of `--bucket` option for all S3 compatible object storage services is `https://.` or `https:///`. The default `region` is `us-east-1`. When a different `region` is required, it can be set manually via the environment variable `AWS_REGION` or `AWS_DEFAULT_REGION`. +::: + +## Google Cloud Storage + +Google Cloud uses [IAM](https://cloud.google.com/iam/docs/overview) to manage the access rights of resources, and through the authorization of [service accounts](https://cloud.google.com/iam/docs/creating-managing- service-accounts#iam-service-accounts-create-gcloud) authorization, you can fine-grained control the access rights of cloud servers and object storage. + +For cloud servers and object storage that belong to the same service account, as long as the account grants access to the relevant resources, there is no need to provide authentication information when creating a JuiceFS file system, and the cloud platform will automatically complete authentication. + +For cases where you want to access the object storage from outside the Google Cloud Platform, for example to create a JuiceFS file system on your local computer using Google Cloud Storage, you need to configure authentication information. Since Google Cloud Storage does not use `Access Key ID` and `Access Key Secret`, but rather the `JSON key file` of the service account to authenticate the identity. + +Please refer to "[Authentication as a service account](https://cloud.google.com/docs/authentication/production)" to create `JSON key file` for the service account and download it to the local computer, and define the path to the key file via `GOOGLE_APPLICATION_ CREDENTIALS` environment variable to define the path to the key file, e.g. + +```shell +export GOOGLE_APPLICATION_CREDENTIALS="$HOME/service-account-file.json" +``` + +You can write the command to create environment variables to `~/.bashrc` or `~/.profile` and have the shell set it automatically every time you start. + +Once you have configured the environment variables for passing key information, the commands to create a file system locally and on Google Cloud Server are identical. For example. + +```bash +$ juicefs format \ + --storage gs \ + --bucket \ + ... \ + myjfs +``` + +As you can see, there is no need to include authentication information in the command, and the client will authenticate the access to the object storage through the JSON key file set in the previous environment variable. Also, since the bucket name is [globally unique](https://cloud.google.com/storage/docs/naming-buckets#considerations), when creating a file system, the `--bucket` option only needs to specify the bucket name. + +## Azure Blob Storage + +Besides provide authorization information through `--access-key` and `--secret-key` options, you could also create a [connection string](https://docs.microsoft.com/en-us/azure/storage/common/storage-configure-connection-string) and set `AZURE_STORAGE_CONNECTION_STRING` environment variable. 
For example: + +```bash +# Use connection string +$ export AZURE_STORAGE_CONNECTION_STRING="DefaultEndpointsProtocol=https;AccountName=XXX;AccountKey=XXX;EndpointSuffix=core.windows.net" +$ juicefs format \ + --storage wasb \ + --bucket https:// \ + ... \ + myjfs +``` + +:::note +For Azure China user, the value of `EndpointSuffix` is `core.chinacloudapi.cn`. +::: + +## Backblaze B2 + +To use Backblaze B2 as a data storage for JuiceFS, you need to create [application key](https://www.backblaze.com/b2/docs/application_keys.html) first, **Application Key ID** and ** Application Key** corresponds to `Access key` and `Secret key` respectively. + +Backblaze B2 supports two access interfaces: the B2 native API and the S3-compatible API. + +### B2 native API + +The storage type should be set to `b2` and `--bucket` should only set the bucket name. For example: + +```bash +$ juicefs format \ + --storage b2 \ + --bucket \ + --access-key \ + --secret-key \ + ... \ + myjfs +``` + +### S3-compatible API + +The storage type should be set to `s3` and `--bucket` should specify the full bucket address. For example: + +```bash +$ juicefs format \ + --storage s3 \ + --bucket https://s3.eu-central-003.backblazeb2.com/ \ + --access-key \ + --secret-key \ + ... \ + myjfs +``` + +## IBM Cloud Object Storage + +You need first creating [API key](https://cloud.ibm.com/docs/account?topic=account-manapikey) and retrieving [instance ID](https://cloud.ibm.com/docs/key-protect?topic=key-protect-retrieve-instance-ID). The "API key" and "instance ID" are the equivalent of access key and secret key respectively. + +IBM Cloud Object Storage provides [multiple endpoints](https://cloud.ibm.com/docs/cloud-object-storage?topic=cloud-object-storage-endpoints) for each region, depends on your network (e.g. public or private network), you should use appropriate endpoint. For example: + +```bash +$ juicefs format \ + --storage ibmcos \ + --bucket https://. \ + --access-key \ + --secret-key \ + ... \ + myjfs +``` + +## Scaleway Object Storage + +Please follow [this document](https://www.scaleway.com/en/docs/generate-api-keys) to learn how to get access key and secret key. + +The `--bucket` option format is `https://.s3..scw.cloud`, replace `` with specific region code, e.g. the region code of "Amsterdam, The Netherlands" is `nl-ams`. You could find all available regions at [here](https://www.scaleway.com/en/docs/object-storage-feature/#-Core-Concepts). For example: + +```bash +$ juicefs format \ + --storage scw \ + --bucket https://.s3..scw.cloud \ + ... \ + myjfs +``` + +## DigitalOcean Spaces + +Please follow [this document](https://www.digitalocean.com/community/tutorials/how-to-create-a-digitalocean-space-and-api-key) to learn how to get access key and secret key. + +The `--bucket` option format is `https://..digitaloceanspaces.com`, replace `` with specific region code, e.g. `nyc3`. You could find all available regions at [here](https://www.digitalocean.com/docs/spaces/#regional-availability). For example: + +```bash +$ juicefs format \ + --storage space \ + --bucket https://..digitaloceanspaces.com \ + ... \ + myjfs +``` + +## Wasabi + +Please follow [this document](https://wasabi-support.zendesk.com/hc/en-us/articles/360019677192-Creating-a-Root-Access-Key-and-Secret-Key) to learn how to get access key and secret key. + +The `--bucket` option format is `https://.s3..wasabisys.com`, replace `` with specific region code, e.g. the region code of US East 1 (N. Virginia) is `us-east-1`. 
You could find all available regions at [here](https://wasabi-support.zendesk.com/hc/en-us/articles/360.15.26031-What-are-the-service-URLs-for-Wasabi-s-different-regions-). For example:
+
+```bash
+$ juicefs format \
+    --storage wasabi \
+    --bucket https://.s3..wasabisys.com \
+    ... \
+    myjfs
+```
+
+:::note
+For users in the Tokyo (ap-northeast-1) region, see [this document](https://wasabi-support.zendesk.com/hc/en-us/articles/360039372392-How-do-I-access-the-Wasabi-Tokyo-ap-northeast-1-storage-region-) to learn how to get the appropriate endpoint URI.
+:::
+
+## Storj DCS
+
+Please refer to [this document](https://docs.storj.io/api-reference/s3-compatible-gateway) to learn how to create the access key and secret key.
+
+Storj DCS is an S3-compatible storage, so just use `s3` for the `--storage` option. The format of the `--bucket` option is `https://gateway..storjshare.io/`, please replace `` with the storage region you actually use. There are currently three available regions: `us1`, `ap1` and `eu1`. For example:
+
+```shell
+$ juicefs format \
+    --storage s3 \
+    --bucket https://gateway..storjshare.io/ \
+    --access-key \
+    --secret-key \
+    ... \
+    myjfs
+```
+
+## Vultr Object Storage
+
+Vultr Object Storage is an S3-compatible storage, so use `s3` for the `--storage` option. The `--bucket` option format is `https://..vultrobjects.com/`. For example:
+
+```shell
+$ juicefs format \
+    --storage s3 \
+    --bucket https://.ewr1.vultrobjects.com/ \
+    --access-key \
+    --secret-key \
+    ... \
+    myjfs
+```
+
+Please find the access and secret keys for object storage [in the customer portal](https://my.vultr.com/objectstorage/).
+
+## Alibaba Cloud OSS
+
+Please follow [this document](https://www.alibabacloud.com/help/doc-detail/125558.htm) to learn how to get the access key and secret key. If you have already created a [RAM role](https://www.alibabacloud.com/help/doc-detail/110376.htm) and assigned it to the VM instance, you can omit the `--access-key` and `--secret-key` options.
+
+Alibaba Cloud also supports using [Security Token Service (STS)](https://www.alibabacloud.com/help/doc-detail/100624.htm) to authorize temporary access to OSS. If you want to use STS, omit the `--access-key` and `--secret-key` options and set the `ALICLOUD_ACCESS_KEY_ID`, `ALICLOUD_ACCESS_KEY_SECRET` and `SECURITY_TOKEN` environment variables instead, for example:
+
+```bash
+# Use Security Token Service (STS)
+$ export ALICLOUD_ACCESS_KEY_ID=XXX
+$ export ALICLOUD_ACCESS_KEY_SECRET=XXX
+$ export SECURITY_TOKEN=XXX
+$ juicefs format \
+    --storage oss \
+    --bucket https://. \
+    ... \
+    myjfs
+```
+
+OSS provides [multiple endpoints](https://www.alibabacloud.com/help/doc-detail/31834.htm) for each region; depending on your network (e.g. public or internal network), you should use the appropriate endpoint.
+
+If you are creating a file system on Alibaba Cloud's server, you can specify the bucket name directly in the `--bucket` option. For example:
+
+```bash
+# Running within Alibaba Cloud
+$ juicefs format \
+    --storage oss \
+    --bucket \
+    ... \
+    myjfs
+```
+
+## Tencent Cloud COS
+
+The bucket naming rule in Tencent Cloud is `-`, so you must append the `APPID` to the bucket name. Please follow [this document](https://intl.cloud.tencent.com/document/product/436/13312) to learn how to get the `APPID`.
+
+The full format of the `--bucket` option is `https://-.cos..myqcloud.com`, replace `` with the specific region code, e.g. the region code of Shanghai is `ap-shanghai`. You could find all available regions at [here](https://intl.cloud.tencent.com/document/product/436/6224).
For example: + +```bash +$ juicefs format \ + --storage cos \ + --bucket https://-.cos..myqcloud.com \ + ... \ + myjfs +``` + +If you are creating a file system on Tencent Cloud's server, you can specify the bucket name directly in the `--bucket` option. For example. + +```bash +# Running within Tencent Cloud +$ juicefs format \ + --storage cos \ + --bucket - \ + ... \ + myjfs +``` + +## Huawei Cloud OBS + +Please follow [this document](https://support.huaweicloud.com/usermanual-ca/zh-cn_topic_0046606340.html) to learn how to get access key and secret key. + +The `--bucket` option format is `https://.obs..myhuaweicloud.com`, replace `` with specific region code, e.g. the region code of Beijing 1 is `cn-north-1`. You could find all available regions at [here](https://developer.huaweicloud.com/endpoint?OBS). For example: + +```bash +$ juicefs format \ + --storage obs \ + --bucket https://.obs..myhuaweicloud.com \ + ... \ + myjfs +``` + +If you create the file system on Huawei Cloud's server, you can specify the bucket name directly in `--bucket`. For example. + +```bash +# Running within Huawei Cloud +$ juicefs format \ + --storage obs \ + --bucket \ + ... \ + myjfs +``` + +## Baidu Object Storage + +Please follow [this document](https://cloud.baidu.com/doc/Reference/s/9jwvz2egb) to learn how to get access key and secret key. + +The `--bucket` option format is `https://..bcebos.com`, replace `` with specific region code, e.g. the region code of Beijing is `bj`. You could find all available regions at [here](https://cloud.baidu.com/doc/BOS/s/Ck1rk80hn#%E8%AE%BF%E9%97%AE%E5%9F%9F%E5%90%8D%EF%BC%88endpoint%EF%BC%89). For example: + +```bash +$ juicefs format \ + --storage bos \ + --bucket https://..bcebos.com \ + ... \ + myjfs +``` + +If you are creating a file system on Baidu Cloud's server, you can specify the bucket name directly in `--bucket`. For example. + +```bash +# Running within Baidu Cloud +$ juicefs format \ + --storage bos \ + --bucket \ + ... \ + myjfs +``` + +## Kingsoft Cloud KS3 + +Please follow [this document](https://docs.ksyun.com/documents/1386) to learn how to get access key and secret key. + +KS3 provides [multiple endpoints](https://docs.ksyun.com/documents/6761) for each region, depends on your network (e.g. public or internal network), you should use appropriate endpoint. For example: + +```bash +$ juicefs format \ + --storage ks3 \ + --bucket https://. \ + ... \ + myjfs +``` + +## Mtyun Storage Service + +Please follow [this document](https://www.mtyun.com/doc/api/mss/mss/fang-wen-kong-zhi) to learn how to get access key and secret key. + +The `--bucket` option format is `https://.`, replace `` with specific value, e.g. `mtmss.com`. You could find all available endpoints at [here](https://www.mtyun.com/doc/products/storage/mss/index#%E5%8F%AF%E7%94%A8%E5%8C%BA%E5%9F%9F). For example: + +```bash +$ juicefs format \ + --storage mss \ + --bucket https://. \ + ... \ + myjfs +``` + +## NetEase Object Storage + +Please follow [this document](https://www.163yun.com/help/documents/55485278220111872) to learn how to get access key and secret key. + +NOS provides [multiple endpoints](https://www.163yun.com/help/documents/67078583131230208) for each region, depends on your network (e.g. public or internal network), you should use appropriate endpoint. For example: + +```bash +$ juicefs format \ + --storage nos \ + --bucket https://. \ + ... 
\ + myjfs +``` + +## QingStor + +Please follow [this document](https://docsv3.qingcloud.com/storage/object-storage/api/practices/signature/#%E8%8E%B7%E5%8F%96-access-key) to learn how to get access key and secret key. + +The `--bucket` option format is `https://..qingstor.com`, replace `` with specific region code, e.g. the region code of Beijing 3-A is `pek3a`. You could find all available regions at [here](https://docs.qingcloud.com/qingstor/#%E5%8C%BA%E5%9F%9F%E5%8F%8A%E8%AE%BF%E9%97%AE%E5%9F%9F%E5%90%8D). For example: + +```bash +$ juicefs format \ + --storage qingstor \ + --bucket https://..qingstor.com \ + ... \ + myjfs +``` + +:::note +The format of `--bucket` option for all QingStor compatible object storage services is `http://.`. +::: + +## Qiniu + +Please follow [this document](https://developer.qiniu.com/af/kb/1479/how-to-access-or-locate-the-access-key-and-secret-key) to learn how to get access key and secret key. + +The `--bucket` option format is `https://.s3-.qiniucs.com`, replace `` with specific region code, e.g. the region code of China East is `cn-east-1`. You could find all available regions at [here](https://developer.qiniu.com/kodo/4088/s3-access-domainname). For example: + +```bash +$ juicefs format \ + --storage qiniu \ + --bucket https://.s3-.qiniucs.com \ + ... \ + myjfs +``` + +## Sina Cloud Storage + +Please follow [this document](https://scs.sinacloud.com/doc/scs/guide/quick_start#accesskey) to learn how to get access key and secret key. + +The `--bucket` option format is `https://.stor.sinaapp.com`. For example: + +```bash +$ juicefs format \ + --storage scs \ + --bucket https://.stor.sinaapp.com \ + ... \ + myjfs +``` + +## CTYun OOS + +Please follow [this document](https://www.ctyun.cn/help2/10000101/10473683) to learn how to get access key and secret key. + +The `--bucket` option format is `https://.oss-.ctyunapi.cn`, replace `` with specific region code, e.g. the region code of Chengdu is `sccd`. You could find all available regions at [here](https://www.ctyun.cn/help2/10000101/10474062). For example: + +```bash +$ juicefs format \ + --storage oos \ + --bucket https://.oss-.ctyunapi.cn \ + ... \ + myjfs +``` + +## ECloud Object Storage + +Please follow [this document](https://ecloud.10086.cn/op-help-center/doc/article/24501) to learn how to get access key and secret key. + +ECloud Object Storage provides [multiple endpoints](https://ecloud.10086.cn/op-help-center/doc/article/40956) for each region, depends on your network (e.g. public or internal network), you should use appropriate endpoint. For example: + +```bash +$ juicefs format \ + --storage eos \ + --bucket https://. \ + ... \ + myjfs +``` + +## UCloud US3 + +Please follow [this document](https://docs.ucloud.cn/uai-censor/access/key) to learn how to get access key and secret key. + +US3 (formerly UFile) provides [multiple endpoints](https://docs.ucloud.cn/ufile/introduction/region) for each region, depends on your network (e.g. public or internal network), you should use appropriate endpoint. For example: + +```bash +$ juicefs format \ + --storage ufile \ + --bucket https://. \ + ... \ + myjfs +``` + +## Ceph RADOS + +:::note +The minimum version of Ceph supported by JuiceFS is Luminous (v12.2.*), please make sure your version of Ceph meets the requirements. +::: + +The [Ceph Storage Cluster](https://docs.ceph.com/en/latest/rados) has a messaging layer protocol that enables clients to interact with a Ceph Monitor and a Ceph OSD Daemon. 
The [`librados`](https://docs.ceph.com/en/latest/rados/api/librados-intro) API enables you to interact with the two types of daemons:
+
+- The [Ceph Monitor](https://docs.ceph.com/en/latest/rados/configuration/common/#monitors), which maintains a master copy of the cluster map.
+- The [Ceph OSD Daemon (OSD)](https://docs.ceph.com/en/latest/rados/configuration/common/#osds), which stores data as objects on a storage node.
+
+JuiceFS supports the use of the native Ceph APIs based on `librados`. You need to install the `librados` library and build the `juicefs` binary separately.
+
+First, install `librados`:
+
+:::note
+It is recommended to use a `librados` version that matches your Ceph version, e.g. if the Ceph version is Octopus (v15.2.\*), it is recommended to use `librados` v15.2.\* as well. Some Linux distributions (e.g. CentOS 7) may come with a lower version of `librados`, so if you fail to compile JuiceFS, try downloading a higher version of the package.
+:::
+
+```bash
+# Debian based system
+$ sudo apt-get install librados-dev
+
+# RPM based system
+$ sudo yum install librados2-devel
+```
+
+Then compile JuiceFS for Ceph (ensure you have Go 1.16+ and GCC 5.4+):
+
+```bash
+$ make juicefs.ceph
+```
+
+The `--bucket` option format is `ceph://`. A [pool](https://docs.ceph.com/en/latest/rados/operations/pools) is a logical partition for storing objects, so you may need to create a pool first. The value of the `--access-key` option is the Ceph cluster name; the default cluster name is `ceph`. The value of the `--secret-key` option is the [Ceph client user name](https://docs.ceph.com/en/latest/rados/operations/user-management); the default user name is `client.admin`.
+
+To connect to the Ceph Monitor, `librados` reads the Ceph configuration file by searching the default locations, and the first one found is used. The locations are:
+
+- `CEPH_CONF` environment variable
+- `/etc/ceph/ceph.conf`
+- `~/.ceph/config`
+- `ceph.conf` in the current working directory
+
+The example command is:
+
+```bash
+$ juicefs.ceph format \
+    --storage ceph \
+    --bucket ceph:// \
+    --access-key \
+    --secret-key \
+    ... \
+    myjfs
+```
+
+## Ceph RGW
+
+[Ceph Object Gateway](https://ceph.io/ceph-storage/object-storage) is an object storage interface built on top of `librados` to provide applications with a RESTful gateway to Ceph Storage Clusters. Ceph Object Gateway provides an S3-compatible interface, so we can set `--storage` to `s3` directly.
+
+The `--bucket` option format is `http://.` (virtual hosted-style). For example:
+
+```bash
+$ juicefs format \
+    --storage s3 \
+    --bucket http://. \
+    ... \
+    myjfs
+```
+
+## Swift
+
+[OpenStack Swift](https://github.com/openstack/swift) is a distributed object storage system designed to scale from a single machine to thousands of servers. Swift is optimized for multi-tenancy and high concurrency. Swift is ideal for backups, web and mobile content, and any other unstructured data that can grow without bound.
+
+The `--bucket` option format is `http://.`. A container defines a namespace for objects.
+
+**Currently, JuiceFS only supports [Swift V1 authentication](https://www.swiftstack.com/docs/cookbooks/swift_usage/auth.html).**
+
+The value of the `--access-key` option is the username, and the value of the `--secret-key` option is the password. For example:
+
+```bash
+$ juicefs format \
+    --storage swift \
+    --bucket http://. \
+    --access-key \
+    --secret-key \
+    ... \
+    myjfs
+```
+
+## MinIO
+
+[MinIO](https://min.io) is an open-source lightweight object storage service, compatible with the Amazon S3 API.
+
+It is easy to run a MinIO object store instance locally using Docker. For example, the following command sets and maps port `9900` for the console with `--console-address ":9900"`, and also maps the data path for the MinIO object store to the `minio-data` folder in the current directory, which you can modify as needed.
+
+```shell
+$ sudo docker run -d --name minio \
+    -p 9000:9000 \
+    -p 9900:9900 \
+    -e "MINIO_ROOT_USER=minioadmin" \
+    -e "MINIO_ROOT_PASSWORD=minioadmin" \
+    -v $PWD/minio-data:/data \
+    --restart unless-stopped \
+    minio/minio server /data --console-address ":9900"
+```
+
+It can be accessed at the following addresses:
+
+- **MinIO UI**: [http://127.0.0.1:9900](http://127.0.0.1:9900/)
+- **MinIO API**: [http://127.0.0.1:9000](http://127.0.0.1:9000/)
+
+The initial Access Key and Secret Key of the object store are both `minioadmin`.
+
+To use MinIO as the data storage for JuiceFS, set the `--storage` option to `minio`.
+
+```bash
+$ juicefs format \
+    --storage minio \
+    --bucket http://127.0.0.1:9000/ \
+    --access-key minioadmin \
+    --secret-key minioadmin \
+    ... \
+    myjfs
+```
+
+:::note
+Currently, JuiceFS only supports path-style MinIO URI addresses, e.g., `http://127.0.0.1:9000/myjfs`.
+:::
+
+## WebDAV
+
+[WebDAV](https://en.wikipedia.org/wiki/WebDAV) is an extension of the Hypertext Transfer Protocol (HTTP) that facilitates collaborative editing and management of documents stored on the WWW server among users. Starting from JuiceFS v0.15, a storage service that speaks WebDAV can be used as the data store for JuiceFS.
+
+You need to set `--storage` to `webdav` and `--bucket` to the endpoint of the WebDAV service. If basic authorization is enabled, the username and password should be provided as `--access-key` and `--secret-key`, for example:
+
+```bash
+$ juicefs format \
+    --storage webdav \
+    --bucket http:/// \
+    --access-key \
+    --secret-key \
+    ... \
+    myjfs
+```
+
+## HDFS
+
+[HDFS](https://hadoop.apache.org) is the file system for Hadoop, which can be used as the object store for JuiceFS.
+
+When HDFS is used, `--access-key` can be used to specify the `username`, and `hdfs` is usually the default superuser. For example:
+
+```bash
+$ juicefs format \
+    --storage hdfs \
+    --bucket namenode1:8020 \
+    --access-key hdfs \
+    ... \
+    myjfs
+```
+
+When `--access-key` is not specified during formatting, JuiceFS will use the current user of `juicefs mount` or the Hadoop SDK to access HDFS. It will hang and eventually fail with an IO error if the current user doesn't have enough permission to read/write the blocks in HDFS.
+
+JuiceFS will try to load configurations for the HDFS client based on `$HADOOP_CONF_DIR` or `$HADOOP_HOME`. If an empty value is provided to `--bucket`, the default HDFS found in the Hadoop configurations will be used.
+
+For an HA cluster, the addresses of the NameNodes can be specified together like this: `--bucket=namenode1:port,namenode2:port`.
+
+## Redis
+
+[Redis](https://redis.io) can be used as both the metadata storage and the data storage for JuiceFS, but when using Redis as the data storage, it is recommended not to store large-scale data.
+
+The `--bucket` option format is `redis://:/`. The value of the `--access-key` option is the username, and the value of the `--secret-key` option is the password. For example:
+
+```bash
+$ juicefs format \
+    --storage redis \
+    --bucket redis://:/ \
+    --access-key \
+    --secret-key \
+    ... \
+    myjfs
+```
+
+## TiKV
+
+[TiKV](https://tikv.org) is a highly scalable, low-latency, and easy-to-use key-value database.
It provides both raw and ACID-compliant transactional key-value API. + +TiKV can be used as both metadata storage and data storage for JuiceFS. + +The `--bucket` option format is like `:,:,:`, the `` is the address of Placement Driver (PD). The `--access-key` and `--secret-key` options have no effect and can be omitted. For example: + +```bash +$ juicefs format \ + --storage tikv \ + --bucket ":,:,:" \ + ... \ + myjfs +``` + +## Local disk + +When creating JuiceFS storage, if no storage type is specified, the local disk will be used to store data by default. The default storage path for root user is `/var/jfs`, and `~/.juicefs/local` is for ordinary users. + +For example, using the local Redis database and local disk to create a JuiceFS storage named `test`: + +```shell +$ juicefs format redis://localhost:6379/1 test +``` + +Local storage is usually only used to understand and experience the basic features of JuiceFS. The created JuiceFS storage cannot be mounted by other clients within the network and can only be used on a single machine. diff --git a/docs/en/reference/p8s_metrics.md b/docs/en/reference/p8s_metrics.md new file mode 100644 index 0000000..a2d6867 --- /dev/null +++ b/docs/en/reference/p8s_metrics.md @@ -0,0 +1,118 @@ +--- +sidebar_label: JuiceFS Metrics +sidebar_position: 2 +slug: /p8s_metrics +--- + +# JuiceFS Metrics + +:::tip +Please see the ["Monitoring"](../administration/monitoring.md) documentation to learn how to collect and display JuiceFS monitoring metrics. +::: + +## Global labels + +| Name | Description | +| ---- | ----------- | +| `vol_name` | Volume name | +| `mp` | Mount point path | + +:::info +When Prometheus scrapes a target, it attaches `instance` label automatically to the scraped time series which serve to identify the scraped target, its format is `:`. Refer to [official document](https://prometheus.io/docs/concepts/jobs_instances) for more information. +::: + +:::info +If the monitoring metrics are reported through [Prometheus Pushgateway](https://github.com/prometheus/pushgateway) (for example, [JuiceFS Hadoop Java SDK](../administration/monitoring.md#hadoop)), the value of the `mp` label is `sdk-`, and the value of the `instance` label is the host name. 
+::: + +## File system + +### Metrics + +| Name | Description | Unit | +| ---- | ----------- | ---- | +| `juicefs_used_space` | Total used space | byte | +| `juicefs_used_inodes` | Total number of inodes | | + +## Operating system + +### Metrics + +| Name | Description | Unit | +| ---- | ----------- | ---- | +| `juicefs_uptime` | Total running time | second | +| `juicefs_cpu_usage` | Accumulated CPU usage | second | +| `juicefs_memory` | Used memory | byte | + +## Metadata engine + +### Metrics + +| Name | Description | Unit | +| ---- | ----------- | ---- | +| `juicefs_transaction_durations_histogram_seconds` | Transactions latency distributions | second | +| `juicefs_transaction_restart` | Number of times a transaction is restarted | | + +## FUSE + +### Metrics + +| Name | Description | Unit | +| ---- | ----------- | ---- | +| `juicefs_fuse_read_size_bytes` | Size distributions of read request | byte | +| `juicefs_fuse_written_size_bytes` | Size distributions of write request | byte | +| `juicefs_fuse_ops_durations_histogram_seconds` | Operations latency distributions | second | +| `juicefs_fuse_open_handlers` | Number of open files and directories | | + +## SDK + +### Metrics + +| Name | Description | Unit | +| ---- | ----------- | ---- | +| `juicefs_sdk_read_size_bytes` | Size distributions of read request | byte | +| `juicefs_sdk_written_size_bytes` | Size distributions of write request | byte | +| `juicefs_sdk_ops_durations_histogram_seconds` | Operations latency distributions | second | + +## Cache + +### Metrics + +| Name | Description | Unit | +| ---- | ----------- | ---- | +| `juicefs_blockcache_blocks` | Number of cached blocks | | +| `juicefs_blockcache_bytes` | Size of cached blocks | byte | +| `juicefs_blockcache_hits` | Count of cached block hits | | +| `juicefs_blockcache_miss` | Count of cached block miss | | +| `juicefs_blockcache_writes` | Count of cached block writes | | +| `juicefs_blockcache_drops` | Count of cached block drops | | +| `juicefs_blockcache_evicts` | Count of cached block evicts | | +| `juicefs_blockcache_hit_bytes` | Size of cached block hits | byte | +| `juicefs_blockcache_miss_bytes` | Size of cached block miss | byte | +| `juicefs_blockcache_write_bytes` | Size of cached block writes | byte | +| `juicefs_blockcache_read_hist_seconds` | Latency distributions of read cached block | second | +| `juicefs_blockcache_write_hist_seconds` | Latency distributions of write cached block | second | + +## Object storage + +### Labels + +| Name | Description | +| ---- | ----------- | +| `method` | Request method to object storage (e.g. 
GET, PUT, HEAD, DELETE) | + +### Metrics + +| Name | Description | Unit | +| ---- | ----------- | ---- | +| `juicefs_object_request_durations_histogram_seconds` | Object storage request latency distributions | second | +| `juicefs_object_request_errors` | Count of failed requests to object storage | | +| `juicefs_object_request_data_bytes` | Size of requests to object storage | byte | + +## Internal + +### Metrics + +| Name | Description | Unit | +| ---- | ----------- | ---- | +| `juicefs_compact_size_histogram_bytes` | Size distributions of compacted data | byte | diff --git a/docs/en/reference/posix_compatibility.md b/docs/en/reference/posix_compatibility.md new file mode 100644 index 0000000..db452d7 --- /dev/null +++ b/docs/en/reference/posix_compatibility.md @@ -0,0 +1,293 @@ +--- +sidebar_label: POSIX Compatibility +sidebar_position: 6 +slug: /posix_compatibility +--- +# POSIX Compatibility + +JuiceFS ensures POSIX compatibility with the help of pjdfstest and LTP. + +## Pjdfstest + + [Pjdfstest](https://github.com/pjd/pjdfstest) is a test suite that helps exercise POSIX system calls. JuiceFS passed all of its latest 8813 tests: + +``` +All tests successful. + +Test Summary Report +------------------- +/root/soft/pjdfstest/tests/chown/00.t (Wstat: 0 Tests: 1323 Failed: 0) + TODO passed: 693, 697, 708-709, 714-715, 729, 733 +Files=235, Tests=8813, 233 wallclock secs ( 2.77 usr 0.38 sys + 2.57 cusr 3.93 csys = 9.65 CPU) +Result: PASS +``` + +Besides the things covered by pjdfstest, JuiceFS provides: + +- Close-to-open consistency. Once a file is closed, the following open and read are guaranteed see the data written before close. Within same mount point, read can see all data written before it immediately. +- Rename and all other metadata operations are atomic guaranteed by transaction of metadata engines. +- Open files remain accessible after unlink from same mount point. +- Mmap is supported (tested with FSx). +- Fallocate with punch hole support. +- Extended attributes (xattr). +- BSD locks (flock). +- POSIX record locks (fcntl). + +## LTP + +[LTP](https://github.com/linux-test-project/ltp) (Linux Test Project) is a joint project developed and maintained by IBM, Cisco, Fujitsu and others. + +> The project goal is to deliver tests to the open source community that validate the reliability, robustness, and stability of Linux. +> +> The LTP testsuite contains a collection of tools for testing the Linux kernel and related features. Our goal is to improve the Linux kernel and system libraries by bringing test automation to the testing effort. + +JuiceFS passed most of its file system related tests. + +### Test Environment + +- Host: Amazon EC2: c5d.xlarge (4C 8G) +- OS: Ubuntu 20.04.1 LTS (Kernel 5.4.0-1029-aws) +- Object storage: Amazon S3 +- JuiceFS version: 0.17-dev (2021-09-16 292f2b65) + +### Test Steps + +1. Download LTP [release](https://github.com/linux-test-project/ltp/releases/download/20210524/ltp-full-20210524.tar.bz2) from GitHub +2. Unarchive, compile and install: + +```bash +$ tar -jvxf ltp-full-20210524.tar.bz2 +$ cd ltp-full-20210524 +$ ./configure +$ make all +$ make install +``` + +3. Change directory to `/opt/ltp` since test tools are installed here: + +```bash +$ cd /opt/ltp +``` + +The test definition files are located under `runtest`. 
To speed up testing, we delete some pressure cases and unrelated cases in `fs` and `syscalls` (refer to [Appendix](#Appendix), modified files are saved as `fs-jfs` and `syscalls-jfs`), then execute: + +```bash +$ ./runltp -d /mnt/jfs -f fs_bind,fs_perms_simple,fsx,io,smoketest,fs-jfs,syscalls-jfs +``` + +### Test Result + +```bash +Testcase Result Exit Value +-------- ------ ---------- +fcntl17 FAIL 7 +fcntl17_64 FAIL 7 +getxattr05 CONF 32 +ioctl_loop05 FAIL 4 +ioctl_ns07 FAIL 1 +lseek11 CONF 32 +open14 CONF 32 +openat03 CONF 32 +setxattr03 FAIL 6 + +----------------------------------------------- +Total Tests: 1270 +Total Skipped Tests: 4 +Total Failures: 5 +Kernel Version: 5.4.0-1029-aws +Machine Architecture: x86_64 +``` + +Reasons for the skipped and failed tests: + +- fcntl17,fcntl17_64: automatically detect deadlock when trying to add POSIX locks. JuiceFS doesn't support it yet +- getxattr05: need ACL, which is not supported yet +- ioctl_loop05,ioctl_ns07,setxattr03: need `ioctl`, which is not supported yet +- lseek11: handle SEEK_DATA and SEEK_HOLE flags properly in `lseek`. JuiceFS uses kernel general function, which doesn't support these two flags +- open14,openat03: handle O_TMPFILE flag in `open`. JuiceFS can do nothing with it since it's not supported by FUSE + +### Appendix + +Deleted cases in `fs` and `syscalls`: + +```bash +# fs --> fs-jfs +gf01 growfiles -W gf01 -b -e 1 -u -i 0 -L 20 -w -C 1 -l -I r -T 10 -f glseek20 -S 2 -d $TMPDIR +gf02 growfiles -W gf02 -b -e 1 -L 10 -i 100 -I p -S 2 -u -f gf03_ -d $TMPDIR +gf03 growfiles -W gf03 -b -e 1 -g 1 -i 1 -S 150 -u -f gf05_ -d $TMPDIR +gf04 growfiles -W gf04 -b -e 1 -g 4090 -i 500 -t 39000 -u -f gf06_ -d $TMPDIR +gf05 growfiles -W gf05 -b -e 1 -g 5000 -i 500 -t 49900 -T10 -c9 -I p -u -f gf07_ -d $TMPDIR +gf06 growfiles -W gf06 -b -e 1 -u -r 1-5000 -R 0--1 -i 0 -L 30 -C 1 -f g_rand10 -S 2 -d $TMPDIR +gf07 growfiles -W gf07 -b -e 1 -u -r 1-5000 -R 0--2 -i 0 -L 30 -C 1 -I p -f g_rand13 -S 2 -d $TMPDIR +gf08 growfiles -W gf08 -b -e 1 -u -r 1-5000 -R 0--2 -i 0 -L 30 -C 1 -f g_rand11 -S 2 -d $TMPDIR +gf09 growfiles -W gf09 -b -e 1 -u -r 1-5000 -R 0--1 -i 0 -L 30 -C 1 -I p -f g_rand12 -S 2 -d $TMPDIR +gf10 growfiles -W gf10 -b -e 1 -u -r 1-5000 -i 0 -L 30 -C 1 -I l -f g_lio14 -S 2 -d $TMPDIR +gf11 growfiles -W gf11 -b -e 1 -u -r 1-5000 -i 0 -L 30 -C 1 -I L -f g_lio15 -S 2 -d $TMPDIR +gf12 mkfifo $TMPDIR/gffifo17; growfiles -b -W gf12 -e 1 -u -i 0 -L 30 $TMPDIR/gffifo17 +gf13 mkfifo $TMPDIR/gffifo18; growfiles -b -W gf13 -e 1 -u -i 0 -L 30 -I r -r 1-4096 $TMPDIR/gffifo18 +gf14 growfiles -W gf14 -b -e 1 -u -i 0 -L 20 -w -l -C 1 -T 10 -f glseek19 -S 2 -d $TMPDIR +gf15 growfiles -W gf15 -b -e 1 -u -r 1-49600 -I r -u -i 0 -L 120 -f Lgfile1 -d $TMPDIR +gf16 growfiles -W gf16 -b -e 1 -i 0 -L 120 -u -g 4090 -T 101 -t 408990 -l -C 10 -c 1000 -S 10 -f Lgf02_ -d $TMPDIR +gf17 growfiles -W gf17 -b -e 1 -i 0 -L 120 -u -g 5000 -T 101 -t 499990 -l -C 10 -c 1000 -S 10 -f Lgf03_ -d $TMPDIR +gf18 growfiles -W gf18 -b -e 1 -i 0 -L 120 -w -u -r 10-5000 -I r -l -S 2 -f Lgf04_ -d $TMPDIR +gf19 growfiles -W gf19 -b -e 1 -g 5000 -i 500 -t 49900 -T10 -c9 -I p -o O_RDWR,O_CREAT,O_TRUNC -u -f gf08i_ -d $TMPDIR +gf20 growfiles -W gf20 -D 0 -b -i 0 -L 60 -u -B 1000b -e 1 -r 1-256000:512 -R 512-256000 -T 4 -f gfbigio-$$ -d $TMPDIR +gf21 growfiles -W gf21 -D 0 -b -i 0 -L 60 -u -B 1000b -e 1 -g 20480 -T 10 -t 20480 -f gf-bld-$$ -d $TMPDIR +gf22 growfiles -W gf22 -D 0 -b -i 0 -L 60 -u -B 1000b -e 1 -g 20480 -T 10 -t 20480 -f gf-bldf-$$ -d $TMPDIR +gf23 growfiles -W 
gf23 -D 0 -b -i 0 -L 60 -u -B 1000b -e 1 -r 512-64000:1024 -R 1-384000 -T 4 -f gf-inf-$$ -d $TMPDIR +gf24 growfiles -W gf24 -D 0 -b -i 0 -L 60 -u -B 1000b -e 1 -g 20480 -f gf-jbld-$$ -d $TMPDIR +gf25 growfiles -W gf25 -D 0 -b -i 0 -L 60 -u -B 1000b -e 1 -r 1024000-2048000:2048 -R 4095-2048000 -T 1 -f gf-large-gs-$$ -d $TMPDIR +gf26 growfiles -W gf26 -D 0 -b -i 0 -L 60 -u -B 1000b -e 1 -r 128-32768:128 -R 512-64000 -T 4 -f gfsmallio-$$ -d $TMPDIR +gf27 growfiles -W gf27 -b -D 0 -w -g 8b -C 1 -b -i 1000 -u -f gfsparse-1-$$ -d $TMPDIR +gf28 growfiles -W gf28 -b -D 0 -w -g 16b -C 1 -b -i 1000 -u -f gfsparse-2-$$ -d $TMPDIR +gf29 growfiles -W gf29 -b -D 0 -r 1-4096 -R 0-33554432 -i 0 -L 60 -C 1 -u -f gfsparse-3-$$ -d $TMPDIR +gf30 growfiles -W gf30 -D 0 -b -i 0 -L 60 -u -B 1000b -e 1 -o O_RDWR,O_CREAT,O_SYNC -g 20480 -T 10 -t 20480 -f gf-sync-$$ -d $TMPDIR +rwtest01 export LTPROOT; rwtest -N rwtest01 -c -q -i 60s -f sync 10%25000:$TMPDIR/rw-sync-$$ +rwtest02 export LTPROOT; rwtest -N rwtest02 -c -q -i 60s -f buffered 10%25000:$TMPDIR/rw-buffered-$$ +rwtest03 export LTPROOT; rwtest -N rwtest03 -c -q -i 60s -n 2 -f buffered -s mmread,mmwrite -m random -Dv 10%25000:$TMPDIR/mm-buff-$$ +rwtest04 export LTPROOT; rwtest -N rwtest04 -c -q -i 60s -n 2 -f sync -s mmread,mmwrite -m random -Dv 10%25000:$TMPDIR/mm-sync-$$ +rwtest05 export LTPROOT; rwtest -N rwtest05 -c -q -i 50 -T 64b 500b:$TMPDIR/rwtest01%f +iogen01 export LTPROOT; rwtest -N iogen01 -i 120s -s read,write -Da -Dv -n 2 500b:$TMPDIR/doio.f1.$$ 1000b:$TMPDIR/doio.f2.$$ +quota_remount_test01 quota_remount_test01.sh +isofs isofs.sh + +# syscalls --> syscalls-jfs +bpf_prog05 bpf_prog05 +cacheflush01 cacheflush01 +chown01_16 chown01_16 +chown02_16 chown02_16 +chown03_16 chown03_16 +chown04_16 chown04_16 +chown05_16 chown05_16 +clock_nanosleep03 clock_nanosleep03 +clock_gettime03 clock_gettime03 +leapsec01 leapsec01 +close_range01 close_range01 +close_range02 close_range02 +fallocate06 fallocate06 +fchown01_16 fchown01_16 +fchown02_16 fchown02_16 +fchown03_16 fchown03_16 +fchown04_16 fchown04_16 +fchown05_16 fchown05_16 +fcntl06 fcntl06 +fcntl06_64 fcntl06_64 +getegid01_16 getegid01_16 +getegid02_16 getegid02_16 +geteuid01_16 geteuid01_16 +geteuid02_16 geteuid02_16 +getgid01_16 getgid01_16 +getgid03_16 getgid03_16 +getgroups01_16 getgroups01_16 +getgroups03_16 getgroups03_16 +getresgid01_16 getresgid01_16 +getresgid02_16 getresgid02_16 +getresgid03_16 getresgid03_16 +getresuid01_16 getresuid01_16 +getresuid02_16 getresuid02_16 +getresuid03_16 getresuid03_16 +getrusage04 getrusage04 +getuid01_16 getuid01_16 +getuid03_16 getuid03_16 +ioctl_sg01 ioctl_sg01 +fanotify16 fanotify16 +fanotify18 fanotify18 +fanotify19 fanotify19 +lchown01_16 lchown01_16 +lchown02_16 lchown02_16 +lchown03_16 lchown03_16 +mbind02 mbind02 +mbind03 mbind03 +mbind04 mbind04 +migrate_pages02 migrate_pages02 +migrate_pages03 migrate_pages03 +modify_ldt01 modify_ldt01 +modify_ldt02 modify_ldt02 +modify_ldt03 modify_ldt03 +move_pages01 move_pages01 +move_pages02 move_pages02 +move_pages03 move_pages03 +move_pages04 move_pages04 +move_pages05 move_pages05 +move_pages06 move_pages06 +move_pages07 move_pages07 +move_pages09 move_pages09 +move_pages10 move_pages10 +move_pages11 move_pages11 +move_pages12 move_pages12 +msgctl05 msgctl05 +msgstress04 msgstress04 +openat201 openat201 +openat202 openat202 +openat203 openat203 +madvise06 madvise06 +madvise09 madvise09 +ptrace04 ptrace04 +quotactl01 quotactl01 +quotactl04 quotactl04 +quotactl06 quotactl06 +readdir21 readdir21 +recvmsg03 
recvmsg03 +sbrk03 sbrk03 +semctl08 semctl08 +semctl09 semctl09 +set_mempolicy01 set_mempolicy01 +set_mempolicy02 set_mempolicy02 +set_mempolicy03 set_mempolicy03 +set_mempolicy04 set_mempolicy04 +set_thread_area01 set_thread_area01 +setfsgid01_16 setfsgid01_16 +setfsgid02_16 setfsgid02_16 +setfsgid03_16 setfsgid03_16 +setfsuid01_16 setfsuid01_16 +setfsuid02_16 setfsuid02_16 +setfsuid03_16 setfsuid03_16 +setfsuid04_16 setfsuid04_16 +setgid01_16 setgid01_16 +setgid02_16 setgid02_16 +setgid03_16 setgid03_16 +sgetmask01 sgetmask01 +setgroups01_16 setgroups01_16 +setgroups02_16 setgroups02_16 +setgroups03_16 setgroups03_16 +setgroups04_16 setgroups04_16 +setregid01_16 setregid01_16 +setregid02_16 setregid02_16 +setregid03_16 setregid03_16 +setregid04_16 setregid04_16 +setresgid01_16 setresgid01_16 +setresgid02_16 setresgid02_16 +setresgid03_16 setresgid03_16 +setresgid04_16 setresgid04_16 +setresuid01_16 setresuid01_16 +setresuid02_16 setresuid02_16 +setresuid03_16 setresuid03_16 +setresuid04_16 setresuid04_16 +setresuid05_16 setresuid05_16 +setreuid01_16 setreuid01_16 +setreuid02_16 setreuid02_16 +setreuid03_16 setreuid03_16 +setreuid04_16 setreuid04_16 +setreuid05_16 setreuid05_16 +setreuid06_16 setreuid06_16 +setreuid07_16 setreuid07_16 +setuid01_16 setuid01_16 +setuid03_16 setuid03_16 +setuid04_16 setuid04_16 +shmctl06 shmctl06 +socketcall01 socketcall01 +socketcall02 socketcall02 +socketcall03 socketcall03 +ssetmask01 ssetmask01 +swapoff01 swapoff01 +swapoff02 swapoff02 +swapon01 swapon01 +swapon02 swapon02 +swapon03 swapon03 +switch01 endian_switch01 +sysinfo03 sysinfo03 +timerfd04 timerfd04 +perf_event_open02 perf_event_open02 +statx07 statx07 +io_uring02 io_uring02 +``` diff --git a/docs/en/release_notes.md b/docs/en/release_notes.md new file mode 100644 index 0000000..14325b3 --- /dev/null +++ b/docs/en/release_notes.md @@ -0,0 +1,4 @@ +# Release notes + +See [JuiceFS changelog](https://github.com/juicedata/juicefs/releases) for release notes. + diff --git a/docs/en/security/_data_protection.md b/docs/en/security/_data_protection.md new file mode 100644 index 0000000..85695cc --- /dev/null +++ b/docs/en/security/_data_protection.md @@ -0,0 +1,2 @@ +# Data Protection + diff --git a/docs/en/security/encrypt.md b/docs/en/security/encrypt.md new file mode 100644 index 0000000..1bfe9e9 --- /dev/null +++ b/docs/en/security/encrypt.md @@ -0,0 +1,71 @@ +# Data Encryption + +## Data Encryption In Transit + +JuiceFS encrypts data during transmission over the network to prevent unauthorized users from eavesdropping on network traffic. + +JuiceFS clients always use HTTPS to upload data to the Object Storage Service, except for the following cases. + +- Uploading to Alibaba Cloud OSS using internal endpoints +- Uploading to UCloud US3 using internal endpoints + + +## Data Encryption At Rest + +JuiceFS supports Data Encryption At Rest. Any data will be encrypted first +before uploading to the object store. With such ability, JuiceFS can effectively prevent data leakage as along as the encryption key is safe and sound. + +JuiceFS uses industry-standard encryption methods (AES-GCM and RSA) in client-side. Encryption and decryption are performed on the JuiceFS client side. +The user only need to do one thing is providing a private key or password when JuiceFS is mounted and uses it like a normal file system. +After the setup, the mounted file system is completely transparent to the application. + +> **NOTE**: The cached data on the client-side is **NOT** encrypted. 
However, only the root user or owner can access this data. To encrypt the cached data as well, you can put the cached directory in an encrypted file system or block storage. + + +### Encryption and Decryption Method + +A global RSA private key `M` must be created for each encrypted file system. Each object stored in the object store will have its own random symmetric key `S`. Data is encrypted with the symmetric key `S` for AES-GCM encryption, `S` is encrypted with the global RSA private key `M`, and the RSA private key is encrypted using a user-specified passphrase. + +![Encryption At-rest](../images/encryption.png) + +The detailed process of data encryption is as follows: +- Before writing to the object store, the data blocks are compressed using LZ4 or ZStandard. +- A random 256-bit symmetric key `S` and a random seed `N` are generated for each data block. +- AES-GCM-based encryption of each data block using `S` and `N` yields `encrypted_data`. +- To avoid the symmetric key `S` from being transmitted in clear text over the network, the symmetric key `S` is encrypted with the RSA key `M` to obtain the ciphertext `K`. +- The encrypted data `encrypted_data`, the ciphertext `K`, and the random seed `N` are combined into an object and then written to the object storage. + +The steps for decrypting the data are as follows: +- Read the entire encrypted object (it may be a bit larger than 4MB). +- Parse the object data to get the ciphertext `K`, the random seed `N`, and the encrypted data `encrypted_data`. +- Decrypt `K` with RSA key to get symmetric key `S`. +- Decrypt the data `encrypted_data` based on AES-GCM using `S` and `N` to get the data block plaintext. +- Decompress the data block. + + +### Key Management + +The security of RSA keys is critical when data at rest encryption is enabled. If the key is compromised, it may lead to data leakage. If the key is lost, then **all** encrypted data will be lost and cannot be recovered. + +When creating a new volume using `juicefs format`, static encryption can be enabled by specifying the RSA private key with the `-encrypt-rsa-key` parameter, which will be saved to Redis. When the private key is password-protected, the password can be specified using the environment variable `JFS_RSA_PASSPHRASE`. + +Usage: + +1. Generate RSA key + +```shell +$ openssl genrsa -out my-priv-key.pem -aes256 2048 +``` + +2. Provide the key when formatting + +```shell +$ juicefs format --encrypt-rsa-key my-priv-key.pem META-URL NAME +``` + +> **NOTE**: If the private key is password-protected, an environment variable `JFS_RSA_PASSPHRASE` should be exported first before executing `juicefs mount`. + + +### Performance + +TLS, HTTPS, and AES-256 are implemented very efficiently in modern CPUs. Therefore, enabling encryption does not have a significant impact on file system performance. RSA algorithms are relatively slow, especially the decryption process. It is recommended to use 2048-bit RSA keys for storage encryption. Using 4096-bit keys may have a significant impact on reading performance. diff --git a/docs/en/security/trash.md b/docs/en/security/trash.md new file mode 100644 index 0000000..190dc69 --- /dev/null +++ b/docs/en/security/trash.md @@ -0,0 +1,62 @@ +# Trash + +:::note +This feature requires JuiceFS v1.0.0 or higher +::: + +Data safety is always critical for a storage system. JuiceFS enables **trash** feature by default to keep user-removed files for a certain period in a hidden directory named `.trash`. 
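+For example, on a mounted volume the hidden directory sits next to the normal entries (the mount point `/mnt/jfs` and the listing below are purely illustrative):
+
+```shell
+# .trash starts with a dot, so it only shows up with `ls -a`
+$ ls -a /mnt/jfs
+.  ..  .trash  mydata
+```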
+
+## Configure
+
+When using the `juicefs format` command to initialize a JuiceFS volume, users may specify `--trash-days ` to configure the number of days during which files are kept in the `.trash` directory. Within this period, user-removed files are not actually purged, so you won't see a decreased number in the `df` output, and can still find the blocks in the object storage.
+
+- The default value of `trash-days` is 1, which means files in the trash will be automatically purged after ONE day.
+- Use `--trash-days 0` to disable this feature; the trash will be emptied in a short time, and all files removed afterwards will be purged immediately.
+- Trash is disabled for older versions. If you want to enable it for an existing volume, please upgrade **ALL** clients first and then change `--trash-days` to a positive value manually.
+
+After initializing a volume, you can still update `trash-days` with the `config` command, e.g.:
+
+```bash
+$ juicefs config META-URL --trash-days 7
+```
+
+Then you can check the new configuration through the `status` command:
+
+```bash
+$ juicefs status META-URL
+
+{
+  "Setting": {
+    ...
+    "TrashDays": 7
+  }
+}
+```
+
+## Usage
+
+The `.trash` directory is automatically created under the root `/`.
+
+### Tree Structure
+
+There are only two levels under the tree rooted by `.trash`. The first one is a list of directories that are automatically created by JuiceFS and named `year-month-day-hour` (e.g. `2021-11-30-10`). All files removed in a certain hour will be moved into the corresponding directory. The second level is just a plain list of removed files and empty directories (usually `rm -rf ` will remove the files in `dir` first, and then remove the empty `dir`). Obviously, the original tree structure is lost when files are moved into the trash. To preserve as much information about the original hierarchy as possible without impacting performance, JuiceFS renames files in the trash to `{parentInode-fileInode-fileName}`. Here `inode` is an internal number used for file system organization, and can be ignored if you only care about the name of the original file.
+
+:::note
+UTC is used when naming directories in the first level.
+:::
+
+:::tip
+You can use `juicefs info` to check the inode of a file or directory.
+:::
+
+### Access
+
+All users are allowed to browse the trash and see the full list of removed files. However, files in the trash keep their original modes, so users can only read files that they could read before. If JuiceFS is mounted with `--subdir ` (mostly used as a CSI driver on Kubernetes), the whole trash will be hidden and can't be accessed.
+
+It is not permitted to create files within the trash. Deleting or purging a file is forbidden as well for non-root users, even if they are the owner of the file.
+
+### Recover/Purge
+
+It is suggested to ask the root user to recover files, since root is allowed to move them out of the trash with a single `mv` command, which causes no data copy. Other users, however, can only recover a file by reading its content and writing it to a new file.
+
+The JuiceFS client checks the trash every hour and purges old entries; at least one active client is required to make this happen. As with recovering, only the root user is allowed to purge entries manually.
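+As a sketch of the recovery flow described above (the mount point, hour directory and file names are hypothetical):
+
+```shell
+# Entries removed during a given hour are renamed to {parentInode-fileInode-fileName}
+$ ls /mnt/jfs/.trash/2021-11-30-10/
+1-12345-report.pdf
+
+# As root, a single `mv` restores the entry without copying any data
+$ sudo mv /mnt/jfs/.trash/2021-11-30-10/1-12345-report.pdf /mnt/jfs/docs/report.pdf
+```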
diff --git a/docs/en/tutorials/aliyun.md b/docs/en/tutorials/aliyun.md new file mode 100644 index 0000000..9ef2575 --- /dev/null +++ b/docs/en/tutorials/aliyun.md @@ -0,0 +1,303 @@ +--- +sidebar_label: Use JuiceFS on Alibaba Cloud +sidebar_position: 6 +slug: /clouds/aliyun +--- +# Use JuiceFS on Alibaba Cloud + +As shown in the figure below, JuiceFS is driven by both the database and the object storage. The files stored in JuiceFS are split into fixed-size data blocks and stored in the object store according to certain rules, while the metadata corresponding to the data is stored in the database. + +The metadata is stored completely independently, and the retrieval and processing of files does not directly manipulate the data in the object storage, but first manipulates the metadata in the database, and only interacts with the object storage when the data changes. + +This design can effectively reduce the cost of the object storage in terms of the number of requests, but also allows us to significantly experience the performance improvement brought by JuiceFS. + +![](../images/juicefs-arch-new.png) + +## Preparation + +From the previous architecture description, you can know that JuiceFS needs to be used together with database and object storage. Here we directly use Alibaba Cloud ECS cloud server, combined with cloud database and OSS object storage. + +When you create cloud computing resources, try to choose in the same region, so that resources can access each other through intranet and avoid using public network to incur additional traffic costs. + +### 1. ECS +JuiceFS has no special requirements for server hardware, generally speaking, entry-level cloud servers can also use JuiceFS stably, usually you just need to choose the one that can meet your own business. + +In particular, you do not need to buy a new server or reinstall the system to use JuiceFS, JuiceFS is not business invasive and will not cause any interference with your existing systems and programs, you can install and use JuiceFS on your running server. + +By default, JuiceFS takes up 1GB of hard disk space for caching, and you can adjust the size of the cache space as needed. This cache is a data buffer layer between the client and the object storage, and you can get better performance by choosing a cloud drive with better performance. + +In terms of operating system, JuiceFS can be installed on all operating systems provided by Alibaba Cloud ECS. + +**The ECS specification used in this document are as follows:** + +| **Instance Specification** | ecs.t5-lc1m1.small | +| -------------------------- | -------------------------- | +| **CPU** | 1 core | +| **MEMORY** | 1 GB | +| **Storage** | 40 GB | +| **OS** | Ubuntu Server 20.04 64-bit | +| **Location** | Shanghai | + +### 2. Cloud Database + +JuiceFS will store all the metadata corresponding to the data in a separate database, which is currently support Redis, MySQL, PostgreSQL and SQLite. + +Depending on the database type, the performance and reliability of metadata are different. For example, Redis runs entirely on memory, which provides the ultimate performance, but is difficult to operate and maintain, and has relatively low reliability. SQLite is a single-file relational database with low performance and is not suitable for large-scale data storage, but it is configuration-free and suitable for a small amount of data storage on a single machine. + +If you just want to evaluate the functionality of JuiceFS, you can build the database manually on ECS. 
When you want to use JucieFS in a production environment, the cloud database service is usually a better choice if you don't have a professional database operation and maintenance team. + +Of course, you can also use cloud database services provided on other platforms if you wish.But in this case, you have to expose the database port to the public network, which also has some security risks. + +If you must access the database through the public network, you can enhance the security of your data by strictly limiting the IP addresses that are allowed to access the database through the whitelist feature provided by the cloud database console. + +On the other hand, if you cannot successfully connect to the cloud database through the public network, then you can check the whitelist of the database. + +| Database | Redis | MySQL、PostgreSQL | SQLite | +| :-------------: | :-----------------------------------------------------: | :----------------------------------------------------------: | :----------------------------------------------------------: | +| **Performance** | High | Medium | Low | +| **Management** | High | Medium | Low | +| **Reliability** | Low | Medium | Low | +| **Scenario** | Massive data, distributed high-frequency read and write | Massive data, distributed low and medium frequency read and write | Low frequency read and write in single machine for small amount of data | + +> **Note**: If you use [JuiceFS Hosted Service](https://juicefs.com/docs/en/hosted_service.html), you do not need to prepare a database. + +**This article uses the [ApsaraDB for Redis](https://www.alibabacloud.com/product/apsaradb-for-redis), and the following is pseudo address compiled for demonstration purposes only:** + +| Redis Version | 5.0 Community Edition | +| --------------------------- | -------------------------------------- | +| **Instance Specification** | 256M Standard master-replica instances | +| **Connection Address** | herald-sh-abc.redis.rds.aliyuncs.com | +| **Available Zone** | Shanghai | + +### 3. Object Storage OSS + +JuiceFS will store all the data in object storage, which supports almost all object storage services. However, to get the best performance, when using Alibaba Cloud ECS, with OSS object storage is usually the optimal choice. However, please note that choosing ECS and OSS Bucket in the same region so that they can be accessed through intranet not only has low latency, but also does not require additional traffic costs. + +Of course, you can also use object storage services provided by other cloud platforms if you wish, but this is not recommended. First of all, accessing object storage from other cloud platforms through ECS has to take public network, and object storage will incur traffic costs, and the access latency will be higher compared to this, which may affect the performance of JuiceFS. + +Alibaba Cloud OSS has different storage levels, and since JuiceFS needs to interact with object storage frequently, it is recommended to use standard tier. You can use it with OSS resource pack to reduce the cost of using object storage. + +### API access secret key + +Alibaba Cloud OSS needs to be accessed through API, you need to prepare the access secret key, including `Access Key ID` and `Access Key Secret`, [click here](https://www.alibabacloud.com/help/doc-detail/125558.htm) to get the way. 
+ +> **Security Advisory**: Explicit use of the API access secret key may lead to key compromise, it is recommended to assign [RAM Role](https://www.alibabacloud.com/help/doc-detail/110376.htm) to the cloud server. Once an ECS has been granted access to the OSS, the API access key is not required to access the OSS. + +## Installation + +We currently using Ubuntu Server 20.04 64-bit, so you can download the latest version of the client by running the following commands. You can also choose another version by visiting the [JuiceFS GitHub Releases](https://github.com/juicedata/juicefs/releases) page. + +```shell +$ JFS_LATEST_TAG=$(curl -s https://api.github.com/repos/juicedata/juicefs/releases/latest | grep 'tag_name' | cut -d '"' -f 4 | tr -d 'v') +``` + +```shell +$ wget "https://github.com/juicedata/juicefs/releases/download/v${JFS_LATEST_TAG}/juicefs-${JFS_LATEST_TAG}-linux-amd64.tar.gz" +``` + +After downloading, unzip the program into the `juice` folder. + +```shell +$ mkdir juice && tar -zxvf "juicefs-${JFS_LATEST_TAG}-linux-amd64.tar.gz" -C juice +``` + +Install the JuiceFS client to `/usr/local/bin` : + +```shell +$ sudo install juice/juicefs /usr/local/bin +``` + +Execute the command and see the help message `juicefs` returned, which means the client installation is successful. + +```shell +$ juicefs +NAME: + juicefs - A POSIX file system built on Redis and object storage. + +USAGE: + juicefs [global options] command [command options] [arguments...] + +VERSION: + 0.15.2 (2021-07-07T05:51:36Z 4c16847) + +COMMANDS: + format format a volume + mount mount a volume + umount unmount a volume + gateway S3-compatible gateway + sync sync between two storage + rmr remove directories recursively + info show internal information for paths or inodes + bench run benchmark to read/write/stat big/small files + gc collect any leaked objects + fsck Check consistency of file system + profile analyze access log + status show status of JuiceFS + warmup build cache for target directories/files + dump dump metadata into a JSON file + load load metadata from a previously dumped JSON file + help, h Shows a list of commands or help for one command + +GLOBAL OPTIONS: + --verbose, --debug, -v enable debug log (default: false) + --quiet, -q only warning and errors (default: false) + --trace enable trace log (default: false) + --no-agent Disable pprof (:6060) and gops (:6070) agent (default: false) + --help, -h show help (default: false) + --version, -V print only the version (default: false) + +COPYRIGHT: + Apache License 2.0 +``` + +JuiceFS has good cross-platform compatibility and is supported on Linux, Windows and macOS. This article focuses on the installation and use of JuiceFS on Linux, if you need to know how to install it on other systems, please [check the documentation](../getting-started/installation.md). + +## Creating JuiceFS + +Once the JuiceFS client is installed, you can now create the JuiceFS storage using the Redis database and OSS object storage that you prepared earlier. + +Technically speaking, this step should be called "Format a volume". However, given that many users may not understand or care about the standard file system terminology, we will simply call the process "Create a JuiceFS Storage". + +The following command creates a storage called `mystor`, i.e., a file system, using the `format` subcommand provided by the JuiceFS client. 
+ +```shell +$ juicefs format \ + --storage oss \ + --bucket https:// \ + --access-key \ + --secret-key \ + redis://:@herald-sh-abc.redis.rds.aliyuncs.com:6379/1 \ + mystor +``` + +**Option description:** + +- `--storage`: Specify the type of object storage, [click here to view](../reference/how_to_setup_object_storage.md) object storage services supported by JuiceFS. +- `--bucket`: Bucket domain name of the object storage. When using OSS, just fill in the bucket name, no need to fill in the full domain name, JuiceFS will automatically identify and fill in the full address. +- `--access-key` and `--secret-key`: the secret key pair to access the object storage API, [click here](https://www.alibabacloud.com/help/doc-detail/125558.htm) to get the way. + +> Redis 6.0 authentication requires username and password parameters in the format of `redis://username:password@redis-server-url:6379/1`. Currently, Alibaba Cloud Redis only provides Reids 4.0 and 5.0 versions, which require only a password for authentication, and just leave the username empty when setting the Redis server address, for example: `redis://:password@redis-server-url:6379/1`. + +When using the RAM role to bind to the ECS, the JucieFS storage can be created by specifying `--storage` and `--bucket` without providing the API access key. The command can be rewritten as follows: + +```shell +$ juicefs format \ + --storage oss \ + --bucket https://mytest.oss-cn-shanghai.aliyuncs.com \ + redis://:@herald-sh-abc.redis.rds.aliyuncs.com:6379/1 \ + mystor +``` + +Output like the following means the file system was created successfully. + +```shell +2021/07/13 16:37:14.264445 juicefs[22290] : Meta address: redis://@herald-sh-abc.redis.rds.aliyuncs.com:6379/1 +2021/07/13 16:37:14.277632 juicefs[22290] : maxmemory_policy is "volatile-lru", please set it to 'noeviction'. +2021/07/13 16:37:14.281432 juicefs[22290] : Ping redis: 3.609453ms +2021/07/13 16:37:14.527879 juicefs[22290] : Data uses oss://mytest/mystor/ +2021/07/13 16:37:14.593450 juicefs[22290] : Volume is formatted as {Name:mystor UUID:4ad0bb86-6ef5-4861-9ce2-a16ac5dea81b Storage:oss Bucket:https://mytest340 AccessKey:LTAI4G4v6ioGzQXy56m3XDkG SecretKey:removed BlockSize:4096 Compression:none Shards:0 Partitions:0 Capacity:0 Inodes:0 EncryptKey:} +``` + +## Mount JuiceFS + +When the file system is created, the information related to the object storage is stored in the database, so there is no need to enter information such as the bucket domain and secret key when mounting. + +Use the `mount` subcommand to mount the file system to the `/mnt/jfs` directory. + +```shell +$ sudo juicefs mount -d redis://:@herald-sh-abc.redis.rds.aliyuncs.com:6379/1 /mnt/jfs +``` + +> **Note**: When mounting the file system, only the Redis database address is required, not the file system name. The default cache path is `/var/jfsCache`, please make sure the current user has enough read/write permissions. + +Output similar to the following means that the file system was mounted successfully. + +```shell +2021/07/13 16:40:37.088847 juicefs[22307] : Meta address: redis://@herald-sh-abc.redis.rds.aliyuncs.com/1 +2021/07/13 16:40:37.101279 juicefs[22307] : maxmemory_policy is "volatile-lru", please set it to 'noeviction'. 
+2021/07/13 16:40:37.104870 juicefs[22307] : Ping redis: 3.408807ms
+2021/07/13 16:40:37.384977 juicefs[22307] : Data use oss://mytest/mystor/
+2021/07/13 16:40:37.387412 juicefs[22307] : Disk cache (/var/jfsCache/4ad0bb86-6ef5-4861-9ce2-a16ac5dea81b/): capacity (1024 MB), free ratio (10%), max pending pages (15)
+.2021/07/13 16:40:38.410742 juicefs[22307] : OK, mystor is ready at /mnt/jfs
+```
+
+Using the `df` command, you can see how the file system is mounted.
+
+```shell
+$ df -Th
+File system type capacity used usable used% mount point
+JuiceFS:mystor fuse.juicefs 1.0P 64K 1.0P 1% /mnt/jfs
+```
+
+After the file system is successfully mounted, you can now store data in the `/mnt/jfs` directory as if you were using a local hard drive.
+
+> **Multi-Host Sharing**: JuiceFS storage supports being mounted by multiple cloud servers at the same time. You can install the JuiceFS client on other cloud servers and then use the same database address `redis://:@herald-sh-abc.redis.rds.aliyuncs.com:6379/1` to mount the file system on each host.
+
+## File System Status
+
+Use the `status` subcommand of the JuiceFS client to view basic information and connection status of a file system.
+
+```shell
+$ juicefs status redis://:@herald-sh-abc.redis.rds.aliyuncs.com:6379/1
+
+2021/07/13 16:56:17.143503 juicefs[22415] : Meta address: redis://@herald-sh-abc.redis.rds.aliyuncs.com:6379/1
+2021/07/13 16:56:17.157972 juicefs[22415] : maxmemory_policy is "volatile-lru", please set it to 'noeviction'.
+2021/07/13 16:56:17.161533 juicefs[22415] : Ping redis: 3.392906ms
+{
+  "Setting": {
+    "Name": "mystor",
+    "UUID": "4ad0bb86-6ef5-4861-9ce2-a16ac5dea81b",
+    "Storage": "oss",
+    "Bucket": "https://mytest",
+    "AccessKey": "",
+    "BlockSize": 4096,
+    "Compression": "none",
+    "Shards": 0,
+    "Partitions": 0,
+    "Capacity": 0,
+    "Inodes": 0
+  },
+  "Sessions": [
+    {
+      "Sid": 3,
+      "Heartbeat": "2021-07-13T16:55:38+08:00",
+      "Version": "0.15.2 (2021-07-07T05:51:36Z 4c16847)",
+      "Hostname": "demo-test-sh",
+      "MountPoint": "/mnt/jfs",
+      "ProcessID": 22330
+    }
+  ]
+}
+```
+
+## Unmount JuiceFS
+
+The file system can be unmounted using the `umount` command provided by the JuiceFS client, e.g.
+
+```shell
+$ sudo juicefs umount /mnt/jfs
+```
+
+> **Note**: Forced unmount of the file system in use may result in data corruption or loss, so please be sure to proceed with caution.
+
+## Auto-mount on Boot
+
+If you don't want to manually remount JuiceFS storage on reboot, you can set up automatic mounting of the file system.
+
+First, you need to rename the `juicefs` client to `mount.juicefs` and copy it to the `/sbin/` directory.
+
+```shell
+$ sudo cp juice/juicefs /sbin/mount.juicefs
+```
+
+Edit the `/etc/fstab` configuration file and add a new record.
+
+```shell
+redis://:@herald-sh-abc.redis.rds.aliyuncs.com:6379/1 /mnt/jfs juicefs _netdev,cache-size=20480 0 0
+```
+
+The mount option `cache-size=20480` means allocating 20GB of local disk space for the JuiceFS cache. Please decide the cache size according to the actual disk capacity of your ECS instance. In general, allocating more cache space for JuiceFS will result in better performance.
+
+You can adjust the FUSE mount options in the above configuration as needed; for more information, please [check the documentation](../reference/fuse_mount_options.md).
+
+> **Note**: Please replace the Redis address, mount point, and mount options in the above configuration file with your actual information.
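+
+You can verify the new record without rebooting. The following is only a sketch: it assumes the entry above has already been saved to `/etc/fstab` and that the file system is not currently mounted. `mount -a` asks the system to mount everything listed in `/etc/fstab` that is not yet mounted.
+
+```shell
+$ sudo mount -a
+$ df -Th /mnt/jfs
+```
+
+If `df` reports a `fuse.juicefs` file system at `/mnt/jfs`, the entry works and will also take effect on the next boot.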
diff --git a/docs/en/tutorials/aws.md b/docs/en/tutorials/aws.md new file mode 100644 index 0000000..f1b13e0 --- /dev/null +++ b/docs/en/tutorials/aws.md @@ -0,0 +1,243 @@ +--- +sidebar_label: Use JuiceFS on AWS +sidebar_position: 4 +slug: /clouds/aws +--- +# Use JuiceFS on AWS + +AWS is the world's leading cloud computing platform, offering almost all types of cloud computing services. Thanks to the rich product line of AWS, users can choose JuiceFS components in a very flexible way. + +## Preparation + +As you can see from the previous architecture, JuiceFS consists of the following three components: + +1. A **JuiceFS client** installed on the server +2. The **object storage** used to store data +3. A **database** for storing metadata + +### 1. Servers + +Amazon EC2 Cloud Server is one of the most basic and widely used cloud services on the AWS platform. It offers more than 400 instance sizes and 81 availability zones in 25 data centers around the world, giving users the flexibility to choose and adjust the configuration of EC2 instances according to their actual needs. + +For new users, you don't need to think too much about JuiceFS configuration requirements, because even the least configured EC2 instances can be easily created and mounted to use JuiceFS storage. Usually, you only need to consider the hardware requirements of your business system. + +JuiceFS clients will occupy 1GB of disk as cache by default. When dealing with a large number of files, the client will cache the data on disk first and then upload it to the object storage asynchronously. Choosing a disk with higher IO and reserving and setting a larger cache will allow JuiceFS to have better performance. + +### 2. Object Storage + +Amazon S3 is the de facto standard for public cloud object storage services, and the object storage services provided by other major cloud platforms are usually compatible with the S3 API, which allows programs developed for S3 to freely switch between object storage services of other platforms. + +JuiceFS fully supports Amazon S3 and all S3-like object storage services, and you can see the documentation for [all object storage services supported by JuiceFS](../reference/how_to_setup_object_storage.md). + +Amazon S3 offers a range of storage classes suitable for different use cases, the main ones being + +- Amazon S3 STANDARD: general-purpose storage for frequently accessed data +- Amazon S3 STANDARD_IA: for data that is needed for a long time but accessed less frequently +- S3 Glacier: for data that is archived over time + +The standard type of S3 should usually be used for JuiceFS, because other types than the standard type are less expensive but incur additional costs when retrieving data. + +In addition, access to the object storage service requires user authentication via `Access Key` and `Secret Key`, which you can refer to the document [Controlling Access to Storage Buckets with User Policies](https://docs.aws.amazon.com/AmazonS3/latest/userguide/walkthrough1.html) to create it. When accessing S3 through EC2 cloud server, you can also assign [IAM role](https://docs.aws.amazon.com/IAM/latest/UserGuide/id_roles.html) to EC2 to enable key-free invocation of S3 API on EC2. + +### 3. Database + +The ability of data and metadata to be accessed by multiple hosts is key to a distributed file system, and in order for the metadata information generated by JuiceFS to be accessible via Internet requests like S3, the database for storing metadata should also be chosen as a network-oriented database. 
+
+Amazon RDS and ElastiCache are two cloud database services provided by AWS, both of which can be directly used for metadata storage in JuiceFS. Amazon RDS is a relational database service that supports various engines such as MySQL, MariaDB, and PostgreSQL. ElastiCache is a memory-based caching cluster service with two engines; the Redis engine is the one suitable for JuiceFS.
+
+In addition, you can also build your own database on an EC2 cloud server for JuiceFS to store metadata.
+
+### 4. Cautions
+
+- JuiceFS is not intrusive to your business and will not affect the normal operation of existing systems.
+- When selecting cloud services, it is recommended to put all of them in the same **region**, which is equivalent to having all services on the same intranet, with the lowest latency and the fastest access between them. In addition, according to AWS billing rules, transferring data between basic cloud services in the same region is free. In other words, if you select cloud services in different regions, for example EC2 in `ap-east-1`, ElastiCache in `ap-southeast-1`, and S3 in `us-east-2`, the access between these cloud services will incur traffic charges.
+- JuiceFS does not require the use of object storage and databases from the same cloud platform; you can flexibly mix and match cloud services from different platforms as needed. For example, you can use EC2 to run the JuiceFS client with Alibaba Cloud's Redis database and Backblaze B2 object storage. Of course, JuiceFS storage composed of cloud services on the same platform and in the same region will perform even better.
+
+## Deployment and Usage
+
+Next, we briefly describe how to install and use JuiceFS, taking as an example an EC2 cloud server, S3 object storage, and an ElastiCache cluster with the Redis engine, all in the same region.
+
+### 1. Install the client
+
+Here we are using a Linux system with x86-64 architecture. Execute the following commands to download the latest version of the JuiceFS client.
+
+```shell
+$ JFS_LATEST_TAG=$(curl -s https://api.github.com/repos/juicedata/juicefs/releases/latest | grep 'tag_name' | cut -d '"' -f 4 | tr -d 'v')
+```
+
+```shell
+$ wget "https://github.com/juicedata/juicefs/releases/download/v${JFS_LATEST_TAG}/juicefs-${JFS_LATEST_TAG}-linux-amd64.tar.gz"
+```
+
+After downloading, unpack the program into the `juice` folder.
+
+```shell
+$ mkdir juice && tar -zxvf "juicefs-${JFS_LATEST_TAG}-linux-amd64.tar.gz" -C juice
+```
+
+Install the JuiceFS client to the system $PATH, e.g., `/usr/local/bin`.
+
+```shell
+$ sudo install juice/juicefs /usr/local/bin
+```
+
+Run the `juicefs` command; if it returns the help message shown below, the client has been installed successfully.
+
+```shell
+$ juicefs
+NAME:
+   juicefs - A POSIX file system built on Redis and object storage.
+
+USAGE:
+   juicefs [global options] command [command options] [arguments...]
+ +VERSION: + 0.17.0 (2021-09-24T04:17:26Z e115dc4) + +COMMANDS: + format format a volume + mount mount a volume + umount unmount a volume + gateway S3-compatible gateway + sync sync between two storage + rmr remove directories recursively + info show internal information for paths or inodes + bench run benchmark to read/write/stat big/small files + gc collect any leaked objects + fsck Check consistency of file system + profile analyze access log + stats show runtime statistics + status show status of JuiceFS + warmup build cache for target directories/files + dump dump metadata into a JSON file + load load metadata from a previously dumped JSON file + help, h Shows a list of commands or help for one command + +GLOBAL OPTIONS: + --verbose, --debug, -v enable debug log (default: false) + --quiet, -q only warning and errors (default: false) + --trace enable trace log (default: false) + --no-agent Disable pprof (:6060) and gops (:6070) agent (default: false) + --help, -h show help (default: false) + --version, -V print only the version (default: false) + +COPYRIGHT: + Apache License 2.0 +``` + +> **Tip**: If you execute the `juicefs` command and the terminal returns `command not found`, it may be because the `/usr/local/bin` directory is not in the system's `PATH` executable path. You can use the `echo $PATH` command to check the system's set executable path and reinstall the client to the correct location. You can also add `/usr/local/bin` to the `PATH`. + +JuiceFS has good cross-platform compatibility and is supported on both Linux, Windows and macOS. If you need to know how to install it on other systems, please check the [official documentation](../getting-started/installation.md). + +### 3. Create File System + +The `format` subcommand of the JuiceFS client is used to create (format) the file system, here we use S3 as the data store and ElastiCache as the metadata store, install the client on EC2 and create the JuiceFS file system with the following command format. + +```shell +$ juicefs format \ + --storage s3 \ + --bucket https://.s3..amazonaws.com \ + --access-key \ + --secret-key \ + redis://[]:@:6379/1 \ + mystor +``` + +**Option Description:** + +- `--storage`: Specify the type of object storage, here we use S3. +- `--bucket`: Bucket domain for object storage. +- `--access-key` and `--secret-key`: The secret key pair to access the S3 API. + +> For Redis 6.0 and above, authentication requires both username and password, and the address format is `redis://username:password@redis-server-url:6379/1`. For Reids 4.0 and 5.0, authentication requires only the password, and the username needs to be left blank when setting the Redis server address. For example: `redis://:password@redis-server-url:6379/1`. + +When using the IAM role to bind to EC2, you only need to specify `--storage` and `--bucket` options, and do not need to provide the API access key. It is also possible to assign ElastiCache access to the IAM role, and then instead of providing Redis authentication information, you can simply enter the Redis URL, which can be rewritten as + +```shell +$ juicefs format \ + --storage s3 \ + --bucket https://herald-demo.s3..amazonaws.com \ + redis://herald-demo.abcdefg.0001.apse1.cache.amazonaws.com:6379/1 \ + mystor +``` + +Seeing output like the following means that the file system was created successfully. 
+
+```shell
+2021/10/14 08:38:32.211044 juicefs[10391] : Meta address: redis://herald-demo.abcdefg.0001.apse1.cache.amazonaws.com:6379/1
+2021/10/14 08:38:32.216566 juicefs[10391] : Ping redis: 383.789µs
+2021/10/14 08:38:32.216915 juicefs[10391] : Data use s3://herald-demo/mystor/
+2021/10/14 08:38:32.412112 juicefs[10391] : Volume is formatted as {Name:mystor UUID:21a2cafd-f5d8-4a76-ae4d-482c8e2d408d Storage:s3 Bucket:https://herald-demo.s3.ap-southeast-1.amazonaws.com AccessKey: SecretKey: BlockSize:4096 Compression:none Shards:0 Partitions:0 Capacity:0 Inodes:0 EncryptKey:}
+```
+
+### 4. Mount the file system
+
+When the file system is created, the object storage information (including the API keys) is stored in the database, so you do not need to enter the bucket domain or the secret key of the object storage when mounting.
+
+Use the `mount` subcommand of the JuiceFS client to mount the file system to the `/mnt/jfs` directory.
+
+```shell
+$ sudo juicefs mount -d redis://[]:@:6379/1 /mnt/jfs
+```
+
+> **Note**: When mounting the file system, only the database address is required, not the file system name. The default cache path is `/var/jfsCache`; please make sure the current user has sufficient read/write permissions.
+
+You can optimize JuiceFS by adjusting the [mount options](../reference/command_reference.md#juicefs-mount), for example, using `--cache-size` to increase the cache to 20GB.
+
+```shell
+$ sudo juicefs mount --cache-size 20480 -d redis://herald-demo.abcdefg.0001.apse1.cache.amazonaws.com:6379/1 /mnt/jfs
+```
+
+Seeing output like the following means the file system was mounted successfully.
+
+```shell
+2021/10/14 08:47:49.623814 juicefs[10601] : Meta address: redis://herald-demo.abcdefg.0001.apse1.cache.amazonaws.com:6379/1
+2021/10/14 08:47:49.628157 juicefs[10601] : Ping redis: 426.127µs
+2021/10/14 08:47:49.628941 juicefs[10601] : Data use s3://herald-demo/mystor/
+2021/10/14 08:47:49.629198 juicefs[10601] : Disk cache (/var/jfsCache/21a2cafd-f5d8-4a76-ae4d-482c8e2d408d/): capacity (20480 MB), free ratio (10%), max pending pages (15)
+2021/10/14 08:47:50.132003 juicefs[10601] : OK, mystor is ready at /mnt/jfs
+```
+
+Using the `df` command, you can see how the file system is mounted.
+
+```shell
+$ df -Th
+File system type capacity used usable used% mount point
+JuiceFS:mystor fuse.juicefs 1.0P 64K 1.0P 1% /mnt/jfs
+```
+
+Once mounted, it can be used like a local disk, and the data stored in the `/mnt/jfs` directory is coordinated by the JuiceFS client and eventually stored in the S3 object store.
+
+> **Multi-Host Sharing**: JuiceFS supports being mounted by multiple hosts at the same time. You can install the JuiceFS client on any cloud server on any platform and mount the same file system by using the same database address, as long as every host that mounts the file system has proper access to the database and to the S3 bucket used with it.
+
+### 5. Unmount JuiceFS
+
+The file system can be unmounted using the `umount` command provided by the JuiceFS client, e.g.
+
+```shell
+$ sudo juicefs umount /mnt/jfs
+```
+
+> **Note**: Forced unmount of the file system in use may result in data corruption or loss, so please be sure to proceed with caution.
+
+### 6. Auto-mount on boot
+
+If you don't want to re-mount JuiceFS storage manually every time you reboot your system, you can set up an automatic mount.
+ +First, you need to rename the `juicefs` client to `mount.juicefs` and copy it to the `/sbin/` directory. + +```shell +$ sudo cp juice/juicefs /sbin/mount.juicefs +``` + +Edit the `/etc/fstab` configuration file and add a new record. + +```shell +redis://[]:@:6379/1 /mnt/jfs juicefs _netdev,cache-size=20480 0 0 +``` + +The mount option `cache-size=20480` means to allocate 20GB local disk space for JuiceFS cache, please decide the allocated cache size based on your actual EBS disk capacity. + +You can adjust the FUSE mount options in the above configuration as needed, for more details please [check the documentation](../reference/fuse_mount_options.md). + +> **Note**: Please replace the Redis address, mount point, and mount options in the above configuration file with your actual information. diff --git a/docs/en/tutorials/digitalocean.md b/docs/en/tutorials/digitalocean.md new file mode 100644 index 0000000..03208e9 --- /dev/null +++ b/docs/en/tutorials/digitalocean.md @@ -0,0 +1,292 @@ +--- +sidebar_label: Use JuiceFS on DigitalOcean +sidebar_position: 5 +slug: /clouds/digitalocean +--- +# Use JuiceFS on DigitalOcean + +JuiceFS is designed for the cloud, using the cloud platform out-of-the-box storage and database services, and can be configured and put into use in as little as a few minutes. This article uses the DigitalOcean as an example to introduce how to quickly and easily install and use JuiceFS on the cloud computing platform. + +## Preparation + +JuiceFS is powered by a combination of storage and database, so the things you need to prepare should include. + +### 1. Cloud Server +The cloud server on DigitalOcean is called Droplet. If you already have a Droplet, you do not need to purchase a new one separately in order to use JuiceFS. Whichever cloud server needs to use JuiceFS storage on it, install the JuiceFS client for it. + +#### Hardware Specifications +JuiceFS has no special hardware requirements, and any size Droplet can be used stably. However, it is recommended to choose a better performing SSD and reserve at least 1GB for JuiceFS to use as local cache. + +#### Operating System + +JuiceFS supports Linux, BSD, macOS and Windows. In this article, we will take Ubuntu Server 20.04 as an example. + +### 2. Object Storage + +JuiceFS uses object storage to store all your data, and using Spaces on DigitalOcean is the easiest solution. Spaces is an S3-compatible object storage service that works right out of the box. It is recommended to choose the same region as Droplet to get the best access speed and also to avoid additional traffic costs. + +Of course, you can also use an object storage service from another platform or build it manually using Ceph or MinIO. In short, you are free to choose the object storage you want to use, just make sure that the JuiceFS client can access the object storage. + +Here, we created a Spaces storage bucket named `juicefs` with the region `sgp1` in Singapore, and it is accessible at: + +- https://juicefs.sgp1.digitaloceanspaces.com + +In addition, you also need to create `Spaces access keys` in the API menu, which JuiceFS needs to access the Spaces API. + +### 3. Database + +Unlike normal file systems, JuiceFS stores all metadata corresponding to the data in a separate database, and the larger the size of the stored data, the better the performance. Currently, JuiceFS supports common databases such as Redis, TiKV, MySQL/MariaDB, PostgreSQL, SQLite, etc., while support for other databases is under continuous development. 
If the database you need is not supported at the moment, please submit an [issue](https://github.com/juicedata/juicefs/issues) as feedback.
+
+Each database has its own advantages and disadvantages in terms of performance, size and reliability, and you should choose according to the actual needs of your scenario.
+
+Don't worry too much about the choice of database; the JuiceFS client provides a metadata migration feature that allows you to easily export and migrate metadata from one database to another.
+
+For this article, we use DigitalOcean's Redis 6 database hosting service, choose `Singapore`, and select the same VPC private network as the existing Droplet. Creating the Redis instance takes about 5 minutes, and we follow the setup wizard to initialize the database.
+
+![](../images/digitalocean-redis-guide.png)
+
+By default, Redis allows all inbound connections. For security reasons, you should select the Droplets that are allowed to access Redis in the `Add trusted sources` part of the security settings in the setup wizard, that is, only allow the selected hosts to access Redis.
+
+For the eviction policy, it is recommended to select `noeviction`, that is, when memory is exhausted, errors are reported but no data is evicted.
+
+> **Note**: In order to ensure the safety and integrity of metadata, please do not select `allkeys-lru` or `allkeys-random` for the eviction policy.
+
+The access address of the Redis instance can be found in the `Connection Details` section of the console. If all computing resources are in DigitalOcean, it is recommended to use the VPC private network for the connection, which maximizes security.
+
+![](../images/digitalocean-redis-url.png)
+
+## Installation and Use
+
+### 1. Install JuiceFS client
+
+We are currently using Ubuntu Server 20.04; execute the following commands in sequence to install the latest version of the client.
+
+Check the current system and set a temporary environment variable:
+
+```shell
+$ JFS_LATEST_TAG=$(curl -s https://api.github.com/repos/juicedata/juicefs/releases/latest | grep 'tag_name' | cut -d '"' -f 4 | tr -d 'v')
+```
+
+Download the latest version of the client software package adapted to the current system:
+
+```shell
+$ wget "https://github.com/juicedata/juicefs/releases/download/v${JFS_LATEST_TAG}/juicefs-${JFS_LATEST_TAG}-linux-amd64.tar.gz"
+```
+
+Unpack the installation package:
+
+```shell
+$ mkdir juice && tar -zxvf "juicefs-${JFS_LATEST_TAG}-linux-amd64.tar.gz" -C juice
+```
+
+Install the client to `/usr/local/bin`:
+
+```shell
+$ sudo install juice/juicefs /usr/local/bin
+```
+
+Run the `juicefs` command; if it returns the help information shown below, the client has been installed successfully.
+
+```shell
+$ juicefs
+
+NAME:
+   juicefs - A POSIX file system built on Redis and object storage.
+
+USAGE:
+   juicefs [global options] command [command options] [arguments...]
+ +VERSION: + 0.16.2 (2021-08-25T04:01:15Z 29d6fee) + +COMMANDS: + format format a volume + mount mount a volume + umount unmount a volume + gateway S3-compatible gateway + sync sync between two storage + rmr remove directories recursively + info show internal information for paths or inodes + bench run benchmark to read/write/stat big/small files + gc collect any leaked objects + fsck Check consistency of file system + profile analyze access log + stats show runtime stats + status show status of JuiceFS + warmup build cache for target directories/files + dump dump metadata into a JSON file + load load metadata from a previously dumped JSON file + help, h Shows a list of commands or help for one command + +GLOBAL OPTIONS: + --verbose, --debug, -v enable debug log (default: false) + --quiet, -q only warning and errors (default: false) + --trace enable trace log (default: false) + --no-agent Disable pprof (:6060) and gops (:6070) agent (default: false) + --help, -h show help (default: false) + --version, -V print only the version (default: false) + +COPYRIGHT: + Apache License 2.0 +``` + +In addition, you can also visit the [JuiceFS GitHub Releases](https://github.com/juicedata/juicefs/releases) page to select other versions for manual installation. + +### 2. Create a file system + +To create a file system, use the `format` subcommand, the format is: + +```shell +$ juicefs format [command options] META-URL NAME +``` + +The following command creates a file system named `mystor`: + +```shell +$ juicefs format \ + --storage space \ + --bucket https://juicefs.sgp1.digitaloceanspaces.com \ + --access-key \ + --secret-key \ + rediss://default:your-password@private-db-redis-sgp1-03138-do-user-2500071-0.b.db.ondigitalocean.com:25061/1 \ + mystor +``` + +**Parameter Description:** + +- `--storage`: Specify the data storage engine, here is `space`, click here to view all [supported storage](../reference/how_to_setup_object_storage.md). +- `--bucket`: Specify the bucket access address. +- `--access-key` and `--secret-key`: Specify the secret key for accessing the object storage API. +- The Redis managed by DigitalOcean needs to be accessed with TLS/SSL encryption, so it needs to use the `rediss://` protocol header. The `/1` added at the end of the link represents the use of Redis's No. 1 database. + +If you see output similar to the following, it means that the file system is created successfully. + +```shell +2021/08/23 16:36:28.450686 juicefs[2869028] : Meta address: rediss://default@private-db-redis-sgp1-03138-do-user-2500071-0.b.db.ondigitalocean.com:25061/1 +2021/08/23 16:36:28.481251 juicefs[2869028] : AOF is not enabled, you may lose data if Redis is not shutdown properly. +2021/08/23 16:36:28.481763 juicefs[2869028] : Ping redis: 331.706µs +2021/08/23 16:36:28.482266 juicefs[2869028] : Data uses space://juicefs/mystor/ +2021/08/23 16:36:28.534677 juicefs[2869028] : Volume is formatted as {Name:mystor UUID:6b0452fc-0502-404c-b163-c9ab577ec766 Storage:space Bucket:https://juicefs.sgp1.digitaloceanspaces.com AccessKey:7G7WQBY2QUCBQC5H2DGK SecretKey:removed BlockSize:4096 Compression:none Shards:0 Partitions:0 Capacity:0 Inodes:0 EncryptKey:} +``` + +### 3. Mount a file system + +To mount a file system, use the `mount` subcommand, and use the `-d` parameter to mount it as a daemon. 
The following command mounts the newly created file system to the `mnt` directory under the current directory:
+
+```shell
+$ sudo juicefs mount -d \
+    rediss://default:your-password@private-db-redis-sgp1-03138-do-user-2500071-0.b.db.ondigitalocean.com:25061/1 mnt
+```
+
+The purpose of using `sudo` for the mount operation is to allow JuiceFS to create the cache directory under `/var/`. Please note that when mounting the file system, you only need to specify the `database address` and the `mount point`, not the name of the file system.
+
+If you see output similar to the following, it means that the file system is mounted successfully.
+
+```shell
+2021/08/23 16:39:14.202151 juicefs[2869081] : Meta address: rediss://default@private-db-redis-sgp1-03138-do-user-2500071-0.b.db.ondigitalocean.com:25061/1
+2021/08/23 16:39:14.234925 juicefs[2869081] : AOF is not enabled, you may lose data if Redis is not shutdown properly.
+2021/08/23 16:39:14.235536 juicefs[2869081] : Ping redis: 446.247µs
+2021/08/23 16:39:14.236231 juicefs[2869081] : Data use space://juicefs/mystor/
+2021/08/23 16:39:14.236540 juicefs[2869081] : Disk cache (/var/jfsCache/6b0452fc-0502-404c-b163-c9ab577ec766/): capacity (1024 MB), free ratio (10%), max pending pages (15)
+2021/08/23 16:39:14.738416 juicefs[2869081] : OK, mystor is ready at mnt
+```
+
+Use the `df` command to see the mounting status of the file system:
+
+```shell
+$ df -Th
+File system type capacity used usable used% mount point
+JuiceFS:mystor fuse.juicefs 1.0P 64K 1.0P 1% /home/herald/mnt
+```
+
+As you can see from the mount output, JuiceFS sets 1024 MB as the local cache by default. A larger cache generally gives JuiceFS better performance. You can set the cache size (in MiB) through the `--cache-size` option when mounting a file system. For example, to set a 20GB local cache:
+
+```shell
+$ sudo juicefs mount -d --cache-size 20000 \
+    rediss://default:your-password@private-db-redis-sgp1-03138-do-user-2500071-0.b.db.ondigitalocean.com:25061/1 mnt
+```
+
+After the file system is mounted, you can store data in the `~/mnt` directory just like using a local hard disk.
+
+### 4. File system status
+
+Use the `status` subcommand to view the basic information and connection status of a file system. You only need to specify the database URL.
+
+```shell
+$ juicefs status rediss://default:bn8l7ui2cun4iaji@private-db-redis-sgp1-03138-do-user-2500071-0.b.db.ondigitalocean.com:25061/1
+2021/08/23 16:48:48.567046 juicefs[2869156] : Meta address: rediss://default@private-db-redis-sgp1-03138-do-user-2500071-0.b.db.ondigitalocean.com:25061/1
+2021/08/23 16:48:48.597513 juicefs[2869156] : AOF is not enabled, you may lose data if Redis is not shutdown properly.
+2021/08/23 16:48:48.598193 juicefs[2869156] : Ping redis: 491.003µs
+{
+  "Setting": {
+    "Name": "mystor",
+    "UUID": "6b0452fc-0502-404c-b163-c9ab577ec766",
+    "Storage": "space",
+    "Bucket": "https://juicefs.sgp1.digitaloceanspaces.com",
+    "AccessKey": "7G7WQBY2QUCBQC5H2DGK",
+    "SecretKey": "removed",
+    "BlockSize": 4096,
+    "Compression": "none",
+    "Shards": 0,
+    "Partitions": 0,
+    "Capacity": 0,
+    "Inodes": 0
+  },
+  "Sessions": [
+    {
+      "Sid": 1,
+      "Heartbeat": "2021-08-23T16:46:14+08:00",
+      "Version": "0.16.2 (2021-08-25T04:01:15Z 29d6fee)",
+      "Hostname": "ubuntu-s-1vcpu-1gb-sgp1-01",
+      "MountPoint": "/home/herald/mnt",
+      "ProcessID": 2869091
+    },
+    {
+      "Sid": 2,
+      "Heartbeat": "2021-08-23T16:47:59+08:00",
+      "Version": "0.16.2 (2021-08-25T04:01:15Z 29d6fee)",
+      "Hostname": "ubuntu-s-1vcpu-1gb-sgp1-01",
+      "MountPoint": "/home/herald/mnt",
+      "ProcessID": 2869146
+    }
+  ]
+}
+```
+
+### 5. Unmount a file system
+
+Use the `umount` subcommand to unmount a file system, for example:
+
+```shell
+$ sudo juicefs umount ~/mnt
+```
+
+> **Note**: Forcibly unmounting a file system that is in use may cause data corruption or loss, so please proceed with caution.
+
+### 6. Auto mount at boot
+
+If you don't want to manually remount JuiceFS every time you restart the system, you can set up automatic mounting.
+
+First, you need to rename the `juicefs` client to `mount.juicefs` and copy it to the `/sbin/` directory:
+
+```shell
+$ sudo cp /usr/local/bin/juicefs /sbin/mount.juicefs
+```
+
+Edit the `/etc/fstab` configuration file and add a new record:
+
+```shell
+rediss://default:bn8l7ui2cun4iaji@private-db-redis-sgp1-03138-do-user-2500071-0.b.db.ondigitalocean.com:25061/1 /home/herald/mnt juicefs _netdev,cache-size=20480 0 0
+```
+
+In the mount options, `cache-size=20480` means allocating 20GiB of local disk space as the local cache of JuiceFS. Please decide the cache size according to the actual hardware. You can adjust the [FUSE mount options](../reference/fuse_mount_options.md) in the above configuration according to your needs.
+
+### 7. Multi-host sharing
+
+The JuiceFS file system can be mounted by multiple cloud servers at the same time, with no requirement on the geographic location of those servers. It makes it easy to share data in real time between servers on the same platform, across cloud platforms, and between public and private clouds.
+
+Moreover, shared mounts of JuiceFS provide a strong data consistency guarantee: when multiple servers mount the same file system, writes confirmed on the file system are visible on all hosts in real time.
+
+To use a shared mount, make sure that the database and the object storage service that make up the file system can be accessed by every host that mounts it. In the demonstration environment of this article, the Spaces object storage is accessible from the entire Internet and can be read and written through the API as long as the correct access key is used. For the Redis database managed by DigitalOcean, however, you need to configure the access policy appropriately to ensure that hosts outside the platform have access permissions.
+
+When you mount the same file system on multiple hosts, first create the file system on any one host, then install the JuiceFS client on every host and mount it with the `mount` command using the same database address.
Pay special attention to the fact that the file system only needs to be created once, and there should be no need to repeat file system creation operations on other hosts. diff --git a/docs/en/tutorials/juicefs_on_k3s.md b/docs/en/tutorials/juicefs_on_k3s.md new file mode 100644 index 0000000..86573d1 --- /dev/null +++ b/docs/en/tutorials/juicefs_on_k3s.md @@ -0,0 +1,273 @@ +--- +sidebar_label: Use JuiceFS on K3s +sidebar_position: 1 +slug: /juicefs_on_k3s +--- +# Use JuiceFS on K3s + +[K3s](https://k3s.io/) is a functionally optimized lightweight Kubernetes distribution that is fully compatible with Kubernetes, that is, almost all operations on Kubernetes can be performed on K3s. K3s has packaged the entire container orchestration system into a binary program with a capacity of less than 100MB, which greatly reduces the environment dependency and installation of deploying Kubernetes production clusters. In contrast, K3s has lower performance requirements for the operating system, and ARM devices such as Raspberry Pi can be used to form a cluster. + +In this article, we will build a K3s cluster with two nodes, install and configure [JuiceFS CSI Driver](https://github.com/juicedata/juicefs-csi-driver) for the cluster, and finally create a Nginx Pod for verification. + +## Deploy a K3s cluster + +K3s has very low **minimum requirements** for hardware: + +- **Memory**:512MB+(recommend 1GB+) +- **CPU**:1 core + +When deploying a production cluster, you can usually use the Raspberry Pi 4B (4 CPU cores, 8G memory) as the starting point for the hardware of a node. For details, see [Hardware Requirements](https://rancher.com/docs/k3s/latest/en/installation/installation-requirements/#hardware). + +### K3s server node + +The IP address of the server node is: `192.168.1.35` + +Use the script officially provided by K3s to deploy the server node on a regular Linux distribution. + +```shell +$ curl -sfL https://get.k3s.io | sh - +``` + +After the deployment is successful, the K3s service will automatically start, and kubectl and other tools will also be installed. + +Execute the command to view the status of the node: + +```shell +$ sudo kubectl get nodes +NAME STATUS ROLES AGE VERSION +k3s-s1 Ready control-plane,master 28h v1.21.4+k3s1 +``` + +Get the `node-token`: + +```shell +$ sudo -u root cat /var/lib/rancher/k3s/server/node-token +K1041f7c4fabcdefghijklmnopqrste2ec338b7300674f::server:3d0ab12800000000000000006328bbd80 +``` + +### K3s worker node + +The IP address of the worker node is: `192.168.1.36` + +Execute the following command and change the value of `K3S_URL` to the IP or domain name of the server node, the default port is `6443`. Replace the value of `K3S_TOKEN` with the `node-token` obtained from the server node. + +```shell +$ curl -sfL https://get.k3s.io | K3S_URL=http://192.168.1.35:6443 K3S_TOKEN=K1041f7c4fabcdefghijklmnopqrste2ec338b7300674f::server:3d0ab12800000000000000006328bbd80 sh - +``` + +After the deployment is successful, go back to the server node to check the node status: + +```shell +$ sudo kubectl get nodes +NAME STATUS ROLES AGE VERSION +k3s-s1 Ready control-plane,master 28h v1.21.4+k3s1 +k3s-n1 Ready 28h v1.21.4+k3s1 +``` + +## Install CSI Driver + +It is consistent with the method of [Use JuiceFS on Kubernetes](../deployment/how_to_use_on_kubernetes.md), you can install it through Helm or kubectl. 
+
+Here we use kubectl; execute the following command to install the CSI Driver:
+
+```shell
+$ kubectl apply -f https://raw.githubusercontent.com/juicedata/juicefs-csi-driver/master/deploy/k8s.yaml
+```
+
+### Create Storage Class
+
+Copy and modify the following code to create a configuration file, for example: `juicefs-sc.yaml`
+
+```yaml
+apiVersion: v1
+kind: Secret
+metadata:
+  name: juicefs-sc-secret
+  namespace: kube-system
+type: Opaque
+stringData:
+  name: "test"
+  metaurl: "redis://juicefs.afyq4z.0001.use1.cache.amazonaws.com/3"
+  storage: "s3"
+  bucket: "https://juicefs-test.s3.us-east-1.amazonaws.com"
+  access-key: ""
+  secret-key: ""
+---
+apiVersion: storage.k8s.io/v1
+kind: StorageClass
+metadata:
+  name: juicefs-sc
+provisioner: csi.juicefs.com
+reclaimPolicy: Retain
+volumeBindingMode: Immediate
+parameters:
+  csi.storage.k8s.io/node-publish-secret-name: juicefs-sc-secret
+  csi.storage.k8s.io/node-publish-secret-namespace: kube-system
+  csi.storage.k8s.io/provisioner-secret-name: juicefs-sc-secret
+  csi.storage.k8s.io/provisioner-secret-namespace: kube-system
+```
+
+The `stringData` part of the configuration file is used to set the information related to the JuiceFS file system. The file system will be created based on the information you specify. When you want to use a pre-created file system in the storage class, you only need to fill in `name` and `metaurl`; the other items can be deleted or left blank.
+
+Execute the command to deploy the storage class:
+
+```shell
+$ kubectl apply -f juicefs-sc.yaml
+```
+
+View the storage class status:
+
+```shell
+$ sudo kubectl get sc
+NAME PROVISIONER RECLAIMPOLICY VOLUMEBINDINGMODE ALLOWVOLUMEEXPANSION AGE
+local-path (default) rancher.io/local-path Delete WaitForFirstConsumer false 28h
+juicefs-sc csi.juicefs.com Retain Immediate false 28h
+```
+
+> **Note**: A storage class is associated with a JuiceFS file system. You can create as many storage classes as you need, but pay attention to the storage class name in the configuration file to avoid name conflicts.
+
+## Use JuiceFS to persist Nginx data
+
+Next, deploy an Nginx Pod, using the persistent storage declared by the JuiceFS storage class.
+
+### Deployment
+
+Create a configuration file, for example: `deployment.yaml`
+
+```yaml
+apiVersion: v1
+kind: PersistentVolumeClaim
+metadata:
+  name: web-pvc
+spec:
+  accessModes:
+    - ReadWriteMany
+  resources:
+    requests:
+      storage: 10Pi
+  storageClassName: juicefs-sc
+---
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+  name: nginx-run
+  labels:
+    app: nginx
+spec:
+  replicas: 2
+  selector:
+    matchLabels:
+      app: nginx
+  template:
+    metadata:
+      labels:
+        app: nginx
+    spec:
+      containers:
+        - name: nginx
+          image: linuxserver/nginx
+          ports:
+            - containerPort: 80
+          volumeMounts:
+            - mountPath: /config
+              name: web-data
+      volumes:
+        - name: web-data
+          persistentVolumeClaim:
+            claimName: web-pvc
+```
+
+Deploy it:
+
+```shell
+$ sudo kubectl apply -f deployment.yaml
+```
+
+### Service
+
+Create a configuration file, for example: `service.yaml`
+
+```yaml
+apiVersion: v1
+kind: Service
+metadata:
+  name: nginx-run-service
+spec:
+  selector:
+    app: nginx
+  ports:
+    - name: http
+      port: 80
+```
+
+Deploy it:
+
+```shell
+$ sudo kubectl apply -f service.yaml
+```
+
+### Ingress
+
+K3s comes with Traefik ingress pre-installed by default. Create an ingress for Nginx through the following configuration.
For example: `ingress.yaml`
+
+```yaml
+apiVersion: networking.k8s.io/v1
+kind: Ingress
+metadata:
+  name: nginx-run-ingress
+  annotations:
+    traefik.ingress.kubernetes.io/router.entrypoints: web
+spec:
+  rules:
+    - http:
+        paths:
+          - pathType: Prefix
+            path: "/web"
+            backend:
+              service:
+                name: nginx-run-service
+                port:
+                  number: 80
+```
+
+Deploy it:
+
+```shell
+$ sudo kubectl apply -f ingress.yaml
+```
+
+### Visit
+
+After the deployment is complete, access any cluster node from a host on the same LAN, and you will see the Nginx welcome page.
+
+![](../images/k3s-nginx-welcome.png)
+
+Next, check whether the containers have successfully mounted JuiceFS. Execute the command to check the pod status:
+
+```shell
+$ sudo kubectl get pods
+NAME READY STATUS RESTARTS AGE
+nginx-run-7d6fb7d6df-qhr2m 1/1 Running 0 28h
+nginx-run-7d6fb7d6df-5hpv7 1/1 Running 0 24h
+```
+
+Execute the command to view the file system mount status of any of the pods:
+
+```shell
+$ sudo kubectl exec nginx-run-7d6fb7d6df-qhr2m -- df -Th
+Filesystem Type Size Used Avail Use% Mounted on
+overlay overlay 20G 3.2G 17G 17% /
+tmpfs tmpfs 64M 0 64M 0% /dev
+tmpfs tmpfs 2.0G 0 2.0G 0% /sys/fs/cgroup
+JuiceFS:jfs fuse.juicefs 1.0P 174M 1.0P 1% /config
+/dev/sda1 ext4 20G 3.2G 17G 17% /etc/hosts
+shm tmpfs 64M 0 64M 0% /dev/shm
+tmpfs tmpfs 2.0G 12K 2.0G 1% /run/secrets/kubernetes.io/serviceaccount
+tmpfs tmpfs 2.0G 0 2.0G 0% /proc/acpi
+tmpfs tmpfs 2.0G 0 2.0G 0% /proc/scsi
+tmpfs tmpfs 2.0G 0 2.0G 0% /sys/firmware
+```
+
+As you can see, the file system named `jfs` has been mounted to the `/config` directory of the container, and the used space is 174M.
+
+This indicates that the Pods in the cluster have been successfully configured and are using JuiceFS to persist data.
diff --git a/docs/en/tutorials/juicefs_on_kubesphere.md b/docs/en/tutorials/juicefs_on_kubesphere.md
new file mode 100644
index 0000000..7d9f55c
--- /dev/null
+++ b/docs/en/tutorials/juicefs_on_kubesphere.md
@@ -0,0 +1,136 @@
+---
+sidebar_label: Use JuiceFS on KubeSphere
+sidebar_position: 3
+slug: /juicefs_on_kubesphere
+---
+# Use JuiceFS on KubeSphere
+
+[KubeSphere](https://kubesphere.com.cn/) is an application-centric multi-tenant container platform built on Kubernetes. It provides full-stack IT automated operation and maintenance capabilities and simplifies the DevOps workflow of the enterprise.
+
+KubeSphere provides a friendly, wizard-style operation interface, so even users who are not experienced with Kubernetes can start managing and using it relatively easily. It also provides a Helm-based app store through which various Kubernetes applications can be easily installed from a graphical interface.
+
+This article will introduce how to deploy JuiceFS CSI Driver in KubeSphere with one click to provide data persistence for various applications on the cluster.
+
+## Prerequisites
+
+1. **Install KubeSphere**
+
+There are two ways to install KubeSphere. One is to install it on Linux; you can refer to the document [All-in-One Installation of Kubernetes and KubeSphere on Linux](https://kubesphere.com.cn/en/docs/quick-start/all-in-one-on-linux/). The other is to install it on an existing Kubernetes cluster; you can refer to the document [Minimal KubeSphere on Kubernetes](https://kubesphere.com.cn/en/docs/quick-start/minimal-kubesphere-on-k8s/).
+
+2. 
**Enable app store in KubeSphere** + +You can refer to the documentation for enabling the app store in KubeSphere: [KubeSphere App Store](https://kubesphere.com.cn/en/docs/pluggable-components/app-store/) + +## Install JuiceFS CSI Driver + +If the version of KubeSphere is v3.2.0 and above, you can install CSI Driver directly in the app store, skip the "Configure Application Template/Application Repository" step, and go directly to the "Install" step; if the KubeSphere version is lower than v3.2.0, follow the steps below to configure application templates/application repository. + +### Configure of Application Template/Application Repository + +To install JuiceFS CSI Driver, you first need to create an application template. There are two methods. + +#### Method one: Application Repository + +Click in the workspace to enter the application management, select "App Repositories", click the create button to add JuiceFS CSI Repository, fill in: + +- Repository name: juicefs-csi-driver +- Index URL:https://juicedata.github.io/juicefs-csi-driver/ + +![](../images/kubesphere_app_shop_en.png) + +#### Method two: Application Template + +Download the chart compressed package from the JuiceFS CSI Driver warehouse: https://github.com/juicedata/juicefs-csi-driver/releases. + +In the "Workspace", click to enter the "App Management", select "App Templates", click "create", upload the chart compression package: + +![](../images/kubesphere_app_template_en.png) + +### Install + +Select "Project" where you want to deploy in the "Workspace" (the project in KubeSphere is the namespace in K8s), select "Apps", click the "create" button, select "From App Store", and then Select "juicefs": + +![](../images/kubesphere_shop_juicefs_en.png) + +If KubeSphere version is lower than v3.2.0, select button "From App Template" according to the application template configured in the previous step: + +![](../images/kubesphere_install_csi_en.png) + +It's the same after entering the configuration modification page, modify the following two places: + +- namespace: Change to the corresponding project name +- storageClass.backend: + The `backend` part is used to define the backend database and object storage of the file system. You can refer + to ["JuiceFS Quick Start Guide"](../getting-started/for_local.md) for related content. + +You can also quickly create databases (such as Redis) and object storage (such as MinIO) by KubeSphere's app store. For example, build on the KubeSphere platform Redis: Select "Apps" in the current project, click the "create" button, select "From App Store", select "Redis", and then quickly deploy. The access URL of Redis can be the service name of the deployed application, as follows: + +![](../images/kubesphere_redis_en.png) + +Deploying MinIO on the KubeSphere platform is a similar process, but you can modify the accessKey and secretKey of MinIO before deploying MinIO, and you need to remember the configured values. As shown below: + +![](../images/kubesphere_create_minio_en.png) + +> Attention: If there are permissions error when deploying MinIO, you can set the `securityContext.enables` in the configuration to false. + +MinIO's access URL can be the service name of the deployed application, as follows: + +![](../images/kubesphere_minio_en.png) + +After both Redis and MinIO are set up, you can fill in the `backend` value of JuiceFS CSI Driver. + +1. 
`metaurl` is the database address of the Redis instance just created; the access address of Redis can be the service name of the deployed Redis application, such as `redis://redis-rzxoz6:6379/1`
+2. `storage` is the object storage type, such as `minio`
+3. `bucket` is the available bucket of the MinIO instance just created (JuiceFS will create it automatically, no need to create it manually); the access address of MinIO can be the service name of the deployed MinIO application, such as `http://minio-qkp9my:9000/minio/test`
+4. `accessKey` and `secretKey` are the accessKey and secretKey of the MinIO instance just created
+
+![](../images/kubesphere_update_csi_en.png)
+
+After the configuration is modified, click "Install".
+
+## Usage
+
+### Deploy application
+
+The JuiceFS CSI Driver installed above has already created a `StorageClass`. For example, the `StorageClass` created above is `juicefs-sc`, which can be used directly.
+
+Then you need to create a PVC. In the "Project", select "Storage Management", then select "Storage Volume", click the "Create" button to create a PVC, and select "juicefs-sc" for the "StorageClass", as follows:
+
+![](../images/kubesphere_pvc_en.png)
+
+After the PVC is created, go to "Workloads" under "Apps" in the "Project", click the "Create" button to deploy a workload, and fill in any name you like on the "Basic Information" page. On the "Container Image" page, fill in the image `centos` and the start command `sh,-c,while true; do echo $(date -u) >> /data/out.txt; sleep 5; done`. For "Mount Volume", select "Existing Volume", then select the PVC created in the previous step and fill in `/data` as the path in the container, as follows:
+
+![](../images/kubesphere_deployment_en.png)
+
+![](../images/kubesphere_workload_en.png)
+
+After the deployment is completed, you can see the running pod:
+
+![](../images/kubesphere_pod_en.png)
+
+### Create StorageClass
+
+If you did not create a `StorageClass` when installing JuiceFS CSI Driver, or you need to create a new one, you can follow the steps below:
+
+After preparing the metadata service and object storage service, create a new `Secret`. On the "Platform Management" page, select "Configuration", select "Secret", and click the "Create" button to create a new one:
+
+![](../images/kubesphere_secret_en.png)
+
+Fill in the metadata service and object storage information in "Data Settings", as follows:
+
+![](../images/kubesphere_update_secret_en.png)
+
+After creating the `Secret`, create the `StorageClass`: select "Storage" on the "Platform Management" page, select "Storage Classes", click the "Create" button to create a new one, and select "Custom" for the "Storage Class":
+
+![](../images/kubesphere_sc_create_en.png)
+
+On the settings page, fill in `csi.juicefs.com` for "Storage System" and set the following 4 parameters:
+
+- `csi.storage.k8s.io/provisioner-secret-name`: secret name
+- `csi.storage.k8s.io/provisioner-secret-namespace`: project of secret
+- `csi.storage.k8s.io/node-publish-secret-name`: secret name
+- `csi.storage.k8s.io/node-publish-secret-namespace`: project of secret
+
+![](../images/kubesphere_sc_update_en.png)
+
+After clicking the "Create" button, the `StorageClass` is created.
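+
+If you also have `kubectl` access to the cluster, you can double-check the result from the command line. This is only a sketch: it assumes the storage class is named `juicefs-sc` and the secret you created is named `juicefs-sc-secret` in the `kube-system` project; adjust the names and namespace to what you actually used.
+
+```shell
+$ kubectl get sc juicefs-sc
+$ kubectl -n kube-system get secret juicefs-sc-secret
+```
+
+If both objects are listed, PVCs that reference this storage class will be provisioned by the JuiceFS CSI Driver.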
diff --git a/docs/en/tutorials/juicefs_on_rancher.md b/docs/en/tutorials/juicefs_on_rancher.md new file mode 100644 index 0000000..2cc1328 --- /dev/null +++ b/docs/en/tutorials/juicefs_on_rancher.md @@ -0,0 +1,107 @@ +--- +sidebar_label: Use JuiceFS on Rancher +sidebar_position: 2 +slug: /juicefs_on_rancher +--- +# Use JuiceFS on Rancher + +[Rancher](https://rancher.com/) is an enterprise-level Kubernetes cluster management system, which can be used to quickly complete the deployment of Kubernetes clusters on various cloud computing platforms. + +Rancher provides a browser-based management interface, even users who are not experienced in Kubernetes can start to manage and use easily. It is preset with Helm-based application market by default, and various Kubernetes applications can be installed very easy under the graphical interface. + +This article will introduce how to deploy Rancher on a Linux system and create a Kubernetes cluster with it, and then deploy JuiceFS CSI Driver with one click through the application market, thereby providing data persistence for various applications on the cluster. + +## Install Rancher + +Rancher can be installed on almost all modern Linux distributions. It can be installed directly on the operating system, or on Docker, Kubernetes, K3s or RKE. The installation is "Product-Ready" no matter which environment it is installed in. + +Here we choose to install Rancher on Docker, with the following requirements: + +- **Operating System**: Linux system with x86-64 architecture +- **Memory**: 4GB or more +- **Docker**: 19.03+ + +Run the following command to install Rancher: + +```shell +$ sudo docker run --privileged -d --restart=unless-stopped -p 80:80 -p 443:443 rancher/rancher +``` + +After the container is created, Rancher's management interface can be opened by accessing the IP address of the host. + +![](../images/rancher-welcome.jpeg) + +## Create a Kubernetes cluster + +After Rancher is installed, you can see that it has deployed a K3s cluster in the current container, and Rancher related resources are running in this internal K3s cluster, but we don't need to pay attention to this cluster now. + +Next, start to create a Kubernetes cluster. In the Cluster section of the welcome page, click `Create` to create a cluster. Rancher supports the creation of Kubernetes clusters on major cloud computing platforms. Here we need to create a cluster directly on Rancher's host, so choose `Custom`. Then fill in the cluster name according to the wizard and select the Kubernetes version. + +![](../images/rancher-cluster-create.jpg) + +In the `Cluster Options` page, select the node role to be created, then copy the generated command and execute it on the target host. + +![](../images/rancher-cluster-options.jpg) + +After the cluster is created, it will be displayed in Rancher's cluster list. + +![](../images/rancher-clusters.jpg) + +## One-click installation of JuiceFS CSI Driver + +In the cluster list, click to enter the Kubernetes cluster, click on the left navigation menu to expand `Apps & Marketplace` -> `Chart Repositories`, click the `Create` button to add JuiceFS CSI repository, fill in: + +- **Name**: juicefs +- **Index URL**: https://juicedata.github.io/juicefs-csi-driver/ + +![](../images/rancher-new-repo.jpg) + +And then, you can see the new repository in the list. + +![](../images/rancher-repos.jpg) + +Then click to open the `Apps & Marketplace` → `Charts` from the left menu, type `juicefs` in the search bar, and then click to open `juicefs-csi-driver`. 
+
+![](../images/rancher-chart-search.jpg)
+
+Click the "Install" button on the application details page; the latest version will be installed by default, or you can switch to a historical version to install.
+
+![](../images/rancher-chart-info.jpg)
+
+The installation wizard has two steps:
+
+### Step 1: Set up the `Namespace`
+
+The JuiceFS CSI Driver defaults to the `kube-system` namespace, so nothing needs to be changed in this step.
+
+### Step 2: Adjust configuration parameters
+
+This page provides a YAML editor where you can adjust JuiceFS-related information according to your needs. Usually you only need to modify the `storageClasses` part, where the `backend` section defines the backend database and object storage of the file system. You can refer to the [JuiceFS Quick Start Guide](../getting-started/for_local.md) to learn the details. If you are using a pre-created file system, you only need to fill in the two items `metaurl` and `name`, for example:
+
+```yaml
+...
+storageClasses:
+  - backend:
+      accessKey: ''
+      bucket: ''
+      metaurl: 'redis://:mypasswd@efgh123.redis.rds.aliyuncs.com/1'
+      name: myjfs
+      secretKey: ''
+      storage: ''
+    enabled: true
+    name: juicefs-sc
+    reclaimPolicy: Retain
+...
+```
+
+> **Tip**: If you have multiple JuiceFS file systems that need to be associated with different storage classes in the Kubernetes cluster, you can add more storage class configuration items to the `storageClasses` array. Pay attention to the name of each storage class to avoid name conflicts.
+
+Click "Install" and wait for the application installation to complete.
+
+![](../images/rancher-chart-installed.jpg)
+
+## Use JuiceFS to persist data
+
+When deploying an application, specify `juicefs-sc` in the storage configuration.
+
+![](../images/rancher-pvc.jpg)
diff --git a/docs/en/tutorials/juicefs_on_wsl.md b/docs/en/tutorials/juicefs_on_wsl.md
new file mode 100644
index 0000000..fa57403
--- /dev/null
+++ b/docs/en/tutorials/juicefs_on_wsl.md
@@ -0,0 +1,166 @@
+---
+sidebar_label: Use JuiceFS on WSL
+---
+# Use JuiceFS on WSL
+
+WSL stands for Windows Subsystem for Linux. It allows you to run most GNU/Linux native commands, tools, and programs in a Windows environment without the extra hardware overhead of a virtual machine or dual-boot system.
+
+## Installing WSL
+
+Using WSL requires Windows 10 version 2004 or higher, or Windows 11.
+
+To check the current system version, press Win + R to open the Run dialog, then type and run `winver`.
+
+![](../images/wsl/winver-en.png)
+
+After confirming the Windows version, open PowerShell or Windows Command Prompt as an administrator and run the installation command.
+
+```powershell
+wsl --install
+```
+
+This command will download the latest Linux kernel, install and set WSL 2 as the default version, and install the Linux distribution (Ubuntu by default).
+
+You can also directly specify the distribution to install:
+
+```powershell
+wsl --install -d ubuntu
+```
+
+:::tip
+Run `wsl --list --online` to view all available distributions.
+:::
+
+## Setting up Linux users and passwords
+
+Once the WSL installation is complete, you can find the newly installed Linux distribution in the Start menu.
+
+![](../images/wsl/startmenu-en.png)
+
+Click the Ubuntu subsystem shortcut and WSL will open the terminal of the Linux subsystem. The first time you run it, you will be asked to set the user and password for managing the Linux subsystem; just follow the prompts.
+ +![](../images/wsl/init.png) + +There are several points to note about the username and password set here: + +- This user is dedicated to the administration of this Linux subsystem and is not related to the users on the Windows system. +- This user will be the default user of the Linux subsystem and will be automatically logged in at boot time. +- this user will be considered as the administrator of the Linux subsystem and will be allowed to execute `sudo` commands. +- Multiple Linux subsystems are allowed to run at the same time in WSL, and each subsystem needs to have an administrative user. + +## Using JuiceFS in WSL + +Using JuiceFS in WSL means using JuiceFS on a Linux system, and here is an example of the Community Edition. + +### Install the client + +Install the JuiceFS client on the Linux subsystem by executing the following command in sequence. + +1. Get the latest version number + + ```shell + JFS_LATEST_TAG=$(curl -s https://api.github.com/repos/juicedata/juicefs/releases/latest | grep 'tag_name' | cut -d '"' -f 4 | tr -d 'v') + ``` + +2. Download the client to the current directory + + ```shell + wget "https://github.com/juicedata/juicefs/releases/download/v${JFS_LATEST_TAG}/juicefs-${JFS_LATEST_TAG}-linux-amd64.tar.gz" + ``` + +3. Unzip the installation package + + ```shell + tar -zxf "juicefs-${JFS_LATEST_TAG}-linux-amd64.tar.gz" + ``` + +4. Install + + ```shell + sudo install juicefs /usr/local/bin + ``` + +### Creating a file system + +JuiceFS is a distributed file system with data and metadata separated, usually using object storage as data storage and Redis, PostgreSQL or MySQL as metadata storage. It is assumed here that the following materials have been prepared. + +#### Object Storage + +View "[JuiceFS Supported Object Storage](../reference/how_to_setup_object_storage.md)" + +- **Bucket Endpoint**: `https://myjfs.oss-cn-shanghai.aliyuncs.com` +- **Access Key ID**: `ABCDEFGHIJKLMNopqXYZ` +- **Access Key Secret**: `ZYXwvutsrqpoNMLkJiHgfeDCBA` + +#### Database + +View "[JuiceFS Supported Metadata Engines](../reference/how_to_setup_metadata_engine.md)" + +- **Database URL**: `myjfs-sh-abc.redis.rds.aliyuncs.com:6379` +- **Database Password**: `mypassword` + +Write private information to environment variables: + +```shell +$ export ACCESS_KEY=ABCDEFGHIJKLMNopqXYZ +$ export SECRET_KEY=ZYXwvutsrqpoNMLkJiHgfeDCBA +$ export REDIS_PASSWORD=mypassword +``` + +Create a filesystem named `myjfs`: + +```shell +juicefs format \ + --storage oss \ + --bucket https://myjfs.oss-cn-shanghai.aliyuncs.com \ + redis://myjfs-sh-abc.redis.rds.aliyuncs.com:6379/1 \ + myjfs +``` + +### Mount and use + +Write the database password to the environment variable: + +```shell +export REDIS_PASSWORD=mypassword +``` + +:::note +Once the file system is created successfully, the corresponding key information will be written to the database and the JuiceFS client will automatically read it from the database when the file system is mounted, so there is no need to set it again. +::: + +Mount the file system to `mnt` in the user's home directory: + +```shell +sudo juicefs mount -d redis://myjfs-sh-abc.redis.rds.aliyuncs.com:6379/1 $HOME/mnt +``` + +If you need to access the JuiceFS filesystem mounted on a Linux subsystem from a Windows system, find the Linux subsystem in the list on the left side of Explorer, then find and open the mount point path. + +![](../images/wsl/access-jfs-from-win-en.png) + +For more information on the use of JuiceFS, please refer to the official documentation. 
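+
+When you no longer need the file system, it can be unmounted in the same way as on any other Linux system. A minimal example, assuming the mount point used above:
+
+```shell
+sudo juicefs umount $HOME/mnt
+```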
## WSL Storage Performance

WSL bridges the Windows and Linux subsystems, allowing each side to access files stored on the other.

![](../images/wsl/windows-to-linux-en.png)

Note, however, that crossing this boundary in either direction incurs some performance overhead. The recommended practice is therefore to keep files on the same side as the program that processes them: for a program running in the Linux subsystem, the files it works on should also be stored in the Linux subsystem for better performance.

In the Linux subsystem, WSL mounts each Windows drive under `/mnt`; for example, the mount point for the C: drive is `/mnt/c`.

![](../images/wsl/mount-point.png)

To get optimal performance when using JuiceFS in WSL, both the storage and the cache paths should be located in the Linux subsystem. In other words, avoid placing the storage or cache on a Windows partition mount point such as `/mnt/c`.

Benchmarks with the `bench` tool that comes with JuiceFS show that mounting the file system on a Windows partition mount point (e.g. `/mnt/c`) yields about 30% lower performance than mounting it inside the Linux subsystem (e.g. `$HOME/mnt`).

## Known Issues

When copying files to the Linux subsystem via Windows Explorer, WSL automatically creates an additional file with a `Zone.Identifier` suffix alongside each copied file. This comes from an NTFS security mechanism intended to track the origin of external files, but in WSL it behaves as a bug and has been reported to the Microsoft development team on GitHub [#7456](https://github.com/microsoft/WSL/issues/7456).

The same issue occurs when saving files via Windows Explorer to a JuiceFS file system mounted in the Linux subsystem. Reading and writing JuiceFS from within the Linux subsystem is not affected by this bug.

![](../images/wsl/zone-identifier-en.png)

diff --git a/docs/en/tutorials/qcloud.md b/docs/en/tutorials/qcloud.md
new file mode 100644
index 0000000..f15f62d
--- /dev/null
+++ b/docs/en/tutorials/qcloud.md
@@ -0,0 +1,296 @@
---
sidebar_label: Use JuiceFS on Tencent Cloud
sidebar_position: 7
slug: /clouds/qcloud
---
# Use JuiceFS on Tencent Cloud

JuiceFS needs to be used together with a database and object storage. Here we use a Tencent Cloud CVM server combined with a cloud database and COS object storage.

## Preparation

When creating cloud resources, try to choose the same region for all of them, so that they can access each other over the intranet and you avoid the extra traffic costs of going through the public network.

### 1. CVM

JuiceFS has no special requirements for server hardware: even the minimum CVM specification runs JuiceFS stably, so you usually just need to choose a configuration that meets your business needs.

In particular, you do not need to buy a new server or reinstall the operating system to use JuiceFS. JuiceFS is not intrusive to your business and does not interfere with existing systems and programs, so you can install and use it on a server that is already running.

By default, JuiceFS uses 1 GB of disk space for caching, and you can adjust the cache size as needed. This cache is a data buffer layer between the client and the object storage, so choosing a cloud disk with better performance gives better overall performance.
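For example, the cache location and size can be adjusted when mounting the file system. The sketch below is illustrative only: the `/data/jfsCache` path and the 20480 MiB size are placeholders, and `<META-URL>` stands for the metadata engine address described in the next section; the actual mount command for this setup appears later in this article.

```shell
# A sketch only: put the cache on a fast local disk and cap it at 20 GiB (value is in MiB)
sudo juicefs mount -d \
    --cache-dir /data/jfsCache \
    --cache-size 20480 \
    <META-URL> /mnt/jfs
```

Generally speaking, a larger cache on faster storage improves read performance for frequently accessed data.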
JuiceFS can be installed on all operating systems provided by Tencent Cloud CVM.

**The CVM specifications used in this article are as follows:**

| Server Specifications | |
| --------------------- | ------------------------ |
| **CPU** | 1 Core |
| **RAM** | 2 GB |
| **Storage** | 50 GB |
| **OS** | Ubuntu Server 20.04 64-bit |
| **Location** | Shanghai 5 |

### 2. Database

JuiceFS stores all the metadata corresponding to the data in a separate database; the supported databases are Redis, MySQL, PostgreSQL, TiKV and SQLite.

The performance and reliability of metadata vary with the database type. For example, Redis runs entirely in memory and offers the best performance, but it is harder to operate and maintain and has relatively low reliability. SQLite is a single-file relational database with low performance and is not suitable for large-scale data storage, but it is configuration-free and works well when only a small amount of data needs to be stored.

If you are just evaluating JuiceFS, you can set up a database manually on the CVM. When you want to use JuiceFS in a production environment and don't have a professional database operation and maintenance team, Tencent Cloud's managed database service is usually a better choice.

You can also use a cloud database service provided by another cloud platform if you wish. However, in that case you can only access the database over the public network, which means you must expose the database port publicly; this carries security risks and requires special attention.

If you must access the database through the public network, you can improve security by strictly limiting the IP addresses allowed to access the database via the whitelist feature provided by the cloud database console. Conversely, if you cannot connect to the cloud database over the public network, the database whitelist is the first thing to check.

| Database | Redis | MySQL, PostgreSQL | SQLite |
| :-------------: | :-----------------------------------------------------: | :----------------------------------------------------------: | :----------------------------------------------------------: |
| **Performance** | High | Medium | Low |
| **Management effort** | High | Medium | Low |
| **Reliability** | Low | Medium | Low |
| **Scenario** | Massive data, distributed high-frequency read and write | Massive data, distributed low and medium frequency read and write | Low-frequency single-machine read and write of small amounts of data |

> **Note**: If you use [JuiceFS Hosted Service](https://juicefs.com/docs/en/hosted_service.html), you do not need to prepare a database.

**This article uses TencentDB for Redis, which the CVM accesses over the VPC private network:**

| Redis version | 5.0 community edition |
| --------------------------- | ------------------------------------------ |
| **Instance Specification** | 1GB Memory Edition (standard architecture) |
| **Connection Address** | 192.168.5.5:6379 |
| **Available Zone** | Shanghai 5 |

Note that the database connection address depends on the VPC you create: a newly created Redis instance automatically gets an address within the network segment you defined.

### 3. Object Storage COS

JuiceFS stores all data in object storage, and it supports almost all object storage services.
However, for the best performance when using Tencent Cloud CVM, pairing it with Tencent Cloud COS object storage is usually the optimal choice. Please note that the CVM and the COS bucket should be in the same region so that they can communicate over Tencent Cloud's intranet, which not only keeps latency low but also avoids additional traffic costs.

> **Hint**: The single access address provided by Tencent Cloud COS supports both intranet and extranet access. When accessed through the intranet, COS automatically resolves to an intranet IP, so all the traffic generated is intranet traffic and incurs no traffic costs.

You can also use an object storage service provided by another cloud platform, but this is not recommended. Accessing another platform's object storage from a Tencent Cloud CVM has to go over the public network, so the object storage side will incur traffic costs and the access latency will be higher, which may hurt JuiceFS performance.

Tencent Cloud COS offers different storage classes. Since JuiceFS needs to interact with the object storage frequently, it is recommended to use the Standard storage class. You can combine it with a COS resource package to reduce the cost.

### API Access Secret Key

Tencent Cloud COS is accessed through its API, so you need to prepare an access secret key consisting of an `Access Key ID` and an `Access Key Secret`; [click here](https://intl.cloud.tencent.com/document/product/598/32675) to see how to obtain them.

> **Security Advisory**: Using the API access secret key explicitly may lead to key compromise; it is recommended to assign a [CAM Service Role](https://intl.cloud.tencent.com/document/product/598/19420) to the cloud server instead. Once a CVM has been granted COS operation privileges, it can access COS without the API access key.

## Installation

Here we are using Ubuntu Server 20.04 64-bit. The latest version of the client can be downloaded by running the following commands; you can also choose another version from the [JuiceFS GitHub Releases](https://github.com/juicedata/juicefs/releases) page.

```shell
$ JFS_LATEST_TAG=$(curl -s https://api.github.com/repos/juicedata/juicefs/releases/latest | grep 'tag_name' | cut -d '"' -f 4 | tr -d 'v')
```

```shell
$ wget "https://github.com/juicedata/juicefs/releases/download/v${JFS_LATEST_TAG}/juicefs-${JFS_LATEST_TAG}-linux-amd64.tar.gz"
```

After downloading, extract the program into the `juice` folder.

```shell
$ mkdir juice && tar -zxvf "juicefs-${JFS_LATEST_TAG}-linux-amd64.tar.gz" -C juice
```

Install the JuiceFS client to `/usr/local/bin`:

```shell
$ sudo install juice/juicefs /usr/local/bin
```

Run `juicefs` and, if it returns the help message shown below, the client has been installed successfully.

```shell
$ juicefs
NAME:
   juicefs - A POSIX file system built on Redis and object storage.

USAGE:
   juicefs [global options] command [command options] [arguments...]
+ +VERSION: + 0.15.2 (2021-07-07T05:51:36Z 4c16847) + +COMMANDS: + format format a volume + mount mount a volume + umount unmount a volume + gateway S3-compatible gateway + sync sync between two storage + rmr remove directories recursively + info show internal information for paths or inodes + bench run benchmark to read/write/stat big/small files + gc collect any leaked objects + fsck Check consistency of file system + profile analyze access log + status show status of JuiceFS + warmup build cache for target directories/files + dump dump metadata into a JSON file + load load metadata from a previously dumped JSON file + help, h Shows a list of commands or help for one command + +GLOBAL OPTIONS: + --verbose, --debug, -v enable debug log (default: false) + --quiet, -q only warning and errors (default: false) + --trace enable trace log (default: false) + --no-agent Disable pprof (:6060) and gops (:6070) agent (default: false) + --help, -h show help (default: false) + --version, -V print only the version (default: false) + +COPYRIGHT: + Apache License 2.0 +``` + +JuiceFS has good cross-platform compatibility and is supported on Linux, Windows and macOS. This article focuses on the installation and use of JuiceFS on Linux, if you need to know how to install it on other systems, please [check the documentation](../getting-started/installation.md). + +## Creating JuiceFS + +Once the JuiceFS client is installed, you can now create the JuiceFS storage using the Redis database and COS you prepared earlier. + +Technically speaking, this step should be called "Format a volume". However, since many users may not understand or care about the standard file system terminology, we will simply call the process "Create JuiceFS Storage". + +The following command creates a storage called `mystor`, i.e., a file system, using the `format` subcommand provided by the JuiceFS client. + +```shell +$ juicefs format \ + --storage cos \ + --bucket https:// \ + --access-key \ + --secret-key \ + redis://:@192.168.5.5:6379/1 \ + mystor +``` + +**Option description:** + +- `--storage`: Specify the type of object storage. +- `---bucket`: Bucket access domain of the object store, which can be found in the COS management console. +- `--access-key` and `--secret-key`: the secret key pair for accessing the Object Storage API, [click here to view](https://intl.cloud.tencent.com/document/product/598/32675) to get it. + +> Redis 6.0 authentication requires two parameters, username and password, and the address format is `redis://username:password@redis-server-url:6379/1`. Currently, the Redis version of Tencent Cloud Database only provides Reids 4.0 and 5.0, which only requires a password for authentication. When setting the Redis server address, you only need to leave the username empty, for example: `redis://:password@redis-server-url:6379/1` + +Output like the following means the file system was created successfully. + +```shell +2021/07/30 11:44:31.904157 juicefs[44060] : Meta address: redis://@192.168.5.5:6379/1 +2021/07/30 11:44:31.907083 juicefs[44060] : AOF is not enabled, you may lose data if Redis is not shutdown properly. 
+2021/07/30 11:44:31.907634 juicefs[44060] : Ping redis: 474.98µs +2021/07/30 11:44:31.907850 juicefs[44060] : Data uses cos://juice-0000000000/mystor/ +2021/07/30 11:44:32.149692 juicefs[44060] : Volume is formatted as {Name:mystor UUID:dbf05314-57af-4a2c-8ac1-19329d73170c Storage:cos Bucket:https://juice-0000000000.cos.ap-shanghai.myqcloud.com AccessKey:AKIDGLxxxxxxxxxxxxxxxxxxZ8QRBdpkOkp SecretKey:removed BlockSize:4096 Compression:none Shards:0 Partitions:0 Capacity:0 Inodes:0 EncryptKey:} +``` + +## Mount JuiceFS + +When the file system is created, the information related to the object storage is stored in the database, so there is no need to enter information such as the bucket domain and secret key when mounting. + +Use the `mount` subcommand to mount the file system to the `/mnt/jfs` directory. + +```shell +$ sudo juicefs mount -d redis://:@192.168.5.5:6379/1 /mnt/jfs +``` + +> **Note**: When mounting the file system, only the Redis database address is required, not the file system name. The default cache path is `/var/jfsCache`, please make sure the current user has enough read/write permissions. + +Output similar to the following means that the file system was mounted successfully. + +```shell +2021/07/30 11:49:56.842211 juicefs[44175] : Meta address: redis://@192.168.5.5:6379/1 +2021/07/30 11:49:56.845100 juicefs[44175] : AOF is not enabled, you may lose data if Redis is not shutdown properly. +2021/07/30 11:49:56.845562 juicefs[44175] : Ping redis: 383.157µs +2021/07/30 11:49:56.846164 juicefs[44175] : Data use cos://juice-0000000000/mystor/ +2021/07/30 11:49:56.846731 juicefs[44175] : Disk cache (/var/jfsCache/dbf05314-57af-4a2c-8ac1-19329d73170c/): capacity (1024 MB), free ratio (10%), max pending pages (15) +2021/07/30 11:49:57.354763 juicefs[44175] : OK, mystor is ready at /mnt/jfs +``` + +Using the `df` command, you can see how the file system is mounted. + +```shell +$ df -Th +File system type capacity used usable used% mount point +JuiceFS:mystor fuse.juicefs 1.0P 64K 1.0P 1% /mnt/jfs +``` + +After the file system is successfully mounted, you can now store data in the `/mnt/jfs` directory as if you were using a local hard drive. + +> **Multi-Host Sharing**: JuiceFS storage supports being mounted by multiple cloud servers at the same time. You can install the JuiceFS client on other could server and then use `redis://:@herald-sh-abc.redis.rds.aliyuncs. com:6379/1` database address to mount the file system on each host. + +## File System Status + +Use the `status` subcommand of the JuiceFS client to view basic information and connection status of a file system. + +```shell +$ juicefs status redis://:@192.168.5.5:6379/1 + +2021/07/30 11:51:17.864767 juicefs[44196] : Meta address: redis://@192.168.5.5:6379/1 +2021/07/30 11:51:17.866619 juicefs[44196] : AOF is not enabled, you may lose data if Redis is not shutdown properly. 
+2021/07/30 11:51:17.867092 juicefs[44196] : Ping redis: 379.391µs +{ + "Setting": { + "Name": "mystor", + "UUID": "dbf05314-57af-4a2c-8ac1-19329d73170c", + "Storage": "cos", + "Bucket": "https://juice-0000000000.cos.ap-shanghai.myqcloud.com", + "AccessKey": "AKIDGLxxxxxxxxxxxxxxxxx8QRBdpkOkp", + "BlockSize": 4096, + "Compression": "none", + "Shards": 0, + "Partitions": 0, + "Capacity": 0, + "Inodes": 0 + }, + "Sessions": [ + { + "Sid": 1, + "Heartbeat": "2021-07-30T11:49:56+08:00", + "Version": "0.15.2 (2021-07-07T05:51:36Z 4c16847)", + "Hostname": "VM-5-6-ubuntu", + "MountPoint": "/mnt/jfs", + "ProcessID": 44175 + }, + { + "Sid": 3, + "Heartbeat": "2021-07-30T11:50:56+08:00", + "Version": "0.15.2 (2021-07-07T05:51:36Z 4c16847)", + "Hostname": "VM-5-6-ubuntu", + "MountPoint": "/mnt/jfs", + "ProcessID": 44185 + } + ] +} +``` + +## Unmount JuiceFS + +The file system can be unmounted using the `umount` command provided by the JuiceFS client, e.g. + +```shell +$ sudo juicefs umount /mnt/jfs +``` + +> **Note**: Forced unmount of the file system in use may result in data corruption or loss, so please be sure to proceed with caution. + +## Auto-mount on Boot + +If you don't want to manually remount JuiceFS storage on reboot, you can set up automatic mounting of the file system. + +First, you need to rename the `juicefs` client to `mount.juicefs` and copy it to the `/sbin/` directory. + +```shell +$ sudo cp juice/juicefs /sbin/mount.juicefs +``` + +Edit the `/etc/fstab` configuration file and add a new record. + +```shell +redis://:@192.168.5.5:6379/1 /mnt/jfs juicefs _netdev,cache-size=20480 0 0 +``` + +The mount option `cache-size=20480` means to allocate 20GB local disk space for JuiceFS cache use. Generally speaking, allocating more cache space for JuiceFS will result in better performance. + +You can adjust the FUSE mount options in the above configuration as needed, for more information please [check the documentation](../reference/fuse_mount_options.md). + +> **Note**: Please replace the Redis address, mount point, and mount options in the above configuration file with your actual information. diff --git a/docs/en/windows.md b/docs/en/windows.md new file mode 100644 index 0000000..e672c27 --- /dev/null +++ b/docs/en/windows.md @@ -0,0 +1,64 @@ +# Using JuiceFS on Windows + + +## Install dependencies + +JuiceFS depends on [WinFsp](http://www.secfs.net/winfsp/rel), please install it first. + + +## Build JuiceFS from source + +We can cross compile JuiceFS for Windows platform on Linux or macOS. + +1. Install [mingw-w64](http://mingw-w64.org) on Linux or macOS. + + On Linux, it can be installed using the distro's package manager like `yum` or `apt`. + + On macOS, use [Homebrew](https://brew.sh) to install: `brew install mingw-w64` + +2. Build JuiceFS for Windows: + +```bash +$ git clone https://github.com/juicedata/juicefs.git +$ cd juicefs +$ make juicefs.exe +``` + + +## Use JuiceFS + +### Start Redis Server + +JuiceFS requires a Redis, there is a [Windows version of Redis](https://github.com/tporadowski/redis), +please download the latest release and launch the Redis server. + + +### Format JuiceFS + +For test purpose, we can use a local disk to simulate an object store: + +``` +PS C:\> .\juicefs.exe format localhost test +2021/03/22 15:16:18.003547 juicefs[7064] : Meta address: redis://localhost +2021/03/22 15:16:18.022972 juicefs[7064] : AOF is not enabled, you may lose data if Redis is not shutdown properly. 
+2021/03/22 15:16:18.024710 juicefs[7064] : Data use file:///C:/jfs/local/test/ +``` + +For other supported object storage, please check out ["How to Setup Object Storage"](reference/how_to_setup_object_storage.md). + +### Mount JuiceFS + +Select an unused drive letter, such as `Z:`, then execute: + +``` +PS C:\> .\juicefs.exe mount localhost Z: +2021/03/22 15:16:18.003547 juicefs[7064] : Meta address: redis://localhost +2021/03/22 15:16:18.022972 juicefs[7064] : AOF is not enabled, you may lose data if Redis is not shutdown properly. +2021/03/22 15:16:18.024710 juicefs[7064] : Data use file:///C:/jfs/local/test/ +2021/03/22 15:16:18.024710 juicefs[7064] : Cache: C:\Users\bob\.juicefs\cache\7088b6fa-ef2b-4792-b6c9-98fcdd6d45fb capacity: 1024 MB +The service juicefs has been started. +``` + +Then we can use JuiceFS as a shared disk drive `Z:`, it looks like: + +![JuiceFS on Windows](images/juicefs-on-windows.png) diff --git a/docs/zh_cn/README.md b/docs/zh_cn/README.md new file mode 100644 index 0000000..c446044 --- /dev/null +++ b/docs/zh_cn/README.md @@ -0,0 +1,20 @@ +# JuiceFS 用户手册 + +[![license](https://img.shields.io/badge/license-Apache%20v2.0-blue)](https://github.com/juicedata/juicefs/blob/main/LICENSE) [![Go Report](https://img.shields.io/badge/go%20report-A+-brightgreen.svg?style=flat)](https://goreportcard.com/badge/github.com/juicedata/juicefs) [![Join Slack](https://badgen.net/badge/Slack/Join%20JuiceFS/0abd59?icon=slack)](https://join.slack.com/t/juicefs/shared_invite/zt-n9h5qdxh-0bJojPaql8cfFgwerDQJgA) + +![JuiceFS LOGO](images/juicefs-logo.png) + +JuiceFS 是一款高性能 [POSIX](https://en.wikipedia.org/wiki/POSIX) 文件系统,针对云原生环境特别优化设计,在 Apache 2.0 开源协议下发布。使用 JuiceFS 存储数据,数据本身会被持久化在对象存储(例如,Amazon S3),而数据所对应的元数据可以根据场景需求被持久化在 Redis、MySQL、SQLite 等多种数据库引擎中。JuiceFS 可以简单便捷的将海量云存储直接接入已投入生产环境的大数据、机器学习、人工智能以及各种应用平台,无需修改代码即可像使用本地存储一样高效使用海量云端存储。 + +## 核心特性 + +1. **POSIX 兼容**:像本地文件系统一样使用,无缝对接已有应用,无业务侵入性; +2. **HDFS 兼容**:完整兼容 [HDFS API](deployment/hadoop_java_sdk.md),提供更强的元数据性能; +3. **S3 兼容**:提供 [S3 网关](deployment/s3_gateway.md) 实现 S3 协议兼容的访问接口; +4. **云原生**:通过 [Kubernetes CSI Driver](deployment/how_to_use_on_kubernetes.md) 可以很便捷地在 Kubernetes 中使用 JuiceFS; +5. **多端共享**:同一文件系统可在上千台服务器同时挂载,高性能并发读写,共享数据; +6. **强一致性**:确认的修改会在所有挂载了同一文件系统的服务器上立即可见,保证强一致性; +7. **强悍性能**:毫秒级的延迟,近乎无限的吞吐量(取决于对象存储规模),查看[性能测试结果](benchmark/benchmark.md); +8. **数据安全**:支持传输中加密(encryption in transit)以及静态加密(encryption at rest),[查看详情](security/encrypt.md); +9. **文件锁**:支持 BSD 锁(flock)及 POSIX 锁(fcntl); +10. 
**数据压缩**:支持使用 [LZ4](https://lz4.github.io/lz4) 或 [Zstandard](https://facebook.github.io/zstd) 压缩数据,节省存储空间; diff --git a/docs/zh_cn/administration/cache_management.md b/docs/zh_cn/administration/cache_management.md new file mode 100644 index 0000000..e8fa8d4 --- /dev/null +++ b/docs/zh_cn/administration/cache_management.md @@ -0,0 +1,110 @@ +--- +sidebar_label: 缓存 +sidebar_position: 5 +slug: /cache_management +--- +# 缓存 + +对于一个由对象存储和数据库组合驱动的文件系统,缓存是本地客户端与远端服务之间高效交互的重要纽带。读写的数据可以提前或者异步载入缓存,再由客户端在后台与远端服务交互执行异步上传或预取数据。相比直接与远端服务交互,采用缓存技术可以大大降低存储操作的延时并提高数据吞吐量。 + +JuiceFS 提供包括元数据缓存、数据读写缓存等多种缓存机制。 + +## 数据一致性 + +JuiceFS 提供「关闭再打开(close-to-open)」一致性保证,即当两个及以上客户端同时读写相同的文件时,客户端 A 的修改在客户端 B 不一定能立即看到。但是,一旦这个文件在客户端 A 写入完成并关闭,之后在任何一个客户端重新打开该文件都可以保证能访问到最新写入的数据,不论是否在同一个节点。 + +「关闭再打开」是 JuiceFS 提供的最低限度一致性保证,在某些情况下可能也不需要重新打开文件才能访问到最新写入的数据。例如多个应用程序使用同一个 JuiceFS 客户端访问相同的文件(文件变更立即可见),或者在不同节点上通过 `tail -f` 命令查看最新数据。 + +## 元数据缓存 + +JuiceFS 支持在内核和客户端内存(即 JuiceFS 进程)中缓存元数据以提升元数据的访问性能。 + +### 内核元数据缓存 + +内核中可以缓存三种元数据:**属性(attribute)**、**文件项(entry)**和**目录项(direntry)**,可以通过以下[挂载参数](../reference/command_reference.md#juicefs-mount)控制缓存时间: + +``` +--attr-cache value 属性缓存时长,单位秒 (默认值: 1) +--entry-cache value 文件项缓存时长,单位秒 (默认值: 1) +--dir-entry-cache value 目录项缓存时长,单位秒 (默认值: 1) +``` + +JuiceFS 默认会在内核中缓存属性、文件项和目录项,缓存时长 1 秒,以提高 lookup 和 getattr 的性能。当多个节点的客户端同时使用同一个文件系统时,内核中缓存的元数据只能通过时间失效。也就是说,极端情况下可能出现节点 A 修改了某个文件的元数据(如 `chown`),通过节点 B 访问未能立即看到更新的情况。当然,等缓存过期后,所有节点最终都能看到 A 所做的修改。 + +### 客户端内存元数据缓存 + +> **注意**:此特性需要使用 0.15.2 及以上版本的 JuiceFS。 + +JuiceFS 客户端在 `open()` 操作即打开一个文件时,其文件属性(attribute)会被自动缓存在客户端内存中。如果在挂载文件系统时设置了 [`--open-cache`](../reference/command_reference.md#juicefs-mount) 选项且值大于 0,只要缓存尚未超时失效,随后执行的 `getattr()` 和 `open()` 操作会从内存缓存中立即返回结果。 + +执行 `read()` 操作即读取一个文件时,文件的 chunk 和 slice 信息会被自动缓存在客户端内存。在缓存有效期内,再次读取 chunk 会从内存缓存中立即返回 slice 信息。 + +> **提示**:您可以查阅[「JuiceFS 如何存储文件」](../reference/how_juicefs_store_files.md)了解 chunk 和 slice 是什么。 + +默认情况下,对于一个元数据已经被缓存在内存的文件,超过 1 小时没有被任何进程访问,其所有元数据缓存会被自动删除。 + +## 数据缓存 + +JuiceFS 对数据也提供多种缓存机制来提高性能,包括内核中的页缓存和客户端所在节点的本地缓存。 + +### 内核数据缓存 + +> **注意**:此特性需要使用 0.15.2 及以上版本的 JuiceFS。 + +对于已经读过的文件,内核会把它的内容自动缓存下来,随后再打开该文件,如果文件没有被更新(即 mtime 没有更新),就可以直接从内核中的缓存读取该文件,从而获得最好的性能。 + +得益于内核缓存,重复读取 JuiceFS 中相同文件的速度会非常快,延时可低至微秒,吞吐量可以到每秒数 GiB。 + +JuiceFS 客户端目前还未默认启用内核的写入缓存功能,从 [Linux 内核 3.15](https://github.com/torvalds/linux/commit/4d99ff8f12e) 开始,FUSE 支持[「writeback-cache 模式」](https://www.kernel.org/doc/Documentation/filesystems/fuse-io.txt),这意味着可以非常快速地完成 `write()` 系统调用。你可以在[挂载文件系统](../reference/command_reference.md#juicefs-mount)时设置 [`-o writeback_cache`](../reference/fuse_mount_options.md#writeback_cache) 选项开启 writeback-cache 模式。当需要频繁写入非常小的数据(如 100 字节左右)时,建议启用此挂载选项。 + +### 客户端读缓存 + +JuiceFS 客户端会根据读取模式自动预读数据放入缓存,从而提高顺序读的性能。默认情况下,会在读取数据时并发预读 1 个 block 缓存在本地。本地缓存可以设置在基于机械硬盘、SSD 或内存的任意本地文件系统。 + +> **提示**:您可以查阅[「JuiceFS 如何存储文件」](../reference/how_juicefs_store_files.md)了解 block 是什么。 + +本地缓存可以在[挂载文件系统](../reference/command_reference.md#juicefs-mount)时通过以下选项调整: + +``` +--prefetch value 并发预读 N 个块 (默认: 1) +--cache-dir value 本地缓存目录路径;使用冒号隔离多个路径 (默认: "$HOME/.juicefs/cache" 或 "/var/jfsCache") +--cache-size value 缓存对象的总大小;单位为 MiB (默认: 102400) +--free-space-ratio value 最小剩余空间比例 (默认: 0.1) +--cache-partial-only 仅缓存随机小块读 (默认: false) +``` + +特别地,如果希望将 JuiceFS 的本地缓存存储在内存中有两种方式,一种是将 `--cache-dir` 设置为 `memory`,另一种是将其设置为 `/dev/shm/`。这两种方式的区别是前者在重新挂载 JuiceFS 文件系统之后缓存数据就清空了,而后者还会保留,性能上两者没有太大差别。 + +JuiceFS 客户端会尽可能快地把从对象存储下载的数据(包括新上传的小于 1 个 block 大小的数据)写入到缓存目录中,不做压缩和加密。**因为 JuiceFS 会为所有写入对象存储的 block 对象生成唯一的名字,而且所有 block 
对象不会被修改,因此当文件内容更新时,不用担心缓存的数据失效问题。** + +缓存在使用空间到达上限(即缓存大小大于等于 `--cache-size`)或磁盘将被存满(即磁盘可用空间比例小于 `--free-space-ratio`)时会自动进行清理,目前的规则是根据访问时间,优先清理不频繁访问的文件。 + +数据缓存可以有效地提高随机读的性能,对于像 Elasticsearch、ClickHouse 等对随机读性能要求更高的应用,建议将缓存路径设置在速度更快的存储介质上并分配更大的缓存空间。 + +### 客户端写缓存 + +写入数据时,JuiceFS 客户端会把数据缓存在内存,直到当一个 chunk 被写满或通过 `close()` 或 `fsync()` 强制操作时,数据才会被上传到对象存储。在调用 `fsync()` 或 `close()` 时,客户端会等数据写入对象存储并通知元数据服务后才会返回,从而确保数据完整。 + +在某些情况下,如果本地存储是可靠的,且本地存储的写入性能明显优于网络写入(如 SSD 盘),可以通过启用异步上传数据的方式提高写入性能,这样一来 `close()` 操作不会等待数据写入到对象存储,而是在数据写入本地缓存目录就返回。 + +异步上传功能默认关闭,可以通过以下选项启用: + +``` +--writeback 后台异步上传对象 (默认: false) +``` + +当需要短时间写入大量小文件时,建议使用 `--writeback` 参数挂载文件系统以提高写入性能,写入完成之后可考虑取消该选项重新挂载以使后续的写入数据获得更高的可靠性。另外,像 MySQL 的增量备份等需要大量随机写操作的场景时也建议启用 `--writeback`。 + +> **警告**:当启用了异步上传,即挂载文件系统时指定了 `--writeback` 时,千万不要删除 `//rawstaging` 目录中的内容,否则会导致数据丢失。 + +当缓存磁盘将被写满时,会暂停写入数据,改为直接上传数据到对象存储(即关闭客户端写缓存功能)。 + +启用异步上传功能时,缓存本身的可靠性与数据写入的可靠性直接相关,对数据可靠性要求高的场景应谨慎使用。 + +## 常见问题 + +### 为什么我设置了缓存容量为 50 GiB,但实际占用了 60 GiB 的空间? + +对于总量相同的缓存数据,在不同的文件系统上会有不同的容量计算规则。JuiceFS 目前是通过累加所有被缓存对象的大小并附加固定的开销(4KiB)来估算得到的,这与 `du` 命令得到的数值并不完全一致。 + +为防止缓存盘被写满,当缓存目录所在文件系统空间不足时,客户端会尽量减少缓存用量。 diff --git a/docs/zh_cn/administration/destroy.md b/docs/zh_cn/administration/destroy.md new file mode 100644 index 0000000..e8b01f4 --- /dev/null +++ b/docs/zh_cn/administration/destroy.md @@ -0,0 +1,77 @@ +# 如何销毁文件系统 + +JuiceFS 客户端提供了 `destroy` 命令用以彻底销毁一个文件系统,销毁操作将会产生以下结果: + +- 清空此文件系统的全部元数据记录; +- 清空此文件系统的全部数据块 + +销毁文件系统的命令格式如下: + +```shell +juicefs destroy +``` + +- ``:元数据引擎的 URL 地址; +- ``:文件系统的 UUID。 + +## 查找文件系统的 UUID + +JuiceFS 客户端的 `status` 命令可以查看一个文件系统的详细信息,只需指定文件系统的元数据引擎 URL 即可,例如: + +```shell {7} +$ juicefs status redis://127.0.0.1:6379/1 + +2022/01/26 21:41:37.577645 juicefs[31181] : Meta address: redis://127.0.0.1:6379/1 +2022/01/26 21:41:37.578238 juicefs[31181] : Ping redis: 55.041µs +{ + "Setting": { + "Name": "macjfs", + "UUID": "eabb96d5-7228-461e-9240-fddbf2b576d8", + "Storage": "file", + "Bucket": "jfs/", + "AccessKey": "", + "BlockSize": 4096, + "Compression": "none", + "Shards": 0, + "Partitions": 0, + "Capacity": 0, + "Inodes": 0, + "TrashDays": 1 + }, + ... +} +``` + +## 销毁文件系统 + +:::danger 危险操作 +销毁操作将导致文件系统关联的数据库记录和对象存储中的数据全部被清空,请务必先备份重要数据后再操作! +::: + +```shell +$ juicefs destroy redis://127.0.0.1:6379/1 eabb96d5-7228-461e-9240-fddbf2b576d8 + +2022/01/26 21:52:17.488987 juicefs[31518] : Meta address: redis://127.0.0.1:6379/1 +2022/01/26 21:52:17.489668 juicefs[31518] : Ping redis: 55.542µs + volume name: macjfs + volume UUID: eabb96d5-7228-461e-9240-fddbf2b576d8 +data storage: file://jfs/ + used bytes: 18620416 + used inodes: 23 +WARNING: The target volume will be destoried permanently, including: +WARNING: 1. objects in the data storage +WARNING: 2. entries in the metadata engine +Proceed anyway? [y/N]: y +deleting objects: 68 +The volume has been destroyed! You may need to delete cache directory manually. 
+``` + +在销毁文件系统时,客户端会发出确认提示,请务必仔细核对文件系统信息,确认无误后输入 `y` 确认。 + +## 常见错误 + +```shell +2022/01/26 21:47:30.949149 juicefs[31483] : 1 sessions are active, please disconnect them first +``` + +如果收到类似上面的错误提示,说明文件系统没有被妥善卸载,请检查并确认卸载了所有挂载点后再行操作。 diff --git a/docs/zh_cn/administration/fault_diagnosis_and_analysis.md b/docs/zh_cn/administration/fault_diagnosis_and_analysis.md new file mode 100644 index 0000000..3fb8a4a --- /dev/null +++ b/docs/zh_cn/administration/fault_diagnosis_and_analysis.md @@ -0,0 +1,115 @@ +--- +sidebar_label: 故障诊断和分析 +sidebar_position: 9 +slug: /fault_diagnosis_and_analysis +--- + +# JuiceFS 故障诊断和分析 + +## 错误日志 + +当 JuiceFS 通过 `-d` 选项在后台运行时,日志会输出到系统日志和 `/var/log/juicefs.log`(v0.15+,参见 [`--log` 选项](../reference/command_reference.md#juicefs-mount))。取决于你使用的操作系统,你可以通过不同的命令获取日志: + +```bash +# macOS +$ syslog | grep 'juicefs' + +# Debian based system +$ cat /var/log/syslog | grep 'juicefs' + +# CentOS based system +$ cat /var/log/messages | grep 'juicefs' + +# v0.15+ +$ tail -n 100 /var/log/juicefs.log +``` + +日志等级有 4 种。你可以使用 `grep` 命令过滤显示不同等级的日志信息,从而进行性能统计和故障追踪。 + +```bash +$ cat /var/log/syslog | grep 'juicefs' | grep '' +$ cat /var/log/syslog | grep 'juicefs' | grep '' +$ cat /var/log/syslog | grep 'juicefs' | grep '' +$ cat /var/log/syslog | grep 'juicefs' | grep '' +``` + +## 访问日志 + +JuiceFS 的根目录中有一个名为 `.accesslog` 的虚拟文件,它记录了文件系统上的所有操作及其花费的时间,例如: + +```bash +$ cat /jfs/.accesslog +2021.01.15 08:26:11.003330 [uid:0,gid:0,pid:4403] write (17669,8666,4993160): OK <0.000010> +2021.01.15 08:26:11.003473 [uid:0,gid:0,pid:4403] write (17675,198,997439): OK <0.000014> +2021.01.15 08:26:11.003616 [uid:0,gid:0,pid:4403] write (17666,390,951582): OK <0.000006> +``` + +每行的最后一个数字是当前操作花费的时间(以秒为单位)。 您可以用它调试和分析性能问题,或者尝试使用 `juicefs profile /jfs` 查看实时统计信息。运行 `juicefs profile -h` 或[点此](../benchmark/operations_profiling.md)了解该命令的更多信息。 + +## 运行时信息 + +JuiceFS 客户端默认会通过 [pprof](https://pkg.go.dev/net/http/pprof) 在本地监听一个 TCP 端口用以获取运行时信息,如 Goroutine 堆栈信息、CPU 性能统计、内存分配统计。你可以通过系统命令(如 `lsof`)查看当前 JuiceFS 客户端监听的具体端口号: + +:::note 注意 +如果 JuiceFS 是通过 root 用户挂载,那么需要在 `lsof` 命令前加上 `sudo`。 +::: + +```bash +$ lsof -i -nP | grep LISTEN | grep juicefs +juicefs 32666 user 8u IPv4 0x44992f0610d9870b 0t0 TCP 127.0.0.1:6061 (LISTEN) +juicefs 32666 user 9u IPv4 0x44992f0619bf91cb 0t0 TCP 127.0.0.1:6071 (LISTEN) +juicefs 32666 user 15u IPv4 0x44992f062886fc5b 0t0 TCP 127.0.0.1:9567 (LISTEN) +``` + +默认 pprof 监听的端口号范围是从 6060 开始至 6099 结束,因此上面示例中对应的实际端口号是 6061。在获取到监听端口号以后就可以通过 `http://localhost:/debug/pprof` 地址查看所有可供查询的运行时信息,一些重要的运行时信息如下: + +- Goroutine 堆栈信息:`http://localhost:/debug/pprof/goroutine?debug=1` +- CPU 性能统计:`http://localhost:/debug/pprof/profile?seconds=30` +- 内存分配统计:`http://localhost:/debug/pprof/heap` + +为了便于分析这些运行时信息,可以将它们保存到本地,例如: + +```bash +$ curl 'http://localhost:/debug/pprof/goroutine?debug=1' > juicefs.goroutine.txt +$ curl 'http://localhost:/debug/pprof/profile?seconds=30' > juicefs.cpu.pb.gz +$ curl 'http://localhost:/debug/pprof/heap' > juicefs.heap.pb.gz +``` + +如果你安装了 `go` 命令,那么可以通过 `go tool pprof` 命令直接分析,例如分析 CPU 性能统计: + +```bash +$ go tool pprof 'http://localhost:/debug/pprof/profile?seconds=30' +Fetching profile over HTTP from http://localhost:/debug/pprof/profile?seconds=30 +Saved profile in /Users/xxx/pprof/pprof.samples.cpu.001.pb.gz +Type: cpu +Time: Dec 17, 2021 at 1:41pm (CST) +Duration: 30.12s, Total samples = 32.06s (106.42%) +Entering interactive mode (type "help" for commands, "o" for options) +(pprof) top +Showing nodes accounting for 30.57s, 95.35% of 32.06s total +Dropped 285 nodes 
(cum <= 0.16s) +Showing top 10 nodes out of 192 + flat flat% sum% cum cum% + 14.73s 45.95% 45.95% 14.74s 45.98% runtime.cgocall + 7.39s 23.05% 69.00% 7.41s 23.11% syscall.syscall + 2.92s 9.11% 78.10% 2.92s 9.11% runtime.pthread_cond_wait + 2.35s 7.33% 85.43% 2.35s 7.33% runtime.pthread_cond_signal + 1.13s 3.52% 88.96% 1.14s 3.56% runtime.nanotime1 + 0.77s 2.40% 91.36% 0.77s 2.40% syscall.Syscall + 0.49s 1.53% 92.89% 0.49s 1.53% runtime.memmove + 0.31s 0.97% 93.86% 0.31s 0.97% runtime.kevent + 0.27s 0.84% 94.70% 0.27s 0.84% runtime.usleep + 0.21s 0.66% 95.35% 0.21s 0.66% runtime.madvise +``` + +也可以将运行时信息导出为可视化图表,以更加直观的方式进行分析。可视化图表支持导出为多种格式,如 HTML、PDF、SVG、PNG 等。例如导出内存分配统计信息为 PDF 文件的命令如下: + +:::note 注意 +导出为可视化图表功能依赖 [Graphviz](https://graphviz.org),请先将它安装好。 +::: + +```bash +$ go tool pprof -pdf 'http://localhost:/debug/pprof/heap' > juicefs.heap.pdf +``` + +关于 pprof 的更多信息,请查看[官方文档](https://github.com/google/pprof/blob/master/doc/README.md)。 diff --git a/docs/zh_cn/administration/metadata/mysql_best_practices.md b/docs/zh_cn/administration/metadata/mysql_best_practices.md new file mode 100644 index 0000000..01c6903 --- /dev/null +++ b/docs/zh_cn/administration/metadata/mysql_best_practices.md @@ -0,0 +1,5 @@ +--- +sidebar_label: MySQL 最佳实践 +sidebar_position: 2 +--- +# MySQL 最佳实践 \ No newline at end of file diff --git a/docs/zh_cn/administration/metadata/postgresql_best_practices.md b/docs/zh_cn/administration/metadata/postgresql_best_practices.md new file mode 100644 index 0000000..3c49421 --- /dev/null +++ b/docs/zh_cn/administration/metadata/postgresql_best_practices.md @@ -0,0 +1,51 @@ +--- +sidebar_label: PostgreSQL +sidebar_position: 2 +--- +# PostgreSQL 最佳实践 + +对于数据与元数据分离存储的分布式文件系统,元数据的读写性能直接影响整个系统的工作效率,元数据的安全也直接关系着整个系统的数据安全。 + +在生产环境中,建议您优先选择云计算平台提供的托管型云数据库,并搭配恰当的高可用性架构。 + +不论自行搭建,还是采用云数据库,使用 JuiceFS 应该始终关注元数据的完整和安全。 + +## 通信安全 + +默认情况下,JuiceFS 客户端会采用 SSL 加密协议连接 PostgreSQL,如果数据库未启用 SSL 加密,则需要在元数据 URL 中需要附加 `sslmode=disable` 参数。 + +建议配置并始终开启数据库服务端 SSL 加密。 + +## 通过环境变量传递数据库信息 + +虽然直接在元数据 URL 中设置数据库密码简单方便,但日志或程序输出中可能会泄漏密码,为了保证数据安全,应该始终通过环境变量传递数据库密码。 + +环境变量名称可以自由定义,例如: + +```shell +export $PG_PASSWD=mypassword +``` + +在元数据 URL 中通过环境变量传递数据库密码: + +```shell +juicefs mount -d "postgres://user:$PG_PASSWD@192.168.1.6:5432/juicefs" /mnt/jfs +``` + +## 定期备份 + +请参考官方手册 [Chapter 26. 
Backup and Restore](https://www.postgresql.org/docs/current/backup.html) 了解如何备份和恢复数据库。 + +建议制定数据库备份计划,并遵照计划定期备份 PostgreSQL 数据库,与此同时,还应该在实验环境中尝试恢复数据,确认备份是有效的。 + +## 使用连接池 + +连接池是客户端与数据库之间的中间层,由它作为中介提升连接效率,降低短连接的损耗。常用的连接池有 [PgBouncer](https://www.pgbouncer.org/) 和 [Pgpool-II](https://www.pgpool.net/) 。 + +## 高可用 + +PostgreSQL 官方文档 [High Availability, Load Balancing, and Replication](https://www.postgresql.org/docs/current/different-replication-solutions.html) 对比了几种常用的数据库高可用方案,请根据实际业务需要选择恰当的高可用方案。 + +:::note 注意 +JuiceFS 使用[事务](https://www.postgresql.org/docs/current/tutorial-transactions.html)保证元数据操作的原子性。由于 PostgreSQL 尚不支持 Muti-Shard (Distributed) 分布式事务,因此请勿将多服务器分布式架构用于 JuiceFS 元数据存储。 +::: diff --git a/docs/zh_cn/administration/metadata/redis_best_practices.md b/docs/zh_cn/administration/metadata/redis_best_practices.md new file mode 100644 index 0000000..7fedb43 --- /dev/null +++ b/docs/zh_cn/administration/metadata/redis_best_practices.md @@ -0,0 +1,148 @@ +--- +sidebar_label: Redis +sidebar_position: 1 +slug: /redis_best_practices +--- +# Redis 最佳实践 + +当采用 Redis 作为 JuiceFS 元数据存储引擎时,即由 Redis 负责存储所有元数据并响应客户端对元数据的操作。若 Redis 出现连接不稳定、服务不可用或元数据丢失等问题,可能会导致读写速度慢或数据损坏等情况。 + +:::tip 建议 +强烈建议使用云平台提供的 Redis 托管服务,详情查看「[推荐的 Redis 托管服务](#推荐的-redis-托管服务)」。 +::: + +对于自主维护的 Redis 数据库,建议了解以下几个方面: + +:::note 注意 +本文部分内容来自 Redis 官网,若有不一致的表述,请以 Redis 官方文档为准。 +::: + +## 内存使用量 + +JuiceFS 元数据引擎的使用空间主要与文件系统中的文件数量有关,根据我们的经验,每一个文件的元数据会大约占用 300 字节内存。因此,如果要存储 1 亿个文件,大约需要 30GiB 内存。 + +你可以通过 Redis 的 [`INFO memory`](https://redis.io/commands/info) 命令查看具体的内存使用量,例如: + +``` +> INFO memory +used_memory: 19167628056 +used_memory_human: 17.85G +used_memory_rss: 20684886016 +used_memory_rss_human: 19.26G +... +used_memory_overhead: 5727954464 +... +used_memory_dataset: 13439673592 +used_memory_dataset_perc: 70.12% +``` + +其中 `used_memory_rss` 是 Redis 实际使用的总内存大小,这里既包含了存储在 Redis 中的数据大小(也就是上面的 `used_memory_dataset`),也包含了一些 Redis 的[系统开销](https://redis.io/commands/memory-stats)(也就是上面的 `used_memory_overhead`)。前面提到每个文件的元数据大约占用 300 字节是通过 `used_memory_dataset` 来计算的,如果你发现你的 JuiceFS 文件系统中单个文件元数据占用空间远大于 300 字节,可以尝试运行 [`juicefs gc`](../../reference/command_reference.md#juicefs-gc) 命令来清理可能存在的冗余数据。 + +## 数据可用性 + +:::caution 注意 +JuiceFS 使用 「[Redis 事务](https://redis.io/topics/transactions)」保证元数据操作的原子性。但由于 Redis Cluster 集群模式不支持事务(Transactions),因此 Redis 集群不可用作 JuiceFS 元数据存储。如有 Redis 高可用需求,请使用 Sentinel 哨兵模式。 +::: + +[Redis 哨兵](https://redis.io/topics/sentinel) 是 Redis 官方的高可用解决方案,它提供以下功能: + +- **监控**,哨兵会不断检查您的 master 实例和 replica 实例是否按预期工作。 +- **通知**,当受监控的 Redis 实例出现问题时,哨兵可以通过 API 通知系统管理员或其他计算机程序。 +- **自动故障转移**,如果 master 没有按预期工作,哨兵可以启动一个故障转移过程,其中一个 replica 被提升为 master,其他的副本被重新配置为使用新的 master,应用程序在连接 Redis 服务器时会被告知新的地址。 +- **配置提供程序**,哨兵会充当客户端服务发现的权威来源:客户端连接到哨兵以获取当前 Redis 主节点的地址。如果发生故障转移,哨兵会报告新地址。 + +**Redis 2.8 开始提供稳定版本的 Redis 哨兵**。Redis 2.6 提供的第一版 Redis 哨兵已被弃用,不建议使用。 + +在使用 Redis 哨兵之前,您需要了解一些[基础知识](https://redis.io/topics/sentinel#fundamental-things-to-know-about-sentinel-before-deploying): + +1. 您至少需要三个哨兵实例才能进行稳健的部署。 +2. 这三个哨兵实例应放置在彼此独立的计算机或虚拟机中。例如,分别位于不同的可用区域上的不同物理服务器或虚拟机上。 +3. **由于 Redis 使用异步复制,无法保证在发生故障时能够保留已确认的写入。** 然而,有一些部署 哨兵的方法,可以使丢失写入的窗口限于某些时刻,当然还有其他不太安全的部署方法。 +4. 如果您不在开发环境中经常进行测试,就无法确保 HA 的设置是安全的。在条件允许的情况,如果能够在生产环境中进行验证则更好。错误的配置往往都是在你难以预期和响应的时间出现(比如,凌晨 3 点你的 master 节点悄然罢工)。 +5. 
**哨兵、Docker 或其他形式的网络地址转换或端口映射应谨慎混用**:Docker 执行端口重映射,会破坏其他哨兵进程的哨兵自动发现以及 master 的 replicas 列表。 + +更多信息请阅读[官方文档](https://redis.io/topics/sentinel)。 + +部署了 Redis 服务器和哨兵以后,`META-URL` 可以指定为 `redis[s]://[[USER]:PASSWORD@]MASTER_NAME,SENTINEL_ADDR[,SENTINEL_ADDR]:SENTINEL_PORT[/DB]`,例如: + +```shell +juicefs mount redis://:password@masterName,1.2.3.4,1.2.5.6:26379/2 ~/jfs +``` + +:::tip 提示 +对于 JuiceFS v0.16 及以上版本,URL 中提供的密码会用于连接 Redis 服务器,哨兵的密码需要用环境变量 `SENTINEL_PASSWORD` 指定。对于更早的版本,URL 中的密码会同时用于连接 Redis 服务器和哨兵,也可以通过环境变量 `SENTINEL_PASSWORD` 和 `REDIS_PASSWORD` 来覆盖。 +::: + +## 数据持久性 + +Redis 提供了不同范围的[持久性](https://redis.io/topics/persistence)选项: + +- **RDB**:以指定的时间间隔生成当前数据集的快照。 +- **AOF**:记录服务器收到的每一个写操作,在服务器启动时重建原始数据集。命令使用与 Redis 协议本身相同的格式以追加写(append-only)的方式记录。当日志变得太大时,Redis 能够在后台重写日志。 +- **RDB+AOF** 建议:组合使用 AOF 和 RDB。在这种情况下,当 Redis 重新启动时,AOF 文件将用于重建原始数据集,因为它保证是最完整的。 + +当使用 AOF 时,您可以有不同的 fsync 策略: + +1. 没有 fsync; +2. 每秒 fsync 默认; +3. 每次查询 fsync。 + +默认策略「每秒 fsync」是不错的选择(fsync 是使用后台线程执行的,当没有 fsync 正在进行时,主线程会努力执行写入),**但你可能丢失最近一秒钟的写入**。 + +磁盘可能会损坏,虚拟机可能会消失,即使采用 RBD+AOF 模式,**依然需要定期备份 Redis 数据**。 + +Redis 对数据备份非常友好,因为您可以在数据库运行时复制 RDB 文件,RDB 一旦生成就永远不会被修改,当它被生成时,它使用一个临时名称,并且只有在新快照完成时才使用 `rename` 原子地重命名到其最终目的地。您还可以复制 AOF 文件以创建备份。 + +更多信息请阅读[官方文档](https://redis.io/topics/persistence)。 + +## 备份 Redis 数据 + +磁盘会损坏、云实例会消失,**请务必备份数据库!** + +默认情况下,Redis 将数据集的快照保存在磁盘上,名为 `dump.rdb` 的二进制文件中。你可以根据需要,将 Redis 配置为当数据集至少发生 M 次变化时,每 N 秒保存一次,也可以手动调用 [`SAVE`](https://redis.io/commands/save) 或 [`BGSAVE`](https://redis.io/commands/bgsave) 命令。 + +Redis 对数据备份非常友好,因为您可以在数据库运行时复制 RDB 文件:RDB 一旦生成就永远不会被修改,当它被生成时,它使用一个临时名称,并且只有在新快照完成时才使用 `rename(2)` 原子地重命名到其最终目的地。 + +这意味着在服务器运行时复制 RDB 文件是完全安全的。以下是我们的建议: + +- 在您的服务器中创建一个 cron 任务,在一个目录中创建 RDB 文件的每小时快照,并在另一个目录中创建每日快照。 +- 每次 cron 脚本运行时,请务必调用 `find` 命令以确保删除太旧的快照:例如,您可以保留最近 48 小时的每小时快照,以及一至两个月的每日快照。要确保使用数据和时间信息来命名快照。 +- 确保每天至少一次将 RDB 快照从运行 Redis 的实例传输至 _数据中心以外_ 或至少传输至 _物理机以外_ 。 + +更多信息请阅读[官方文档](https://redis.io/topics/persistence)。 + +--- + +## 推荐的 Redis 托管服务 + +### Amazon ElastiCache for Redis + +[Amazon ElastiCache for Redis](https://aws.amazon.com/elasticache/redis) 是为云构建的完全托管的、与 Redis 兼容的内存数据存储。它提供[自动故障切换](https://docs.aws.amazon.com/AmazonElastiCache/latest/red-ug/AutoFailover.html)、[自动备份](https://docs.aws.amazon.com/AmazonElastiCache/latest/red-ug/backups-automatic.html)等功能以确保可用性和持久性。 + +:::info 说明 +Amazon ElastiCache for Redis 有两种类型:禁用集群模式和启用集群模式。因为 JuiceFS 使用[事务](https://redis.io/topics/transactions)来保证元数据操作的原子性,所以不能使用「启用集群模式」类型。 +::: + +### Google Cloud Memorystore for Redis + +[Google Cloud Memorystore for Redis](https://cloud.google.com/memorystore/docs/redis) 是针对 Google Cloud 的完全托管的 Redis 服务。通过利用高度可扩展、可用且安全的 Redis 服务,在 Google Cloud 上运行的应用程序可以实现卓越的性能,而无需管理复杂的 Redis 部署。 + +### Azure Cache for Redis + +[Azure Cache for Redis](https://azure.microsoft.com/en-us/services/cache) 是一个完全托管的内存缓存,支持高性能和可扩展的架构。使用它来创建云或混合部署,以亚毫秒级延迟处理每秒数百万个请求——所有这些都具有托管服务的配置、安全性和可用性优势。 + +### 阿里云 ApsaraDB for Redis + +[阿里云 ApsaraDB for Redis](https://www.alibabacloud.com/product/apsaradb-for-redis) 是一种兼容原生 Redis 协议的数据库服务。它支持混合内存和硬盘以实现数据持久性。云数据库 Redis 版提供高可用的热备架构,可扩展以满足高性能、低延迟的读写操作需求。 + +:::info 说明 +ApsaraDB for Redis 支持 3 种类型的[架构](https://www.alibabacloud.com/help/doc-detail/86132.htm):标准、集群和读写分离。因为 JuiceFS 使用[事务](https://redis.io/topics/transactions)来保证元数据操作的原子性,所以不能使用集群类型架构。 +::: + +### 腾讯云 TencentDB for Redis + +[腾讯云 TencentDB for Redis](https://intl.cloud.tencent.com/product/crs) 是一种兼容 Redis 协议的缓存和存储服务。丰富多样的数据结构选项,帮助您开发不同类型的业务场景,提供主从热备份、容灾自动切换、数据备份、故障转移、实例监控、在线等一整套数据库服务缩放和数据回滚。 + +:::info 说明 +TencentDB for 
Redis 支持两种类型的[架构](https://intl.cloud.tencent.com/document/product/239/3205):标准和集群。因为 JuiceFS 使用[事务](https://redis.io/topics/transactions)来保证元数据操作的原子性,所以不能使用集群类型架构。 +::: diff --git a/docs/zh_cn/administration/metadata/tikv_best_practices.md b/docs/zh_cn/administration/metadata/tikv_best_practices.md new file mode 100644 index 0000000..dbfc66a --- /dev/null +++ b/docs/zh_cn/administration/metadata/tikv_best_practices.md @@ -0,0 +1,5 @@ +--- +sidebar_label: TiKV 最佳实践 +sidebar_position: 3 +--- +# TiKV 最佳实践 \ No newline at end of file diff --git a/docs/zh_cn/administration/metadata_dump_load.md b/docs/zh_cn/administration/metadata_dump_load.md new file mode 100644 index 0000000..8bdedfc --- /dev/null +++ b/docs/zh_cn/administration/metadata_dump_load.md @@ -0,0 +1,123 @@ +--- +sidebar_label: 元数据备份和恢复 +sidebar_position: 4 +slug: /metadata_dump_load +--- +# JuiceFS 元数据备份和恢复 + +:::tip 提示 +- JuiceFS v0.15.2 开始支持元数据手动备份、恢复和引擎间迁移。 +- JuiceFS v1.0.0 开始支持元数据自动备份 +::: + +## 手动备份 + +JuiceFS 支持[多种元数据存储引擎](../reference/how_to_setup_metadata_engine.md),且各引擎内部的数据管理格式各有不同。为了便于管理,JuiceFS 提供了 `dump` 命令允许将所有元数据以统一格式写入到 [JSON](https://www.json.org/json-en.html) 文件进行备份。同时,JuiceFS 也提供了 `load` 命令,允许将备份恢复或迁移到任意元数据存储引擎。命令的详细信息请参考[这里](../reference/command_reference.md#juicefs-dump)。 + +### 元数据备份 + +使用 JuiceFS 客户端提供的 `dump` 命令可以将元数据导出到文件,例如: + +```bash +juicefs dump redis://192.168.1.6:6379/1 meta.dump +``` + +该命令默认从根目录 `/` 开始,深度遍历目录树下所有文件,将每个文件的元数据信息按 JSON 格式写入到文件。 + +:::note 注意 +`juicefs dump` 仅保证单个文件自身的完整性,不提供全局时间点快照的功能,如在 dump 过程中业务仍在写入,最终结果会包含不同时间点的信息。 +::: + +Redis、MySQL 等数据库都有其自带的备份工具,如 [Redis RDB](https://redis.io/topics/persistence#backing-up-redis-data) 和 [mysqldump](https://dev.mysql.com/doc/mysql-backup-excerpt/5.7/en/mysqldump-sql-format.html) 等,使用它们作为 JuiceFS 元数据存储,你仍然有必要用各个数据库自身的备份工具定期备份元数据。 + +`juicefs dump` 的价值在于它能将完整的元数据信息以统一的 JSON 格式导出,便于管理和保存,而且不同的元数据存储引擎都可以识别并导入。在实际应用中,`dump` 命令于数据库自带的备份工具应该共同使用,相辅相成。 + +:::note 注意 +以上讨论的仅为元数据备份,完整的文件系统备份方案还应至少包含对象存储数据的备份,如异地容灾、回收站、多版本等。 +::: + +### 元数据恢复 + +:::tip 特别提示 +JSON 备份只能恢复到 `新创建的数据库` 或 `空数据库` 中。 +::: + +使用 JuiceFS 客户端提供的 `load` 命令可以将已备份的 JSON 文件中的元数据导入到一个新的**空数据库**中,例如: + +```bash +juicefs load redis://192.168.1.6:6379/1 meta.dump +``` + +该命令会自动处理因包含不同时间点文件而产生的冲突问题,并重新计算文件系统的统计信息(空间使用量,inode 计数器等),最后在数据库中生成一份全局一致的元数据。另外,如果你想自定义某些元数据(请务必小心),可以尝试在 load 前手动修改 JSON 文件。 + +### 元数据迁移 + +:::tip 特别提示 +元数据迁移操作要求目标数据库是 `新创建的` 或 `空数据库`。 +::: + +得益于 JSON 格式的通用性,JuiceFS 支持的所有元数据存储引擎都能识别,因此可以将元数据信息从一种引擎中导出为 JSON 备份,然后再导入到另外一种引擎,从而实现元数据在不同类型引擎间的迁移。例如: + +```bash +$ juicefs dump redis://192.168.1.6:6379/1 meta.dump +$ juicefs load mysql://user:password@(192.168.1.6:3306)/juicefs meta.dump +``` + +也可以通过系统的 Pipe 直接迁移: + +```bash +$ juicefs dump redis://192.168.1.6:6379/1 | juicefs load mysql://user:password@(192.168.1.6:3306)/juicefs +``` + +:::caution 风险提示 +为确保迁移前后文件系统内容一致,需要在迁移过程中停止业务写入。另外,由于迁移后仍使用原来的对象存储,在新的元数据引擎上线前,请确保旧的引擎已经下线或仅有对象存储的只读权限,否则可能造成文件系统损坏。 +::: + +### 元数据检视 + +除了可以导出完整的元数据信息,`dump` 命令还支持导出特定子目录中的元数据。因为导出的 JSON 内容可以让用户非常直观地查看到指定目录树下所有文件的内部信息,因此常被用来辅助排查问题。例如: + +```bash +$ juicefs dump redis://192.168.1.6:6379/1 meta.dump --subdir /path/in/juicefs +``` + +另外,也可以使用 `jq` 等工具对导出文件进行分析。 + +:::note 注意 +为保证服务稳定,请不要在线上环境 dump 过于大的目录。 +::: + +## 自动备份 + +从 JuiceFS v1.0.0 开始,不论文件系统通过 `mount` 命令挂载,还是通过 JuiceFS S3 网关及 Hadoop Java SDK 访问,客户端每小时都会自动备份元数据并拷贝到对象存储。 + +备份的文件存储在对象存储的 `meta` 目录中,它是一个独立于数据存储的目录,在挂载点中不可见,也不会与数据存储之间产生影响,用对象存储的文件浏览器即可查看和管理。 + +![](../images/meta-auto-backup-list.png) + +默认情况下,JuiceFS 客户端每小时备份一次元数据,自动备份的频率可以在挂载文件系统时通过 `--backup-meta` 选项进行调整,例如,要设置为每 
8 个小时执行一次自动备份: + +``` +$ sudo juicefs mount -d --backup-meta 8h redis://127.0.0.1:6379/1 /mnt +``` + +备份频率可以精确到秒,支持的单位如下: + +- `h`:精确到小时,如 `1h`; +- `m`:精确到分钟,如 `30m`、`1h30m`; +- `s`:精确到秒,如 `50s`、`30m50s`、`1h30m50s`; + +### 自动备份策略 + +虽然自动备份元数据成为了客户端的默认动作,但在多主机共享挂载同一个文件系统时并不会发生备份冲突。 + +JuiceFS 维护了一个全局的时间戳,确保同一时刻只有一个客户端执行备份操作。当客户端之间设置了不同的备份周期,那么就会以周期最短的设置为准进行备份。 + +### 备份清理策略 + +JuiceFS 会按照以下规则定期清理备份: + +- 保留 2 天以内全部的备份; +- 超过 2 天不足 2 周的,保留每天中的 1 个备份; +- 超过 2 周不足 2 月的,保留每周中的 1 个备份; +- 超过 2 个月的,保留每个月中的 1 个备份。 diff --git a/docs/zh_cn/administration/migration/from_hdfs.md b/docs/zh_cn/administration/migration/from_hdfs.md new file mode 100644 index 0000000..433d9b9 --- /dev/null +++ b/docs/zh_cn/administration/migration/from_hdfs.md @@ -0,0 +1,5 @@ +--- +sidebar_label: 从 HDFS 迁移 +sidebar_position: 3 +--- +# 从 HDFS 迁移数据 \ No newline at end of file diff --git a/docs/zh_cn/administration/migration/from_local.md b/docs/zh_cn/administration/migration/from_local.md new file mode 100644 index 0000000..ebfee70 --- /dev/null +++ b/docs/zh_cn/administration/migration/from_local.md @@ -0,0 +1,5 @@ +--- +sidebar_label: 从本地磁盘迁移 +sidebar_position: 1 +--- +# 从本地磁盘迁移数据 \ No newline at end of file diff --git a/docs/zh_cn/administration/migration/from_s3.md b/docs/zh_cn/administration/migration/from_s3.md new file mode 100644 index 0000000..882ae5a --- /dev/null +++ b/docs/zh_cn/administration/migration/from_s3.md @@ -0,0 +1,5 @@ +--- +sidebar_label: 从对象存储迁移 +sidebar_position: 2 +--- +# 从对象存储迁移数据 \ No newline at end of file diff --git a/docs/zh_cn/administration/monitoring.md b/docs/zh_cn/administration/monitoring.md new file mode 100644 index 0000000..b181e24 --- /dev/null +++ b/docs/zh_cn/administration/monitoring.md @@ -0,0 +1,214 @@ +--- +sidebar_label: 监控 +sidebar_position: 6 +--- + +# 监控 + +JuiceFS 为每个文件系统提供一个 [Prometheus](https://prometheus.io) API(默认的 API 地址是 `http://localhost:9567/metrics`),这个 API 可以用于收集 JuiceFS 监控指标。当收集好监控指标以后,可以通过 JuiceFS 提供的 [Grafana](https://grafana.com) 仪表盘模板快速展示这些指标。 + +## 收集监控指标 + +根据部署 JuiceFS 的方式不同可以有不同的收集监控指标的方法,下面分别介绍。 + +### 挂载点 + +当通过 [`juicefs mount`](../reference/command_reference.md#juicefs-mount) 命令挂载 JuiceFS 文件系统后,可以通过 `http://localhost:9567/metrics` 这个地址收集监控指标,你也可以通过 `--metrics` 选项自定义。如: + +```shell +$ juicefs mount --metrics localhost:9567 ... +``` + +你可以使用命令行工具查看这些监控指标: + +```shell +$ curl http://localhost:9567/metrics +``` + +除此之外,每个 JuiceFS 文件系统的根目录还有一个叫做 `.stats` 的隐藏文件,通过这个文件也可以查看监控指标。例如(这里假设挂载点的路径是 `/jfs`): + +```shell +$ cat /jfs/.stats +``` + +### Kubernetes + +[JuiceFS CSI 驱动](../deployment/how_to_use_on_kubernetes.md)默认会在 mount pod 的 `9567` 端口提供监控指标,也可以通过在 `mountOptions` 中添加 `metrics` 选项自定义(关于如何修改 `mountOptions` 请参考 [CSI 驱动文档](https://juicefs.com/docs/zh/csi/examples/mount-options)),如: + +```yaml +apiVersion: v1 +kind: PersistentVolume +metadata: + name: juicefs-pv + labels: + juicefs-name: ten-pb-fs +spec: + ... + mountOptions: + - metrics=0.0.0.0:9567 +``` + +新增一个抓取任务到 `prometheus.yml` 来收集监控指标: + +```yaml +scrape_configs: + - job_name: 'juicefs' + kubernetes_sd_configs: + - role: pod + relabel_configs: + - source_labels: [__meta_kubernetes_pod_label_app_kubernetes_io_name] + action: keep + regex: juicefs-mount + - source_labels: [__address__] + action: replace + regex: ([^:]+)(:\d+)? 
+ replacement: $1:9567 + target_label: __address__ + - source_labels: [__meta_kubernetes_pod_node_name] + target_label: node + action: replace +``` + +这里假设 Prometheus 服务运行在 Kubernetes 集群中,如果你的 Prometheus 服务运行在 Kubernetes 集群之外,请确保 Prometheus 服务可以访问 Kubernetes 节点,请参考[这个 issue](https://github.com/prometheus/prometheus/issues/4633) 添加 `api_server` 和 `tls_config` 配置到以上文件: + +```yaml +scrape_configs: + - job_name: 'juicefs' + kubernetes_sd_configs: + - api_server: + role: pod + tls_config: + ca_file: <...> + cert_file: <...> + key_file: <...> + insecure_skip_verify: false + relabel_configs: + ... +``` + +### S3 网关 + +:::note 注意 +该特性需要运行 0.17.1 及以上版本 JuiceFS 客户端 +::: + +[JuiceFS S3 网关](../deployment/s3_gateway.md)默认会在 `http://localhost:9567/metrics` 这个地址提供监控指标,你也可以通过 `--metrics` 选项自定义。如: + +```shell +$ juicefs gateway --metrics localhost:9567 ... +``` + +如果你是在 Kubernetes 中部署 JuiceFS S3 网关,可以参考 [Kubernetes](#kubernetes) 小节的 Prometheus 配置来收集监控指标(区别主要在于 `__meta_kubernetes_pod_label_app_kubernetes_io_name` 这个标签的正则表达式),例如: + +```yaml +scrape_configs: + - job_name: 'juicefs-s3-gateway' + kubernetes_sd_configs: + - role: pod + relabel_configs: + - source_labels: [__meta_kubernetes_pod_label_app_kubernetes_io_name] + action: keep + regex: juicefs-s3-gateway + - source_labels: [__address__] + action: replace + regex: ([^:]+)(:\d+)? + replacement: $1:9567 + target_label: __address__ + - source_labels: [__meta_kubernetes_pod_node_name] + target_label: node + action: replace +``` + +#### 通过 Prometheus Operator 收集 + +[Prometheus Operator](https://github.com/prometheus-operator/prometheus-operator) 让用户在 Kubernetes 环境中能够快速部署和管理 Prometheus,借助 Prometheus Operator 提供的 `ServiceMonitor` CRD 可以自动生成抓取配置。例如(假设 JuiceFS S3 网关的 `Service` 部署在 `kube-system` 名字空间): + +```yaml +apiVersion: monitoring.coreos.com/v1 +kind: ServiceMonitor +metadata: + name: juicefs-s3-gateway +spec: + namespaceSelector: + matchNames: + - kube-system + selector: + matchLabels: + app.kubernetes.io/name: juicefs-s3-gateway + endpoints: + - port: metrics +``` + +有关 Prometheus Operator 的更多信息,请查看[官方文档](https://github.com/prometheus-operator/prometheus-operator/blob/main/Documentation/user-guides/getting-started.md)。 + +### Hadoop + +[JuiceFS Hadoop Java SDK](../deployment/hadoop_java_sdk.md) 支持把监控指标上报到 [Pushgateway](https://github.com/prometheus/pushgateway),然后让 Prometheus 从 Pushgateway 抓取指标。 + +请用如下配置启用指标上报: + +```xml + + juicefs.push-gateway + host:port + +``` + +同时可以通过 `juicefs.push-interval` 配置修改上报指标的频率,默认为 10 秒上报一次。JuiceFS Hadoop Java SDK 支持的所有配置参数请参考[文档](../deployment/hadoop_java_sdk.md#客户端配置参数)。 + +:::info 说明 +根据 [Pushgateway 官方文档](https://github.com/prometheus/pushgateway/blob/master/README.md#configure-the-pushgateway-as-a-target-to-scrape)的建议,Prometheus 的[抓取配置](https://prometheus.io/docs/prometheus/latest/configuration/configuration/#scrape_config)中需要设置 `honor_labels: true`。 + +需要特别注意,Prometheus 从 Pushgateway 抓取的指标的时间戳不是 JuiceFS Hadoop Java SDK 上报时的时间,而是抓取时的时间,具体请参考 [Pushgateway 官方文档](https://github.com/prometheus/pushgateway/blob/master/README.md#about-timestamps)。 + +默认情况下 Pushgateway 只会在内存中保存指标,如果需要持久化到磁盘上,可以通过 `--persistence.file` 选项指定保存的文件路径以及 `--persistence.interval` 选项指定保存到文件的频率(默认 5 分钟保存一次)。 +::: + +:::note 注意 +每一个使用 JuiceFS Hadoop Java SDK 的进程会有唯一的指标,而 Pushgateway 会一直记住所有收集到的指标,导致指标数持续积累占用过多内存,也会使得 Prometheus 抓取指标时变慢,建议定期清理 Pushgateway 上的指标。 + +定期使用下面的命令清理 Pushgateway 的指标数据,清空指标不影响运行中的 JuiceFS Hadoop Java SDK 持续上报数据。**注意 Pushgateway 启动时必须指定 `--web.enable-admin-api` 选项,同时以下命令会清空 Pushgateway 中的所有监控指标。** + +```bash +$ curl -X PUT 
http://host:9091/api/v1/admin/wipe +``` +::: + +有关 Pushgateway 的更多信息,请查看[官方文档](https://github.com/prometheus/pushgateway/blob/master/README.md)。 + +### 使用 Consul 作为注册中心 + +:::note 注意 +该特性需要运行 1.0.0 及以上版本 JuiceFS 客户端 +::: + +JuiceFS 支持使用 Consul 作为监控指标 API 的注册中心,默认的 Consul 地址是 `127.0.0.1:8500`,你也可以通过 `--consul` 选项自定义。如: + +```shell +$ juicefs mount --consul 1.2.3.4:8500 ... +``` + +当配置了 Consul 地址以后,`--metrics` 选项不再需要配置,JuiceFS 将会根据自身网络与端口情况自动配置监控指标 URL。如果同时设置了 `--metrics`,则会优先尝试监听配置的 URL。 + +注册到 Consul 上的每个实例,其 `serviceName` 都为 `juicefs`,`serviceId` 的格式为 `:`,例如:`127.0.0.1:/tmp/jfs`。 + +每个 instance 的 meta 都包含了 `hostname` 与 `mountpoint` 两个维度,其中 `mountpoint` 为 `s3gateway` 代表该实例为 S3 网关。 + +## 展示监控指标 + +### Grafana 仪表盘模板 + +JuiceFS 提供一些 Grafana 的仪表盘模板,将模板导入以后就可以展示收集上来的监控指标。目前提供的仪表盘模板有: + +| 模板名称 | 说明 | +| ---- | ---- | +| [`grafana_template.json`](https://github.com/juicedata/juicefs/blob/main/docs/en/grafana_template.json) | 用于展示自挂载点、S3 网关(非 Kubernetes 部署)及 Hadoop Java SDK 收集的指标 | +| [`grafana_template_k8s.json`](https://github.com/juicedata/juicefs/blob/main/docs/en/grafana_template_k8s.json) | 用于展示自 Kubernetes CSI 驱动、S3 网关(Kubernetes 部署)收集的指标 | + +Grafana 仪表盘示例效果如下图: + +![JuiceFS Grafana dashboard](../images/grafana_dashboard.png) + +## 监控指标索引 + +请参考[「JuiceFS 监控指标」](../reference/p8s_metrics.md)文档 diff --git a/docs/zh_cn/administration/quota.md b/docs/zh_cn/administration/quota.md new file mode 100644 index 0000000..6d076f7 --- /dev/null +++ b/docs/zh_cn/administration/quota.md @@ -0,0 +1,128 @@ +--- +sidebar_label: 存储配额 +sidebar_position: 7 +--- +# JuiceFS 存储配额 + +JuiceFS v0.14.2 开始支持文件系统级别的存储配额,该功能包括: + +- 限制文件系统的总可用容量 +- 限制文件系统的 inode 总数 + +:::tip 提示 +存储限额设置会保存在元数据引擎中以供所有挂载点读取,每个挂载点的客户端也会缓存自己的已用容量和 inodes 数,每秒向元数据引擎同步一次。与此同时,客户端每 10 秒会从元数据引擎读取最新的用量值,从而实现用量信息在每个挂载点之间同步,但这种信息同步机制并不能保证用量数据被精确统计。 +::: + +## 查看文件系统的基本信息 + +以 Linux 环境为例,使用系统自带的 `df` 命令可以看到,一个 JuiceFS 类型的文件系统默认的容量标识为 `1.0P` : + +```shell +$ df -Th | grep juicefs +JuiceFS:ujfs fuse.juicefs 1.0P 682M 1.0P 1% /mnt +``` + +:::note 说明 +JuiceFS 通过 FUSE 实现对 POSIX 接口的支持,因为底层通常是容量能够无限扩展的对象存储,所以标识容量只是一个估值(也代表无限制)并非实际容量,它会随着实际用量动态变化。 +::: + +通过客户端自带的 `config` 命令可以查看一个文件系统的详细信息: + +```shell +$ juicefs config $METAURL +{ + "Name": "ujfs", + "UUID": "1aa6d290-279b-432f-b9b5-9d7fd597dec2", + "Storage": "minio", + "Bucket": "127.0.0.1:9000/jfs1", + "AccessKey": "herald", + "SecretKey": "removed", + "BlockSize": 4096, + "Compression": "none", + "Shards": 0, + "Partitions": 0, + "Capacity": 0, + "Inodes": 0, + "TrashDays": 0 +} +``` + +## 限制总容量 + +可以在创建文件系统时通过 `--capacity` 设置容量限额,单位 GiB,例如创建一个可用容量为 100 GiB 文件系统的: + +```shell +$ juicefs format --storage minio \ +--bucket 127.0.0.1:9000/jfs1 \ +... +--capacity 100 \ +$METAURL myjfs +``` + +也可以通过 `config` 命令,为一个已创建的文件系统设置容量限额: + +```shell +$ juicefs config $METAURL --capacity 100 +2022/01/27 12:31:39.506322 juicefs[16259] : Meta address: postgres://herald@127.0.0.1:5432/jfs1 +2022/01/27 12:31:39.521232 juicefs[16259] : The latency to database is too high: 14.771783ms + capacity: 0 GiB -> 100 GiB +``` + +设置了存储限额的文件系统,标识容量会变成限制容量: + +```shell +$ df -Th | grep juicefs +JuiceFS:ujfs fuse.juicefs 100G 682M 100G 1% /mnt +``` + +## 限制 inode 总量 + +在 Linux 系统中,每个文件(文件夹也是文件的一种)不论大小都有一个 inode,因此限制 inode 数量等同于限制文件数量。 + +可以在创建文件系统时通过 `--inodes` 设置限额,例如: + +``` +$ juicefs format --storage minio \ +--bucket 127.0.0.1:9000/jfs1 \ +... 
+--inodes 100 \ +$METAURL myjfs +``` + +以上命令创建的文件系统仅允许存储 100 个文件,但不限制单个文件的大小,比如单个文件 1TB 甚至更大也没有问题,只要文件总数不超过 100 个即可。 + +也可以通过 `config` 命令,为一个已创建的文件系统设置容量限额: + +```shell +$ juicefs config $METAURL --inodes 100 +2022/01/27 12:35:37.311465 juicefs[16407] : Meta address: postgres://herald@127.0.0.1:5432/jfs1 +2022/01/27 12:35:37.322991 juicefs[16407] : The latency to database is too high: 11.413961ms + inodes: 0 -> 100 +``` + +## 组合使用 + +你可以结合 `--capacity` 和 `--inodes` 更灵活的设置文件系统的容量限额,比如,创建一个文件系统,限制总容量为 100TiB 且仅允许存储 100000 文件: + +```shell +$ juicefs format --storage minio \ +--bucket 127.0.0.1:9000/jfs1 \ +... +--capacity 102400 \ +--inodes 100000 \ +$METAURL myjfs +``` + +同样地,对于已创建的文件系统,可分别进行设置: + +```shell +juicefs config $METAURL --capacity 102400 +``` + +```shell +juicefs config $METAURL --inodes 100000 +``` + +:::tip 提示 +客户端每 60 秒从元数据引擎读取一次最新的存储限额设置来更新本地的设置,这个时间频率可能会造成其他挂载点最长需要 60 秒才能完成限额设置的更新。 +::: diff --git a/docs/zh_cn/administration/status_check_and_maintenance.md b/docs/zh_cn/administration/status_check_and_maintenance.md new file mode 100644 index 0000000..ce706ac --- /dev/null +++ b/docs/zh_cn/administration/status_check_and_maintenance.md @@ -0,0 +1,8 @@ +--- +sidebar_label: 状态检查 & 维护 +sidebar_position: 8 +--- +# 状态检查 & 维护 + +:::note 注意 +文档正在编写 \ No newline at end of file diff --git a/docs/zh_cn/administration/sync_accounts_between_multiple_hosts.md b/docs/zh_cn/administration/sync_accounts_between_multiple_hosts.md new file mode 100644 index 0000000..bfeb04c --- /dev/null +++ b/docs/zh_cn/administration/sync_accounts_between_multiple_hosts.md @@ -0,0 +1,130 @@ +--- +sidebar_label: 多主机间同步账户 +sidebar_position: 10 +slug: /sync_accounts_between_multiple_hosts +--- + +# JuiceFS 多主机间同步账户 + +JuiceFS 支持 POSIX 兼容的 ACL,以目录或文件的粒度管理权限。该行为与本地文件系统相同。 + +为了让用户获得直观一致的权限管理体验(例如,用户 A 在主机 X 中访问的文件,在主机 Y 中也应该可以用相同的用户身份访问),想要访问 JuiceFS 存储的同一个用户,应该在所有主机上具有相同的 UID 和 GID。 + +在这里,我们提供了一个简单的 [Ansible](https://www.ansible.com/community) playbook 来演示如何确保一个帐户在多个主机上具有相同的 UID 和 GID。 + +:::note 注意 +如果你是在 Hadoop 环境使用 JuiceFS,除了在多主机间同步账户以外,也可以指定一个全局的用户列表和所属用户组文件,具体请参见[这里](../deployment/hadoop_java_sdk.md#其他配置)。 +::: + +## 安装 Ansible + +选择一个主机作为 [控制节点](https://docs.ansible.com/ansible/latest/installation_guide/intro_installation.html#managed-node-requirements),它可以使用 `ssh` 以 `root` 或其他在 sudo 用户组的身份,访问所有。在此主机上安装 Ansible。阅读 [安装 Ansible](https://docs.ansible.com/ansible/latest/installation_guide/intro_installation.html#installing-ansible) 了解更多安装细节。 + +## 确保所有主机上的帐户相同 + +创建一个空目录 `account-sync` ,将下面的内容保存在该目录下的 `play.yaml` 中。 + +```yaml +--- +- hosts: all + tasks: + - name: "Ensure group {{ group }} with gid {{ gid }} exists" + group: + name: "{{ group }}" + gid: "{{ gid }}" + state: present + + - name: "Ensure user {{ user }} with uid {{ uid }} exists" + user: + name: "{{ user }}" + uid: "{{ uid }}" + group: "{{ gid }}" + state: present +``` + +在该目录下创建一个名为 `hosts` 的文件,将所有需要创建账号的主机的 IP 地址放置在该文件中,每行一个 IP。 + +在这里,我们确保在 2 台主机上使用 UID 1200 的帐户 `alice` 和 GID 500 的 `staff` 组: + +```shell +~/account-sync$ cat hosts +172.16.255.163 +172.16.255.180 +~/account-sync$ ansible-playbook -i hosts -u root --ssh-extra-args "-o StrictHostKeyChecking=no" \ +--extra-vars "group=staff gid=500 user=alice uid=1200" play.yaml + +PLAY [all] ************************************************************************************************ + +TASK [Gathering Facts] ************************************************************************************ +ok: [172.16.255.180] +ok: [172.16.255.163] + +TASK [Ensure group staff with gid 500 exists] 
************************************************************* +ok: [172.16.255.163] +ok: [172.16.255.180] + +TASK [Ensure user alice with uid 1200 exists] ************************************************************* +changed: [172.16.255.180] +changed: [172.16.255.163] + +PLAY RECAP ************************************************************************************************ +172.16.255.163 : ok=3 changed=1 unreachable=0 failed=0 +172.16.255.180 : ok=3 changed=1 unreachable=0 failed=0 +``` + +现在已经在这 2 台主机上创建了新帐户 `alice:staff`。 + +如果指定的 UID 或 GID 已分配给某些主机上的另一个用户或组,则创建将失败。 + +```shell +~/account-sync$ ansible-playbook -i hosts -u root --ssh-extra-args "-o StrictHostKeyChecking=no" \ +--extra-vars "group=ubuntu gid=1000 user=ubuntu uid=1000" play.yaml + +PLAY [all] ************************************************************************************************ + +TASK [Gathering Facts] ************************************************************************************ +ok: [172.16.255.180] +ok: [172.16.255.163] + +TASK [Ensure group ubuntu with gid 1000 exists] *********************************************************** +ok: [172.16.255.163] +fatal: [172.16.255.180]: FAILED! => {"changed": false, "msg": "groupmod: GID '1000' already exists\n", "name": "ubuntu"} + +TASK [Ensure user ubuntu with uid 1000 exists] ************************************************************ +ok: [172.16.255.163] + to retry, use: --limit @/home/ubuntu/account-sync/play.retry + +PLAY RECAP ************************************************************************************************ +172.16.255.163 : ok=3 changed=0 unreachable=0 failed=0 +172.16.255.180 : ok=1 changed=0 unreachable=0 failed=1 +``` + +在上面的示例中,组 ID 1000 已分配给主机 `172.16.255.180` 上的另一个组,我们应该 **更改 GID** 或 **删除主机 `172.16.255.180` 上 GID 为 1000** 的组,然后再次运行 playbook。 + +:::caution 注意 +如果用户帐户已经存在于主机上,并且我们将其更改为另一个 UID 或 GID 值,则用户可能会失去对他们以前拥有的文件和目录的权限。例如: + +```shell +$ ls -l /tmp/hello.txt +-rw-r--r-- 1 alice staff 6 Apr 26 21:43 /tmp/hello.txt +$ id alice +uid=1200(alice) gid=500(staff) groups=500(staff) +``` + +我们将 alice 的 UID 从 1200 改为 1201 + +```shell +~/account-sync$ ansible-playbook -i hosts -u root --ssh-extra-args "-o StrictHostKeyChecking=no" \ +--extra-vars "group=staff gid=500 user=alice uid=1201" play.yaml +``` + +现在我们没有权限删除这个文件,因为它的所有者不是 alice: + +```shell +$ ls -l /tmp/hello.txt +-rw-r--r-- 1 1200 staff 6 Apr 26 21:43 /tmp/hello.txt +$ rm /tmp/hello.txt +rm: remove write-protected regular file '/tmp/hello.txt'? y +rm: cannot remove '/tmp/hello.txt': Operation not permitted +``` +::: diff --git a/docs/zh_cn/benchmark/_performance_tuning.md b/docs/zh_cn/benchmark/_performance_tuning.md new file mode 100644 index 0000000..f8dab85 --- /dev/null +++ b/docs/zh_cn/benchmark/_performance_tuning.md @@ -0,0 +1,5 @@ +--- +sidebar_label: 性能调优 +sidebar_position: 5 +--- +# JuiceFS 性能调优 \ No newline at end of file diff --git a/docs/zh_cn/benchmark/benchmark.md b/docs/zh_cn/benchmark/benchmark.md new file mode 100644 index 0000000..4ff3c2e --- /dev/null +++ b/docs/zh_cn/benchmark/benchmark.md @@ -0,0 +1,28 @@ +--- +sidebar_label: 常规测试 +sidebar_position: 1 +slug: . 
+--- +# JuiceFS 常规测试 + +### 基础测试 + +JuiceFS 提供了 `bench` 子命令来运行一些基本的基准测试,用以评估 JuiceFS 在当前环境的运行情况: + +![JuiceFS Bench](../images/juicefs-bench.png) + +### 吞吐量 + +使用 [fio](https://github.com/axboe/fio) 在 JuiceFS、[EFS](https://aws.amazon.com/efs) 和 [S3FS](https://github.com/s3fs-fuse/s3fs-fuse) 上执行连续读写测试,结果如下: + +[![Sequential Read Write Benchmark](../images/sequential-read-write-benchmark.svg)](../images/sequential-read-write-benchmark.svg) + +结果表明,JuiceFS 可以提供比另外两个工具大 10 倍的吞吐量,[了解更多](fio.md)。 + +### 元数据 IOPS + +使用 [mdtest](https://github.com/hpc/ior) 在 JuiceFS、[EFS](https://aws.amazon.com/efs) 和 [S3FS](https://github.com/s3fs-fuse/s3fs-fuse) 上执行简易的 mdtest 基准测试,结果如下: + +[![Metadata Benchmark](../images/metadata-benchmark.svg)](../images/metadata-benchmark.svg) + +结果表明,JuiceFS 可以提供比另外两个工具更高的元数据 IOPS,[了解更多](mdtest.md)。 diff --git a/docs/zh_cn/benchmark/fio.md b/docs/zh_cn/benchmark/fio.md new file mode 100644 index 0000000..7c00a10 --- /dev/null +++ b/docs/zh_cn/benchmark/fio.md @@ -0,0 +1,73 @@ +--- +sidebar_label: fio 基准测试 +sidebar_position: 7 +slug: /fio +--- +# fio 基准测试 + +## 测试方法 + +使用 [fio](https://github.com/axboe/fio) 在 JuiceFS、[EFS](https://aws.amazon.com/efs) 和 [S3FS](https://github.com/s3fs-fuse/s3fs-fuse) 上执行顺序读、顺序写基准测试。 + +## 测试工具 + +以下测试使用的工具为 fio 3.1。 + +顺序读测试 (任务数: 1): + +``` +fio --name=sequential-read --directory=/s3fs --rw=read --refill_buffers --bs=4M --size=4G +fio --name=sequential-read --directory=/efs --rw=read --refill_buffers --bs=4M --size=4G +fio --name=sequential-read --directory=/jfs --rw=read --refill_buffers --bs=4M --size=4G +``` + +顺序写测试 (任务数: 1): + +``` +fio --name=sequential-write --directory=/s3fs --rw=write --refill_buffers --bs=4M --size=4G --end_fsync=1 +fio --name=sequential-write --directory=/efs --rw=write --refill_buffers --bs=4M --size=4G --end_fsync=1 +fio --name=sequential-write --directory=/jfs --rw=write --refill_buffers --bs=4M --size=4G --end_fsync=1 +``` + +顺序读测试 (任务数: 16): + +``` +fio --name=big-file-multi-read --directory=/s3fs --rw=read --refill_buffers --bs=4M --size=4G --numjobs=16 +fio --name=big-file-multi-read --directory=/efs --rw=read --refill_buffers --bs=4M --size=4G --numjobs=16 +fio --name=big-file-multi-read --directory=/jfs --rw=read --refill_buffers --bs=4M --size=4G --numjobs=16 +``` + +顺序写测试 (任务数: 16): + +``` +fio --name=big-file-multi-write --directory=/s3fs --rw=write --refill_buffers --bs=4M --size=4G --numjobs=16 --end_fsync=1 +fio --name=big-file-multi-write --directory=/efs --rw=write --refill_buffers --bs=4M --size=4G --numjobs=16 --end_fsync=1 +fio --name=big-file-multi-write --directory=/jfs --rw=write --refill_buffers --bs=4M --size=4G --numjobs=16 --end_fsync=1 +``` + +## 测试环境 + +以下测试结果均使用 fio 在亚马逊云 c5d.18xlarge EC2 (72 CPU, 144G RAM) 实例得出,操作系统采用 Ubuntu 18.04 LTS (Kernel 5.4.0) ,JuiceFS 使用同主机的本地 Redis (version 4.0.9) 实例存储元数据。 + +JuiceFS 挂载命令: + +``` +./juicefs format --storage=s3 --bucket=https://.s3..amazonaws.com localhost benchmark +./juicefs mount --max-uploads=150 --io-retries=20 localhost /jfs +``` + +EFS 挂载命令 (与配置说明中一致): + +``` +mount -t nfs -o nfsvers=4.1,rsize=1048576,wsize=1048576,hard,timeo=600,retrans=2,noresvport, .efs..amazonaws.com:/ /efs +``` + +S3FS (version 1.82) 挂载命令: + +``` +s3fs :/s3fs /s3fs -o host=https://s3..amazonaws.com,endpoint=,passwd_file=${HOME}/.passwd-s3fs +``` + +## 测试结果 + +![Sequential Read Write Benchmark](../images/sequential-read-write-benchmark.svg) diff --git a/docs/zh_cn/benchmark/mdtest.md b/docs/zh_cn/benchmark/mdtest.md new file mode 100644 index 0000000..f63eb84 --- 
/dev/null +++ b/docs/zh_cn/benchmark/mdtest.md @@ -0,0 +1,122 @@ +--- +sidebar_label: mdtest 基准测试 +sidebar_position: 8 +slug: /mdtest +--- +# mdtest 基准测试 + +## 测试方法 + +使用 [mdtest](https://github.com/hpc/ior),分别在 JuiceFS、[EFS](https://aws.amazon.com/efs) 和 [S3FS](https://github.com/s3fs-fuse/s3fs-fuse) 上执行元数据性能测试。 + +## 测试工具 + +以下测试使用 mdtest 3.4。 +调整 mdtest 的参数以确保命令可以在 5 分钟内完成。 + +``` +./mdtest -d /s3fs/mdtest -b 6 -I 8 -z 2 +./mdtest -d /efs/mdtest -b 6 -I 8 -z 4 +./mdtest -d /jfs/mdtest -b 6 -I 8 -z 4 +``` + +## 测试环境 + +在下面的测试结果中,所有 mdtest 均在亚马逊云 c5.large EC2 实例(2 CPU,4G RAM),Ubuntu 18.04 LTS(Kernel 5.4.0)系统上进行,JuiceFS 使用的 Redis(4.0.9版本)实例运行在相同区域的 c5.large EC2 实例上。 + +JuiceFS 挂载命令: + +``` +./juicefs format --storage=s3 --bucket=https://.s3..amazonaws.com localhost benchmark +nohup ./juicefs mount localhost /jfs & +``` + +EFS 挂载命令 (与配置说明保持一致): + +``` +mount -t nfs -o nfsvers=4.1,rsize=1048576,wsize=1048576,hard,timeo=600,retrans=2,noresvport, .efs..amazonaws.com:/ /efs +``` + +S3FS (version 1.82) 挂载命令: + +``` +s3fs :/s3fs /s3fs -o host=https://s3..amazonaws.com,endpoint=,passwd_file=${HOME}/.passwd-s3fs +``` + +## 测试结果 + +![Metadata Benchmark](../images/metadata-benchmark.svg) + +### S3FS +``` +mdtest-3.4.0+dev was launched with 1 total task(s) on 1 node(s) +Command line used: ./mdtest '-d' '/s3fs/mdtest' '-b' '6' '-I' '8' '-z' '2' +WARNING: Read bytes is 0, thus, a read test will actually just open/close. +Path : /s3fs/mdtest +FS : 256.0 TiB Used FS: 0.0% Inodes: 0.0 Mi Used Inodes: -nan% +Nodemap: 1 +1 tasks, 344 files/directories + +SUMMARY rate: (of 1 iterations) + Operation Max Min Mean Std Dev + --------- --- --- ---- ------- + Directory creation : 5.977 5.977 5.977 0.000 + Directory stat : 435.898 435.898 435.898 0.000 + Directory removal : 8.969 8.969 8.969 0.000 + File creation : 5.696 5.696 5.696 0.000 + File stat : 68.692 68.692 68.692 0.000 + File read : 33.931 33.931 33.931 0.000 + File removal : 23.658 23.658 23.658 0.000 + Tree creation : 5.951 5.951 5.951 0.000 + Tree removal : 9.889 9.889 9.889 0.000 +``` + +### EFS + +``` +mdtest-3.4.0+dev was launched with 1 total task(s) on 1 node(s) +Command line used: ./mdtest '-d' '/efs/mdtest' '-b' '6' '-I' '8' '-z' '4' +WARNING: Read bytes is 0, thus, a read test will actually just open/close. +Path : /efs/mdtest +FS : 8388608.0 TiB Used FS: 0.0% Inodes: 0.0 Mi Used Inodes: -nan% +Nodemap: 1 +1 tasks, 12440 files/directories + +SUMMARY rate: (of 1 iterations) + Operation Max Min Mean Std Dev + --------- --- --- ---- ------- + Directory creation : 192.301 192.301 192.301 0.000 + Directory stat : 1311.166 1311.166 1311.166 0.000 + Directory removal : 213.132 213.132 213.132 0.000 + File creation : 179.293 179.293 179.293 0.000 + File stat : 915.230 915.230 915.230 0.000 + File read : 371.012 371.012 371.012 0.000 + File removal : 217.498 217.498 217.498 0.000 + Tree creation : 187.906 187.906 187.906 0.000 + Tree removal : 218.357 218.357 218.357 0.000 +``` + +### JuiceFS + +``` +mdtest-3.4.0+dev was launched with 1 total task(s) on 1 node(s) +Command line used: ./mdtest '-d' '/jfs/mdtest' '-b' '6' '-I' '8' '-z' '4' +WARNING: Read bytes is 0, thus, a read test will actually just open/close. 
+Path : /jfs/mdtest +FS : 1024.0 TiB Used FS: 0.0% Inodes: 10.0 Mi Used Inodes: 0.0% +Nodemap: 1 +1 tasks, 12440 files/directories + +SUMMARY rate: (of 1 iterations) + Operation Max Min Mean Std Dev + --------- --- --- ---- ------- + Directory creation : 1416.582 1416.582 1416.582 0.000 + Directory stat : 3810.083 3810.083 3810.083 0.000 + Directory removal : 1115.108 1115.108 1115.108 0.000 + File creation : 1410.288 1410.288 1410.288 0.000 + File stat : 5023.227 5023.227 5023.227 0.000 + File read : 3487.947 3487.947 3487.947 0.000 + File removal : 1163.371 1163.371 1163.371 0.000 + Tree creation : 1503.004 1503.004 1503.004 0.000 + Tree removal : 1119.806 1119.806 1119.806 0.000 +``` diff --git a/docs/zh_cn/benchmark/metadata_engines_benchmark.md b/docs/zh_cn/benchmark/metadata_engines_benchmark.md new file mode 100644 index 0000000..820672d --- /dev/null +++ b/docs/zh_cn/benchmark/metadata_engines_benchmark.md @@ -0,0 +1,186 @@ +--- +sidebar_label: 元数据引擎性能测试 +sidebar_position: 6 +slug: /metadata_engines_benchmark +--- +# 元数据引擎性能对比测试 + +首先展示结论: + +- 对于纯元数据操作,MySQL 耗时约为 Redis 的 2~4 倍;TiKV 性能与 MySQL 接近,大部分场景下略优于 MySQL +- 对于小 IO(~100 KiB)压力,使用 MySQL 引擎的操作总耗时大约是使用 Redis 引擎总耗时的 1~3 倍;TiKV 耗时与 MySQL 接近 +- 对于大 IO(~4 MiB)压力,使用不同元数据引擎的总耗时未见明显差异(此时对象存储成为瓶颈) + +> **注意**: +> +> 1. Redis 可以通过将 `appendfsync` 配置项由 `always` 改为 `everysec`,牺牲少量可靠性来换取一定的性能提升;更多信息可参见[这里](https://redis.io/topics/persistence) +> 2. 测试中 Redis 和 MySQL 数据均仅在本地存储单副本,TiKV 数据会在三个节点间通过 Raft 协议存储三副本 + +以下提供了测试的具体细节。这些测试都运行在相同的对象存储(用来存放数据),客户端和元数据节点上;只有元数据引擎不同。 + +## 测试环境 + +### JuiceFS 版本 + +juicefs version 0.16-dev (2021-07-20 9efa870) + +### 对象存储 + +Amazon S3 + +### 客户端节点 + +- Amazon c5.xlarge: 4 vCPUs, 8 GiB Memory, Up to 10 Gigabit Network +- Ubuntu 18.04.4 LTS + +### 元数据节点 + +- Amazon c5d.xlarge: 4 vCPUs, 8 GiB Memory, Up to 10 Gigabit Network, 100 GB SSD(为元数据引擎提供本地存储) +- Ubuntu 18.04.4 LTS +- SSD 数据盘被格式化为 ext4 文件系统并挂载到 `/data` 目录 + +### 元数据引擎 + +#### Redis + +- 版本: [6.2.3](https://download.redis.io/releases/redis-6.2.3.tar.gz) +- 配置: + - appendonly: yes + - appendfsync: 分别测试了 always 和 everysec + - dir: `/data/redis` + +#### MySQL + +- 版本: 8.0.25 +- `/var/lib/mysql` 目录被绑定挂载到 `/data/mysql` + +### TiKV + +- 版本: 5.1.0 +- 配置: + - deploy_dir: `/data/tikv-deploy` + - data_dir: `/data/tikv-data` + +## 测试工具 + +每种元数据引擎都会运行以下所有测试。 + +### Golang Benchmark + +在源码中提供了简单的元数据基准测试: `pkg/meta/benchmarks_test.go`。 + +### JuiceFS Bench + +JuiceFS 提供了一个基础的性能测试命令: + +```bash +$ ./juicefs bench /mnt/jfs +``` + +### mdtest + +- 版本: mdtest-3.4.0+dev + +在3个客户端节点上并发执行测试: + +```bash +$ cat myhost +client1 slots=4 +client2 slots=4 +client3 slots=4 +``` + +测试命令: + +```bash +# meta only +$ mpirun --use-hwthread-cpus --allow-run-as-root -np 12 --hostfile myhost --map-by slot /root/mdtest -b 3 -z 1 -I 100 -u -d /mnt/jfs + +# 12000 * 100KiB files +$ mpirun --use-hwthread-cpus --allow-run-as-root -np 12 --hostfile myhost --map-by slot /root/mdtest -F -w 102400 -I 1000 -z 0 -u -d /mnt/jfs +``` + +### fio + +- 版本: fio-3.1 + +```bash +fio --name=big-write --directory=/mnt/jfs --rw=write --refill_buffers --bs=4M --size=4G --numjobs=4 --end_fsync=1 --group_reporting +``` + +## 测试结果 + +### Golang Benchmark + +- 展示了操作耗时(单位为 微秒/op),数值越小越好 +- 括号内数字是该指标对比 Redis-Always 的倍数(`always` 和 `everysec` 均是 Redis 配置项 `appendfsync` 的可选值) +- 由于元数据缓存缘故,目前 `Read` 接口测试数据均小于 1 微秒,暂无对比意义 + +| | Redis-Always | Redis-Everysec | MySQL | TiKV | +| ------------ | ------------ | -------------- | ----- | ---- | +| mkdir | 986 | 700 (0.7) | 2274 (2.3) | 1961 (2.0) | +| mvdir | 1116 | 940 (0.8) | 
3690 (3.3) | 2145 (1.9) | +| rmdir | 981 | 817 (0.8) | 2980 (3.0) | 2300 (2.3) | +| readdir_10 | 376 | 378 (1.0) | 1365 (3.6) | 965 (2.6) | +| readdir_1k | 1804 | 1819 (1.0) | 15449 (8.6) | 6776 (3.8) | +| mknod | 968 | 665 (0.7) | 2325 (2.4) | 1997 (2.1) | +| create | 957 | 703 (0.7) | 2291 (2.4) | 1971 (2.1) | +| rename | 1082 | 1040 (1.0) | 3701 (3.4) | 2162 (2.0) | +| unlink | 842 | 710 (0.8) | 3293 (3.9) | 2217 (2.6) | +| lookup | 118 | 127 (1.1) | 409 (3.5) | 571 (4.8) | +| getattr | 108 | 120 (1.1) | 358 (3.3) | 285 (2.6) | +| setattr | 568 | 490 (0.9) | 1239 (2.2) | 1720 (3.0) | +| access | 109 | 116 (1.1) | 354 (3.2) | 283 (2.6) | +| setxattr | 237 | 113 (0.5) | 1197 (5.1) | 1508 (6.4) | +| getxattr | 110 | 108 (1.0) | 326 (3.0) | 279 (2.5) | +| removexattr | 244 | 116 (0.5) | 847 (3.5) | 1856 (7.6) | +| listxattr_1 | 111 | 106 (1.0) | 336 (3.0) | 286 (2.6) | +| listxattr_10 | 112 | 111 (1.0) | 376 (3.4) | 303 (2.7) | +| link | 715 | 574 (0.8) | 2610 (3.7) | 1949 (2.7) | +| symlink | 952 | 702 (0.7) | 2583 (2.7) | 1960 (2.1) | +| newchunk | 235 | 113 (0.5) | 1 (0.0) | 1 (0.0) | +| write | 816 | 564 (0.7) | 2788 (3.4) | 2138 (2.6) | +| read_1 | 0 | 0 (0.0) | 0 (0.0) | 0 (0.0) | +| read_10 | 0 | 0 (0.0) | 0 (0.0) | 0 (0.0) | + +### JuiceFS Bench + +| | Redis-Always | Redis-Everysec | MySQL | TiKV | +| -------------- | -------------- | -------------- | -------------- | -------------- | +| Write big | 312.81 MiB/s | 303.45 MiB/s | 310.26 MiB/s | 310.90 MiB/s | +| Read big | 348.06 MiB/s | 525.78 MiB/s | 493.45 MiB/s | 477.78 MiB/s | +| Write small | 26.0 files/s | 27.5 files/s | 22.7 files/s | 24.2 files/s | +| Read small | 1431.6 files/s | 1113.4 files/s | 608.0 files/s | 415.7 files/s | +| Stat file | 6713.7 files/s | 6885.8 files/s | 2144.9 files/s | 1164.5 files/s | +| FUSE operation | 0.45 ms | 0.32 ms | 0.41 ms | 0.40 ms | +| Update meta | 1.04 ms | 0.79 ms | 3.36 ms | 1.74 ms | + +### mdtest + +- 展示了操作速率(每秒 OPS 数),数值越大越好 + +| | Redis-Always | Redis-Everysec | MySQL | TiKV | +| ------------------ | ------------ | -------------- | --------- | --------- | +| **EMPTY FILES** | | | | | +| Directory creation | 4149.645 | 9261.190 | 1603.298 | 2023.177 | +| Directory stat | 172665.701 | 243307.527 | 15678.643 | 15029.717 | +| Directory removal | 4687.027 | 9575.706 | 1420.124 | 1772.861 | +| File creation | 4257.367 | 8994.232 | 1632.225 | 2119.616 | +| File stat | 158793.214 | 287425.368 | 15598.031 | 14466.477 | +| File read | 38872.116 | 47938.792 | 14004.083 | 17149.941 | +| File removal | 3831.421 | 10538.675 | 983.338 | 1497.474 | +| Tree creation | 100.403 | 108.657 | 44.154 | 15.615 | +| Tree removal | 127.257 | 143.625 | 51.804 | 21.005 | +| **SMALL FILES** | | | | | +| File creation | 317.999 | 317.925 | 272.272 | 280.493 | +| File stat | 54063.617 | 57798.963 | 13882.940 | 10984.141 | +| File read | 56891.010 | 57548.889 | 16038.716 | 7155.426 | +| File removal | 3638.809 | 8490.490 | 837.510 | 1184.253 | +| Tree creation | 54.523 | 119.317 | 23.336 | 5.233 | +| Tree removal | 73.366 | 82.195 | 22.783 | 4.918 | + +### fio + +| | Redis-Always | Redis-Everysec | MySQL | TiKV | +| --------------- | ------------ | -------------- | --------- | --------- | +| Write bandwidth | 350 MiB/s | 360 MiB/s | 360 MiB/s | 358 MiB/s | diff --git a/docs/zh_cn/benchmark/operations_profiling.md b/docs/zh_cn/benchmark/operations_profiling.md new file mode 100644 index 0000000..86061b1 --- /dev/null +++ b/docs/zh_cn/benchmark/operations_profiling.md @@ -0,0 +1,56 @@ +--- +sidebar_label: 性能诊断 
+sidebar_position: 3 +slug: /operations_profiling +--- +# JuiceFS 性能诊断 + +## 介绍 + +JuiceFS 文件系统挂载以后,在文件系统的根目录中有一个名为 [`.accesslog`](../administration/fault_diagnosis_and_analysis.md#访问日志) 的特殊虚拟文件,用于跟踪其客户端中发生的每个操作。在负载压力较大的情况下,此文件每秒可能会生成数千个日志记录,很难确定特定时间的实际情况。因此,我们制作了一个名为 [`juicefs profile`](../reference/command_reference.md#juicefs-profile) 的简单工具,可以显示最近完成操作的概述。目的是汇总过去某个时间的所有日志并定期显示统计信息,例如: + +![juicefs-profiling](../images/juicefs-profiling.gif) + +## 诊断模式 + +目前有两种诊断模式:`实时模式` 和 `回放模式`。 + +### 实时模式 + +通过执行以下命令,您可以观察挂载点上的实时操作: + +```bash +$ juicefs profile MOUNTPOINT +``` + +> **提示**:输出结果按总时间降序排列。 + +### 回放模式 + +在现有的日志文件上运行 `profile` 命令将启用「回放模式」: + +```bash +$ juicefs profile LOGFILE +``` + +在调试或分析性能问题时,更实用的做法通常是先记录访问日志,然后重放(多次)。例如: + +```bash +$ cat /jfs/.accesslog > /tmp/jfs-oplog +# later +$ juicefs profile /tmp/jfs-oplog +``` + +> **提示 1**:可以随时按键盘上的 Enter/Return 暂停/继续回放。 +> +> **提示 2**:如果设置 `--interval 0`,将立即回放完整个日志文件并显示整体统计结果。 + +## 过滤 + +有时我们只对某个用户或进程感兴趣,可以通过指定其 ID 来过滤掉其他用户或进程。例如: + +```bash +$ juicefs profile /tmp/jfs-oplog --uid 12345 +``` + +更多信息,请运行 `juicefs profile -h` 命令查看。 diff --git a/docs/zh_cn/benchmark/performance_evaluation_guide.md b/docs/zh_cn/benchmark/performance_evaluation_guide.md new file mode 100644 index 0000000..95f5103 --- /dev/null +++ b/docs/zh_cn/benchmark/performance_evaluation_guide.md @@ -0,0 +1,284 @@ +--- +sidebar_label: 性能评估指南 +sidebar_position: 2 +slug: /performance_evaluation_guide +--- +# JuiceFS 性能评估指南 + +在进行性能测试之前,最好写下该使用场景的大致描述,包括: + +1. 对接的应用是什么?比如 Apache Spark、PyTorch 或者是自己写的程序等 +2. 应用运行的资源配置,包括 CPU、内存、网络,以及节点规模 +3. 预计的数据规模,包括文件数量和容量 +4. 文件的大小和访问模式(大文件或者小文件,顺序读写或者随机读写) +5. 对性能的要求,比如每秒要写入或者读取的数据量、访问的 QPS 或者操作的延迟等 + +以上这些内容越清晰、越详细,就越容易制定合适的测试计划,以及需要关注的性能指标,来判断应用对存储系统各方面的需求,包括 JuiceFS 元数据配置、网络带宽要求、配置参数等。当然,在一开始就清晰地写出上面所有的内容并不容易,有些内容可以在测试过程中逐渐明确,**但是在一次完整的测试结束时,以上使用场景描述以及相对应的测试方法、测试数据、测试结果都应该是完整的**。 + +如果上面的内容还不明确,不要紧,JuiceFS 内置的测试工具可以一行命令得到单机基准性能的核心指标。同时本文还会介绍两个 JuiceFS 内置的性能分析工具,在做更复杂的测试时,这两个工具能帮你简单清晰的分析出 JuiceFS 性能表现背后的原因。 + +## 性能测试快速上手 + +以下示例介绍 JuiceFS 内置的 bench 工具的基本用法。 + +### 环境配置 + +- 测试主机:Amazon EC2 c5.xlarge 一台 +- 操作系统:Ubuntu 20.04.1 LTS (Kernel 5.4.0-1029-aws) +- 元数据引擎:Redis 6.2.3, 存储(dir)配置在系统盘 +- 对象存储:Amazon S3 +- JuiceFS version:0.17-dev (2021-09-23 2ec2badf) + +### JuiceFS Bench + +JuiceFS `bench` 命令可以帮助你快速完成单机性能测试,通过测试结果判断环境配置和性能表现是否正常。假设你已经把 JuiceFS 挂载到了测试机器的 `/mnt/jfs` 位置(如果在 JuiceFS 初始化、挂载方面需要帮助,请参考[快速上手指南](../getting-started/for_local.md)),执行以下命令即可(推荐 `-p` 参数设置为测试机器的 CPU 核数): + +```bash +juicefs bench /mnt/jfs -p 4 +``` + +测试结果会将各项性能指标显示为绿色,黄色或红色。若您的结果中有红色指标,请先检查相关配置,需要帮助可以在 [GitHub Discussions](https://github.com/juicedata/juicefs/discussions) 详细描述你的问题。 + +![bench](../images/bench-guide-bench.png) + +JuiceFS `bench` 基准性能测试的具体流程如下(它的实现逻辑非常简单,有兴趣了解细节的可以直接看[源码](https://github.com/juicedata/juicefs/blob/main/cmd/bench.go)): + +1. N 并发各写 1 个 1 GiB 的大文件,IO 大小为 1 MiB +2. N 并发各读 1 个之前写的 1 GiB 的大文件,IO 大小为 1 MiB +3. N 并发各写 100 个 128 KiB 的小文件,IO 大小为 128 KiB +4. N 并发各读 100 个之前写的 128 KiB 的小文件,IO 大小为 128 KiB +5. N 并发各 stat 100 个之前写的 128 KiB 的小文件 +6. 
清理测试用的临时目录 + +并发数 N 的值即由 `bench` 命令中的 `-p` 参数指定。 + +在这用 AWS 提供的几种常用存储类型做个性能比较: + +- EFS 1TiB 容量时,读 150MiB/s,写 50MiB/s,价格是 $0.08/GB-month +- EBS st1 是吞吐优化型 HDD,最大吞吐 500MiB/s,最大 IOPS(1MiB I/O)500,最大容量 16TiB,价格是 $0.045/GB-month +- EBS gp2 是通用型 SSD,最大吞吐 250MiB/s,最大 IOPS(16KiB I/O)16000,最大容量 16TiB,价格是 $0.10/GB-month + +不难看出,在上面的测试中,JuiceFS 的顺序读写能力明显优于 AWS EFS,吞吐能力也超过了常用的 EBS。但是写小文件的速度不算快,因为每写一个文件都需要将数据持久化到 S3 中,调用对象存储 API 通常有 10~30ms 的固定开销。 + +:::note 注 +Amazon EFS 的性能与容量线性相关([参考官方文档](https://docs.aws.amazon.com/efs/latest/ug/performance.html#performancemodes)),这样就不适合用在小数据量高吞吐的场景中。 +::: + +:::note 注 +价格参考 [AWS 美东区(US East, Ohio Region)](https://aws.amazon.com/ebs/pricing/?nc1=h_ls),不同 Region 的价格有细微差异。 +::: + +:::note 注 +以上数据来自 [AWS 官方文档](https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/ebs-volume-types.html),性能指标为最大值,EBS 的实际性能与卷容量和挂载 EC2 实例类型相关,总的来说是越大容量,搭配约高配置的 EC2,得到的 EBS 性能越好,但不超过上面提到的最大值。 +::: + +## 性能观测和分析工具 + +接下来介绍两个性能观测和分析工具,是 JuiceFS 测试、使用、调优过程中必备的利器。 + +### JuiceFS Stats + +JuiceFS `stats` 是一个实时统计 JuiceFS 性能指标的工具,类似 Linux 系统的 `dstat` 命令,可以实时显示 JuiceFS 客户端的指标变化(详细说明和使用方法见[文档](./stats_watcher.md))。执行 `juicefs bench` 时,在另一个会话中执行以下命令: + +```bash +juicefs stats /mnt/jfs --verbosity 1 +``` + +结果如下,可以将其与上述基准测试流程对照来看,更易理解: + +![stats](../images/bench-guide-stats.png) + +其中各项指标具体含义如下: + +- usage + - cpu: JuiceFS 进程消耗的 CPU + - mem: JuiceFS 进程占用的物理内存 + - buf: JuiceFS 进程内部的读写 buffer 大小,受挂载选项 `--buffer-size` 限制 + - cache: 内部指标,可不关注 +- fuse + - ops/lat: FUSE 接口每秒处理的请求个数及其平均时延(单位为毫秒) + - read/write: FUSE 接口每秒处理读写请求的带宽值 +- meta + - ops/lat: 元数据引擎每秒处理的请求个数及其平均时延(单位为毫秒)。请注意部分能在缓存中直接处理的请求未列入统计,以更好地体现客户端与元数据引擎交互的耗时。 + - txn/lat: 元数据引擎每秒处理的**写事务**个数及其平均时延(单位为毫秒)。只读请求如 `getattr` 只会计入 ops 而不会计入 txn。 + - retry: 元数据引擎每秒重试**写事务**的次数 +- blockcache + - read/write: 客户端本地数据缓存的每秒读写流量 +- object + - get/get_c/lat: 对象存储每秒处理**读请求**的带宽值,请求个数及其平均时延(单位为毫秒) + - put/put_c/lat: 对象存储每秒处理**写请求**的带宽值,请求个数及其平均时延(单位为毫秒) + - del_c/lat: 对象存储每秒处理**删除请求**的个数和平均时延(单位为毫秒) + +### JuiceFS Profile + +JuiceFS `profile` 一方面用来实时输出 JuiceFS 客户端的所有访问日志,包含每个请求的信息。同时,它也可以用来回放、统计 JuiceFS 访问日志,方便用户直观了解 JuiceFS 的运行情况(详细的说明和使用方法见[文档](./operations_profiling.md))。执行 `juicefs bench` 时,在另一个会话中执行以下命令: + +```bash +cat /mnt/jfs/.accesslog > access.log +``` + +其中 `.accesslog` 是一个虚拟文件,它平时不会产生任何数据,只有在读取(如执行 `cat`)时才会有 JuiceFS 的访问日志输出。结束后使用 Ctrl-C 结束 `cat` 命令,并运行: + +```bash +juicefs profile access.log --interval 0 +``` + +其中 `--interval` 参数设置访问日志的采样间隔,设为 0 时用于快速重放一个指定的日志文件,生成统计信息,如下图所示: + +![profile](../images/bench-guide-profile.png) + +从之前基准测试流程描述可知,本次测试过程一共创建了 (1 + 100) * 4 = 404 个文件,每个文件都经历了「创建 → 写入 → 关闭 → 打开 → 读取 → 关闭 → 删除」的过程,因此一共有: + +- 404 次 create,open 和 unlink 请求 +- 808 次 flush 请求:每当文件关闭时会自动调用一次 flush +- 33168 次 write/read 请求:每个大文件写入了 1024 个 1 MiB IO,而在 FUSE 层请求的默认最大值为 128 KiB,也就是说每个应用 IO 会被拆分成 8 个 FUSE 请求,因此一共有 (1024 * 8 + 100) * 4 = 33168 个请求。读 IO 与之类似,计数也相同。 + +以上这些值均能与 `profile` 的结果完全对应上。另外,结果中还显示 write 的平均时延非常小(45 微秒),而主要耗时点在 flush。这是因为 JuiceFS 的 write 默认先写入内存缓冲区,在文件关闭时再调用 flush 上传数据到对象存储,与预期吻合。 + +## 其他测试工具配置示例 + +### Fio 单机性能测试 + +Fio 是业界常用的一个性能测试工具,完成 JuiceFS bench 后可以用它来做更复杂的性能测试。 + +#### 环境配置 + +与 [JuiceFS Bench](#环境配置) 测试环境一致。 + +#### 测试任务 + +执行下面四个 Fio 任务,分别进行顺序写、顺序读、随机写、随机读测试。 + +顺序写 + +```shell +fio --name=jfs-test --directory=/mnt/jfs --ioengine=libaio --rw=write --bs=1m --size=1g --numjobs=4 --direct=1 --group_reporting +``` + +顺序读 + +```bash +fio --name=jfs-test --directory=/mnt/jfs --ioengine=libaio --rw=read --bs=1m --size=1g --numjobs=4 --direct=1 --group_reporting +``` + +随机写 + +```shell +fio --name=jfs-test 
--directory=/mnt/jfs --ioengine=libaio --rw=randwrite --bs=1m --size=1g --numjobs=4 --direct=1 --group_reporting +``` + +随机读 + +```shell +fio --name=jfs-test --directory=/mnt/jfs --ioengine=libaio --rw=randread --bs=1m --size=1g --numjobs=4 --direct=1 --group_reporting +``` + +参数说明: + +- `--name`:用户指定的测试名称,会影响测试文件名 +- `--directory`:测试目录 +- `--ioengine`:测试时下发 IO 的方式;通常用 libaio 即可 +- `--rw`:常用的有 read,write,randread,randwrite,分别代表顺序读写和随机读写 +- `--bs`:每次 IO 的大小 +- `--size`:每个线程的 IO 总大小;通常就等于测试文件的大小 +- `--numjobs`:测试并发线程数;默认每个线程单独跑一个测试文件 +- `--direct`:在打开文件时添加 `O_DIRECT` 标记位,不使用系统缓冲,可以使测试结果更稳定准确 + +结果如下: + +```bash +# Sequential +WRITE: bw=703MiB/s (737MB/s), 703MiB/s-703MiB/s (737MB/s-737MB/s), io=4096MiB (4295MB), run=5825-5825msec +READ: bw=817MiB/s (856MB/s), 817MiB/s-817MiB/s (856MB/s-856MB/s), io=4096MiB (4295MB), run=5015-5015msec + +# Random +WRITE: bw=285MiB/s (298MB/s), 285MiB/s-285MiB/s (298MB/s-298MB/s), io=4096MiB (4295MB), run=14395-14395msec +READ: bw=93.6MiB/s (98.1MB/s), 93.6MiB/s-93.6MiB/s (98.1MB/s-98.1MB/s), io=4096MiB (4295MB), run=43773-43773msec +``` + +### Vdbench 多机性能测试 + +Vdbench 也是业界常见的文件系统评测工具,且很好地支持了多机并发测试。 + +#### 测试环境 + +与 [JuiceFS Bench](#环境配置) 测试环境类似,只是多开了两台同配置主机,一共三台。 + +#### 准备工作 + +需要在每个节点相同路径下安装 vdbench: + +1. [官网](https://www.oracle.com/downloads/server-storage/vdbench-downloads.html)下载 50406 版本 +2. 安装 Java:`apt-get install openjdk-8-jre` +3. 测试 vdbench 安装成功:`./vdbench -t` + +然后,假设三个节点名称分别为 node0,node1 和 node2,则需在 node0 上创建配置文件,如下(测试大量小文件读写): + +```bash +$ cat jfs-test +hd=default,vdbench=/root/vdbench50406,user=root +hd=h0,system=node0 +hd=h1,system=node1 +hd=h2,system=node2 + +fsd=fsd1,anchor=/mnt/jfs/vdbench,depth=1,width=100,files=3000,size=128k,shared=yes + +fwd=default,fsd=fsd1,operation=read,xfersize=128k,fileio=random,fileselect=random,threads=4 +fwd=fwd1,host=h0 +fwd=fwd2,host=h1 +fwd=fwd3,host=h2 + +rd=rd1,fwd=fwd*,fwdrate=max,format=yes,elapsed=300,interval=1 +``` + +参数说明: + +- `vdbench=/root/vdbench50406`:指定了 vdbench 工具的安装路径 +- `anchor=/mnt/jfs/vdbench`:指定了每个节点上运行测试任务的路径 +- `depth=1,width=100,files=3000,size=128k`:定义了测试任务文件树结构,即测试目录下再创建 100 个目录,每个目录内包含 3000 个 128 KiB 大小的文件,一共 30 万个文件 +- `operation=read,xfersize=128k,fileio=random,fileselect=random`:定义了实际的测试任务,即随机选择文件下发 128 KiB 大小的读请求 + +结果如下: + +``` +FILE_CREATES Files created: 300,000 498/sec +READ_OPENS Files opened for read activity: 188,317 627/sec +``` + +系统整体创建 128 KiB 文件速度为每秒 498 个,读取文件速度为每秒 627 个。 + +#### 其他参考示例 + +以下是一些本地简单评估文件系统性能时可用的配置文件,以供参考;具体测试集规模和并发数可根据实际情况调整。 + +##### 顺序读写大文件 + +文件大小均为 1GiB,其中 `fwd1` 是顺序写大文件,`fwd2` 是顺序读大文件。 + +```bash +$ cat local-big +fsd=fsd1,anchor=/mnt/jfs/local-big,depth=1,width=1,files=4,size=1g,openflags=o_direct + +fwd=fwd1,fsd=fsd1,operation=write,xfersize=1m,fileio=sequential,fileselect=sequential,threads=4 +fwd=fwd2,fsd=fsd1,operation=read,xfersize=1m,fileio=sequential,fileselect=sequential,threads=4 + +rd=rd1,fwd=fwd1,fwdrate=max,format=restart,elapsed=120,interval=1 +rd=rd2,fwd=fwd2,fwdrate=max,format=restart,elapsed=120,interval=1 +``` + +##### 随机读写小文件 + +文件大小均为 128KiB,其中 `fwd1` 是随机写小文件,`fwd2` 是随机读小文件,`fwd3` 是混合读写小文件(读写比 = 7:3)。 + +```bash +$ cat local-small +fsd=fsd1,anchor=/mnt/jfs/local-small,depth=1,width=20,files=2000,size=128k,openflags=o_direct + +fwd=fwd1,fsd=fsd1,operation=write,xfersize=128k,fileio=random,fileselect=random,threads=4 +fwd=fwd2,fsd=fsd1,operation=read,xfersize=128k,fileio=random,fileselect=random,threads=4 +fwd=fwd3,fsd=fsd1,rdpct=70,xfersize=128k,fileio=random,fileselect=random,threads=4 + 
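# 注(补充说明,非原始配置内容):以下 rd(run definition)依次执行上面定义的三个 fwd 负载;
# fwdrate=max 表示不限制请求速率,elapsed=120 为每项测试运行 120 秒,interval=1 表示每秒输出一次统计。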
+rd=rd1,fwd=fwd1,fwdrate=max,format=restart,elapsed=120,interval=1 +rd=rd2,fwd=fwd2,fwdrate=max,format=restart,elapsed=120,interval=1 +rd=rd3,fwd=fwd3,fwdrate=max,format=restart,elapsed=120,interval=1 +``` diff --git a/docs/zh_cn/benchmark/stats_watcher.md b/docs/zh_cn/benchmark/stats_watcher.md new file mode 100644 index 0000000..4c86be9 --- /dev/null +++ b/docs/zh_cn/benchmark/stats_watcher.md @@ -0,0 +1,37 @@ +--- +sidebar_label: 性能统计监控 +sidebar_position: 4 +slug: /stats_watcher +--- +# JuiceFS 性能统计监控 + +JuiceFS 预定义了许多监控指标来监测系统运行时的内部性能情况,并通过 Prometheus API [暴露对外接口](../administration/monitoring.md)。然而, 在分析一些实际问题时,用户往往需要更实时的性能统计监控。因此,我们开发了 `stats` 命令,以类似 Linux `dstat` 工具的形式实时打印各个指标的每秒变化情况,如下图所示: + +![stats_watcher](../images/juicefs_stats_watcher.png) + +默认参数下,此命令会监控指定挂载点对应的 JuiceFS 进程的以下几个指标: + +#### usage + +- cpu:进程的 CPU 使用率 +- mem:进程的物理内存使用量 +- buf:进程已使用的 Buffer 大小;此值受限于挂载选项 `--buffer-size` + +#### fuse + +- ops/lat:通过 FUSE 接口处理的每秒请求数及其平均时延(单位为毫秒) +- read/write:通过 FUSE 接口处理的读写带宽 + +#### meta + +- ops/lat:每秒处理的元数据请求数和平均时延(单位为毫秒)。注意部分能在缓存中直接处理的元数据请求未列入统计,以更好地体现客户端与元数据引擎交互的耗时。 + +#### blockcache + +- read/write:客户端本地数据缓存的每秒读写流量 + +#### object + +- get/put:客户端与对象存储交互的 Get/Put 每秒流量 + +此外,可以通过设置 `--verbosity 1` 来获取更详细的统计信息(如读写请求的个数和平均时延统计等),也可以通过修改 `--schema` 来自定义监控内容与格式。更多的命令信息请通过执行 `juicefs stats -h` 查看。 diff --git a/docs/zh_cn/client_compile_and_upgrade.md b/docs/zh_cn/client_compile_and_upgrade.md new file mode 100644 index 0000000..c27c1cc --- /dev/null +++ b/docs/zh_cn/client_compile_and_upgrade.md @@ -0,0 +1,38 @@ +# JuiceFS 编译安装和升级 + +## 从源代码手动编译 + +如果你想优先体验 JuiceFS 的新功能,可以从我们仓库的 main 分支克隆代码,手动编译最新的客户端。 + +### 克隆源码 + +```shell +$ git clone https://github.com/juicedata/juicefs.git +``` + +### 执行编译 + +JuiceFS 客户端使用 Go 语言开发,因此在编译之前,你提前在本地安装好依赖的工具: + +- [Go](https://golang.org) 1.16+ +- GCC 5.4+ + +> **提示**:对于中国地区用户,为了加快获取 Go 模块的速度,建议通过 `GOPROXY` 环境变量设置国内的镜像服务器。例如:[Goproxy China](https://github.com/goproxy/goproxy.cn)。 + +进入源代码目录: + +```shell +$ cd juicefs +``` + +开始编译: + +```shell +$ make +``` + +编译成功以后,可以在当前目录中找到编译好的 `juicefs` 二进制程序。 + +## JuiceFS 客户端升级 + +JuiceFS 客户端是一个名为 `juicefs` 二进制文件,升级时只需使用新版二进制文件替换旧版即可。 diff --git a/docs/zh_cn/community/_adopters.md b/docs/zh_cn/community/_adopters.md new file mode 100644 index 0000000..a3495ea --- /dev/null +++ b/docs/zh_cn/community/_adopters.md @@ -0,0 +1,5 @@ +--- +sidebar_label: 谁在使用 +sidebar_position: 1 +--- +# 谁在使用 \ No newline at end of file diff --git a/docs/zh_cn/community/_integrations.md b/docs/zh_cn/community/_integrations.md new file mode 100644 index 0000000..6b6e8d7 --- /dev/null +++ b/docs/zh_cn/community/_integrations.md @@ -0,0 +1,5 @@ +--- +sidebar_label: 项目集成 +sidebar_position: 2 +--- +# 项目集成 diff --git a/docs/zh_cn/community/_roadmap.md b/docs/zh_cn/community/_roadmap.md new file mode 100644 index 0000000..d52e750 --- /dev/null +++ b/docs/zh_cn/community/_roadmap.md @@ -0,0 +1,5 @@ +--- +sidebar_label: 路线图 +sidebar_position: 3 +--- +# 路线图 \ No newline at end of file diff --git a/docs/zh_cn/community/usage_tracking.md b/docs/zh_cn/community/usage_tracking.md new file mode 100644 index 0000000..935092e --- /dev/null +++ b/docs/zh_cn/community/usage_tracking.md @@ -0,0 +1,14 @@ +--- +sidebar_label: 用量上报 +sidebar_position: 4 +--- + +# 用量上报 + +JuiceFS 默认会收集并上报**「匿名」**的使用数据。这些数据仅仅包含核心指标(如版本号、文件系统大小),不会包含任何用户信息或者敏感数据。你可以查看[这里](https://github.com/juicedata/juicefs/blob/main/pkg/usage/usage.go)检查相关代码。 + +这些数据帮助我们理解社区如何使用这个项目。你可以简单地通过 `--no-usage-report` 选项关闭用量上报: + +``` +$ juicefs mount --no-usage-report +``` diff --git 
a/docs/zh_cn/comparison/juicefs_vs_alluxio.md b/docs/zh_cn/comparison/juicefs_vs_alluxio.md new file mode 100644 index 0000000..055a74d --- /dev/null +++ b/docs/zh_cn/comparison/juicefs_vs_alluxio.md @@ -0,0 +1,72 @@ +# JuiceFS 对比 Alluxio + +[Alluxio](https://www.alluxio.io)(/əˈlʌksio/)是大数据和机器学习生态系统中的数据访问层。最初作为研究项目「Tachyon」,它是在加州大学伯克利分校的 [AMPLab](https://en.wikipedia.org/wiki/AMPLab) 作为创建者 2013 年的博士论文创建的。Alluxio 于 2014 年开源。 + +下表显示了 Alluxio 和 JuiceFS 之间的主要功能差异。 + +| 特性 | Alluxio | JuiceFS | +| --------------------- | ------- | ------- | +| 存储格式 | Object | Block | +| 缓存粒度 | 64MiB | 4MiB | +| 多级缓存 | ✓ | ✓ | +| Hadoop 兼容 | ✓ | ✓ | +| S3 兼容 | ✓ | ✓ | +| Kubernetes CSI Driver | ✓ | ✓ | +| Hadoop 数据本地性 | ✓ | ✓ | +| 完全兼容 POSIX | ✕ | ✓ | +| 原子元数据操作 | ✕ | ✓ | +| 一致性 | ✕ | ✓ | +| 数据压缩 | ✕ | ✓ | +| 数据加密 | ✕ | ✓ | +| 零运维 | ✕ | ✓ | +| 开发语言 | Java | Go | +| 开源协议 | Apache License 2.0 | Apache License 2.0 | +| 开源时间 | 2011 | 2021.1 | + +### 存储格式 + +JuiceFS 中一个文件的[存储格式](../reference/how_juicefs_store_files.md)包含三个层级:chunk、slice 和 block。一个文件将被分割成多个块,并被压缩和加密(可选)存储到对象存储中。 + +Alluxio 将文件作为「对象」存储到 UFS。文件不会像 JuiceFS 那样被拆分成 block。 + +### 缓存粒度 + +JuiceFS 的[默认块大小](../reference/how_juicefs_store_files.md)为 4MiB,相比 Alluxio 的 64MiB,粒度更小。较小的块大小更适合随机读取(例如 Parquet 和 ORC)工作负载,即缓存管理将更有效率。 + +### Hadoop 兼容 + +JuiceFS [完整兼容 HDFS](../deployment/hadoop_java_sdk.md)。不仅兼容 Hadoop 2.x 和 Hadoop 3.x,还兼容 Hadoop 生态系统中的各种组件。 + +### Kubernetes CSI Driver + +JuiceFS 提供了 [Kubernetes CSI Driver](https://github.com/juicedata/juicefs-csi-driver) 来帮助在 Kubernetes 中便捷使用 JuiceFS。Alluxio 也提供了 [Kubernetes CSI Driver](https://github.com/Alluxio/alluxio-csi),但是这个项目维护得不够活跃,也没有得到 Alluxio 的官方支持。 + +### 完全兼容 POSIX + +JuiceFS [完全兼容 POSIX](../reference/posix_compatibility.md)。来自[京东](https://www.slideshare.net/Alluxio/using-alluxio-posix-fuse-api-in-jdcom)的一个 pjdfstest 显示 Alluxio 没有通过 POSIX 兼容性测试,例如 Alluxio 不支持符号链接、truncate、fallocate、append、xattr、mkfifo、mknod 和 utimes。除了 pjdfstest 涵盖的东西外,JuiceFS 还提供了关闭再打开(close-to-open)一致性、原子元数据操作、mmap、fallocate 打洞、xattr、BSD 锁(flock)和 POSIX 记录锁(fcntl)。 + +### 原子元数据操作 + +Alluxio 中的元数据操作有两个步骤:第一步是修改 Alluxio master 的状态,第二步是向 UFS 发送请求。可以看到,元数据操作不是原子的,当操作正在执行或发生任何故障时,其状态是不可预测的。Alluxio 依赖 UFS 来实现元数据操作,比如重命名文件操作会变成复制和删除操作。 + +感谢 [Redis 事务](https://redis.io/topics/transactions),**JuiceFS 的大部分元数据操作都是原子的**,例如重命名文件、删除文件、重命名目录。您不必担心一致性和性能。 + +### 一致性 + +Alluxio 根据需要从 UFS 加载元数据,并且它在启动时没有关于 UFS 的信息。默认情况下,Alluxio 期望对 UFS 的所有修改都通过 Alluxio 进行。如果直接对 UFS 进行更改,则需要手动或定期在 Alluxio 和 UFS 之间同步元数据。正如[「原子元数据操作」](#原子元数据操作)部分所说,两步元数据操作可能会导致不一致。 + +JuiceFS 提供元数据和数据的强一致性。**JuiceFS 的元数据服务是唯一的真实来源(single source of truth),不是 UFS 的镜像。** 元数据服务不依赖对象存储来获取元数据。对象存储只是被视为无限制的块存储。JuiceFS 和对象存储之间没有任何不一致之处。 + +### 数据压缩 + +JuiceFS 支持使用 [LZ4](https://lz4.github.io/lz4) 或 [Zstandard](https://facebook.github.io/zstd) 来压缩您的所有数据。Alluxio 没有这个功能。 + +### 数据加密 + +JuiceFS 支持传输中加密(encryption in transit)以及静态加密(encryption at rest)。Alluxio 社区版没有这个功能,但是[企业版](https://docs.alluxio.io/ee/user/stable/en/operation/Security.html#end-to-end-data-encryption)有。 + +### 零运维 + +Alluxio 的架构可以分为 3 个组件:master、worker 和客户端。一个典型的集群由一个主节点(master)、多个备用主节点(standby master)、一个作业主节点(job master)、多个备用作业主节点(standby job master)、多个 worker 和 job worker 组成。您需要自己运维这些节点。 + +JuiceFS 使用 Redis 或者[其它系统](../reference/how_to_setup_metadata_engine.md)作为元数据引擎。您可以轻松使用由公有云提供商托管的服务作为 JuiceFS 的元数据引擎,没有任何运维负担。 diff --git a/docs/zh_cn/comparison/juicefs_vs_cephfs.md b/docs/zh_cn/comparison/juicefs_vs_cephfs.md new file mode 100644 index 0000000..a67673b --- /dev/null +++ b/docs/zh_cn/comparison/juicefs_vs_cephfs.md @@ -0,0 +1,64 
@@ +# JuiceFS 对比 CephFS + +## 共同点 + +两者都是高可靠,高性能的弹性分布式文件系统,且均有良好的 POSIX 兼容性,在各种文件系统使用场景都可一试。 + +## 不同点 + +### 系统架构 + +两者都采用了数据和元数据分离的架构,但在组件实现上有很大区别。 + +#### CephFS + +是一套完整且独立的系统,倾向于私有云部署;所有数据和元数据都会持久化在 Ceph 自己的存储池(RADOS Pool)中。 + +- 元数据 + - 服务进程(MDS):无状态且理论可水平扩展。目前已有成熟的主备机制,但多主部署依然有性能和稳定性隐患;生产环境通常采用一主多备或者多主静态隔离 + - 持久化:独立的 RADOS 存储池,通常采用 SSD 或更高性能的硬件存储 +- 数据:一个或多个 RADOS 存储池,支持通过 Layout 指定不同的配置,如分块大小(默认 4 MiB),冗余方式(多副本,EC)等 +- 客户端:支持内核客户端(kcephfs),用户态客户端(ceph-fuse)以及基于 libcephfs 实现的 C++、Python 等 SDK;近来社区也提供了 Windows 客户端(ceph-dokan)。同时生态中也有与 Samba 对接的 VFS object 和与 NFS-Ganesha 对接的 FSAL 模块可供考虑。 + +#### JuiceFS + +JuiceFS 主要实现一个 libjfs 库和 FUSE 客户端程序、Java SDK 等,支持对接多种元数据引擎和对象存储,适合在公有云、私有云或混合云环境下部署; + +- 元数据:支持多种已有的[数据库实现](../reference/how_to_setup_metadata_engine.md),包括: + - Redis 及各种兼容 Redis 协议的变种(需要支持事务); + - SQL 系列:MySQL,PostgreSQL,SQLite 等; + - 分布式 K/V 存储:已支持 TiKV,计划支持 Apple FoundationDB; + - 自研引擎:用于公有云上的 JuiceFS 全托管服务; +- 数据:支持超过 30 种公有云上的[对象存储](../reference/how_to_setup_object_storage.md),也可以和 MinIO,Ceph RADOS,Ceph RGW 等对接; +- 客户端:支持 Unix 用户态挂载,Windows 挂载,完整兼容 HDFS 语义的 Java SDK,[Python SDK](https://github.com/megvii-research/juicefs-python) 以及内置的 S3 网关。 + +### 功能特性 + +| | CephFS | JuiceFS | +| ----------------------- | ---------- | ------------- | +| 文件分块 [1] | ✓ | ✓ | +| 元数据事务 | ✓ | ✓ | +| 强一致性 | ✓ | ✓ | +| Kubernetes CSI Driver | ✓ | ✓ | +| Hadoop 兼容 | ✓ | ✓ | +| 数据压缩 [2] | ✓ | ✓ | +| 数据加密 [3] | ✓ | ✓ | +| 快照 | ✓ | ✕ | +| 客户端数据缓存 | ✕ | ✓ | +| Hadoop 数据本地性 | ✕ | ✓ | +| S3 兼容 | ✕ | ✓ | +| 配额 | 目录级配额 | Volume 级配额 | +| 开发语言 | C++ | Go | +| 开源协议 | LGPLv2.1 & LGPLv3 | Apache License 2.0 | + +#### 注 1:文件分块 + +虽然两者都做了大文件的分块,但在实现原理上有本质区别。CephFS 会将文件按 [`object_size`](https://docs.ceph.com/en/latest/cephfs/file-layouts/#reading-layouts-with-getfattr)(默认为 4MiB)拆分,每个分块对应一个 RADOS object。而 JuiceFS 则将文件先按 64MiB Chunk 拆分,每个 Chunk 在写入时根据实际情况进一步拆分成一个或多个逻辑 Slice,每个 Slice 在写入对象存储时再拆分成默认 4MiB 的 Block,Block 与对象存储中 object 一一对应。在处理覆盖写时,CephFS 需要直接修改对应的 objects,流程较为复杂;尤其是冗余策略为 EC 或者开启数据压缩时,往往需要先读取部分 object 内容,在内存中修改后再写入,这个流程会带来很大的性能开销。而 JuiceFS 在覆盖写时将更新数据作为新 objects 写入并修改元数据即可,性能大幅提升。过程中出现的冗余数据会异步完成垃圾回收。 + +#### 注 2:数据压缩 + +严格来讲,CephFS 本身并未提供数据压缩功能,其实际依赖的是 RADOS 层 BlueStore 的压缩。而 JuiceFS 则可以在 Block 上传到对象存储之前就进行一次数据压缩,以减少对象存储中的容量使用。换言之,如果用 JuiceFS 对接 RADOS,是能做到在 Block 进 RADOS 前后各进行一次压缩。另外,就像在**文件分块**中提到的,出于对覆盖写的性能保障,CephFS 一般不会开启 BlueStore 的压缩功能。 + +#### 注 3:数据加密 + +Ceph **Messenger v2** 支持网络传输层的数据加密,存储层则与压缩类似,依赖于 OSD 创建时提供的加密功能。JuiceFS 是在上传对象前和下载后执行加解密,在对象存储侧完全透明。 diff --git a/docs/zh_cn/comparison/juicefs_vs_s3ql.md b/docs/zh_cn/comparison/juicefs_vs_s3ql.md new file mode 100644 index 0000000..e2d9924 --- /dev/null +++ b/docs/zh_cn/comparison/juicefs_vs_s3ql.md @@ -0,0 +1,113 @@ +# JuiceFS 对比 S3QL + +与 JuiceFS 类似,[S3QL](https://github.com/s3ql/s3ql) 也是一款由对象存储和数据库组合驱动的开源网络文件系统,所有存入的数据会被分块后存储到亚马逊 S3、Backblaze B2、OpenStack Swift 等主流的对象存储中,相应的元数据会存储在数据库中。 + +## 共同点 + +- 都是通过 FUSE 模块实现对标准 POSIX 文件系统接口的支持,从而可以将海量的云端存储挂载到本地,像本地存储一样使用。 +- 都能提供标准的文件系统功能:硬链接、符号链接、扩展属性、文件权限。 +- 都支持数据压缩和加密,但二者采用的算法各有不同。 + +## 不同点 + +- S3QL 仅支持 SQLite 一种数据库,而 JuiceFS 除了支持 SQLite 以外还支持 Redis、TiKV、MySQL、PostgreSQL 等数据库。 +- S3QL 没有分布式能力,**不支持**多主机同时挂载。JuiceFS 是典型的分布式文件系统,在使用基于网络的数据库时,支持多主机分布式挂载读写。 +- S3QL 在一个数据块几秒内未被访问时将其上传到对象存储。文件被关闭甚者 fsync 后其仍仅保证在系统内存中,节点故障时可能丢失数据。JuiceFS 确保了数据的高可靠性,在文件关闭时会将其同步上传到对象存储。 +- S3QL 提供数据去重,相同数据只存储一份,可以降低对象存储的用量,但也会加重系统的性能开销。相比之下,JuiceFS 更注重性能,对大规模数据去重代价过高,暂不提供该功能。 +- S3QL 提供了元数据远程同步备份功能,存有元数据的 SQLite 数据库会异步备份到对象存储。JuiceFS 以使用 Redis、MySQL 等网络数据库为主,并未直接提供 SQLite 数据库同步备份功能,但 JuiceFS 
支持元数据的导入导出,以及各种存储后端的同步功能,用户可以很容易将元数据备份到对象存储,也支持在不同数据库之间迁移。 + +| | **S3QL** | **JuiceFS** | +| :-------------------- | :----------------- | :------------------------- | +| 元数据引擎 | SQLite | Redis、MySQL、SQLite、TiKV | +| 存储引擎 | 对象存储、本地磁盘 | 对象存储、WebDAV、本地磁盘 | +| 操作系统 | Unix-like | Linux、macOS、Windows | +| 压缩算法 | LZMA, bzip2, gzip | lz4, zstd | +| 加密算法 | AES-256 | AES-GCM, RSA | +| POSIX 兼容 | ✓ | ✓ | +| 硬链接 | ✓ | ✓ | +| 符号链接 | ✓ | ✓ | +| 扩展属性 | ✓ | ✓ | +| 标准 Unix 权限 | ✓ | ✓ | +| 数据分块 | ✓ | ✓ | +| 本地缓存 | ✓ | ✓ | +| 空间弹性伸缩 | ✓ | ✓ | +| 元数据备份 | ✓ | ✓ | +| 数据去重 | ✓ | ✕ | +| 只读目录 | ✓ | ✕ | +| 快照 | ✓ | ✕ | +| 共享挂载 | ✕ | ✓ | +| Hadoop SDK | ✕ | ✓ | +| Kubernetes CSI Driver | ✕ | ✓ | +| S3 网关 | ✕ | ✓ | +| 开发语言 | Python | Go | +| 开源协议 | GPLv3 | Apache License 2.0 | +| 开源时间 | 2011 | 2021.1 | + +## 易用性 + +这部分主要评估两个产品在安装和使用上的的易用程度。 + +### 安装 + +在安装过程中,我们使用 Rocky Linux 8.4 操作系统(内核版本 4.18.0-305.12.1.el8_4.x86_64)。 + +#### S3QL + +S3QL 采用 Python 开发,在安装时需要依赖 python-devel 3.7 及以上版本。另外,还需要至少满足以下依赖:fuse3-devel、gcc、pyfuse3、sqlite-devel、cryptography、defusedxml、apsw、dugong。另外,需要特别注意 Python 的包依赖和位置问题。 + +S3QL 会在系统中安装 12 个二进制程序,每个程序都提供一个独立的功能,如下图。 + +![](../images/s3ql-bin.jpg) + +#### JuiceFS + +JuiceFS 客户端采用 Go 语言开发,直接下载预编译的二进制文件即可直接使用。JuiceFS 客户端只有一个二进制程序 `juicefs`,将其拷贝到系统的任何一个可执行路径下即可,比如:`/usr/local/bin`。 + +### 使用 + +S3QL 和 JuiceFS 都使用数据库保存元数据,S3QL 仅支持 SQLite 数据库,JuiceFS 支持 Redis、TiKV、MySQL、MariaDB、PostgreSQL 和 SQLite 等数据库。 + +这里使用本地创建的 Minio 对象存储,使用两款工具分别创建文件系统: + +#### S3QL + +S3QL 使用 `mkfs.s3ql` 工具创建文件系统: + +```shell +$ mkfs.s3ql --plain --backend-options no-ssl -L s3ql s3c://127.0.0.1:9000/s3ql/ +``` + +挂载文件系统使用 `mount.s3ql`: + +```shell +$ mount.s3ql --compress none --backend-options no-ssl s3c://127.0.0.1:9000/s3ql/ mnt-s3ql +``` + +S3QL 在创建和挂载文件系统时都需要通过命令行交互式的提供对象存储 API 的访问密钥。 + +#### JuiceFS + +JuiceFS 使用 `format` 子命令创建文件系统: + +```shell +$ juicefs format --storage minio \ + --bucket http://127.0.0.1:9000/myjfs \ + --access-key minioadmin \ + --secret-key minioadmin \ + sqlite3://myjfs.db \ + myjfs +``` + +挂载文件系统使用 `mount` 子命令: + +```shell +$ sudo juicefs mount -d sqlite3://myjfs.db mnt-juicefs +``` + +JuiceFS 只在创建文件系统时设置对象存储 API 访问密钥,相关信息会写入元数据引擎,之后挂载使用无需重复提供对象存储地址、密钥等信息。 + +## 对比总结 + +**S3QL** 采用对象存储 + SQLite 的存储结构,数据分块存储既能提高文件的读写效率,也能降低文件修改时的资源开销。贴心的提供了快照、数据去重、数据保持等高级功能,加之默认的数据压缩和数据加密,让 S3QL 非常适合个人在云存储上用较低的成本、更安全的存储文件。 + +**JuiceFS** 支持对象存储、HDFS、WebDAV、本地磁盘作为数据存储引擎,支持 Redis、TiKV、MySQL、MariaDB、PostgreSQL、SQLite 等流行的数据作为元数据存储引擎。除了通过 FUSE 提供标准的 POSIX 文件系统接口以外,JuiceFS 还提供 Java API,可以直接替代 HDFS 为 Hadoop 提供存储。同时还提供 [Kubernetes CSI Driver](https://github.com/juicedata/juicefs-csi-driver),可以作为 Kubernetes 的存储层做数据持久化存储。JucieFS 是为企业级分布式数据存储场景设计的文件系统,广泛应用于大数据分析、机器学习、容器共享存储、数据共享及备份等多种场景。 diff --git a/docs/zh_cn/deployment/_share_via_nfs.md b/docs/zh_cn/deployment/_share_via_nfs.md new file mode 100644 index 0000000..efb1c57 --- /dev/null +++ b/docs/zh_cn/deployment/_share_via_nfs.md @@ -0,0 +1,5 @@ +--- +sidebar_label: 配置 NFS 共享 +sidebar_position: 5 +--- +# 通过 NFS 共享 JuiceFS 存储 \ No newline at end of file diff --git a/docs/zh_cn/deployment/_share_via_smb.md b/docs/zh_cn/deployment/_share_via_smb.md new file mode 100644 index 0000000..002f66b --- /dev/null +++ b/docs/zh_cn/deployment/_share_via_smb.md @@ -0,0 +1,5 @@ +--- +sidebar_label: 配置 SMB 共享 +sidebar_position: 6 +--- +# 通过 SMB 共享 JuiceFS 存储 \ No newline at end of file diff --git a/docs/zh_cn/deployment/hadoop_java_sdk.md b/docs/zh_cn/deployment/hadoop_java_sdk.md new file mode 100644 index 0000000..8d9f82f --- /dev/null +++ 
b/docs/zh_cn/deployment/hadoop_java_sdk.md @@ -0,0 +1,587 @@ +--- +sidebar_label: Hadoop 使用 JuiceFS +sidebar_position: 3 +slug: /hadoop_java_sdk +--- +# 在 Hadoop 生态使用 JuiceFS 存储 + +JuiceFS 提供与 HDFS 接口[高度兼容](https://hadoop.apache.org/docs/current/hadoop-project-dist/hadoop-common/filesystem/introduction.html)的 Java 客户端,Hadoop 生态中的各种应用都可以在不改变代码的情况下,平滑地使用 JuiceFS 存储数据。 + +## 环境要求 + +### 1. Hadoop 及相关组件 + +JuiceFS Hadoop Java SDK 同时兼容 Hadoop 2.x、Hadoop 3.x,以及 Hadoop 生态中的各种主流组件。 + +### 2. 用户权限 + +JuiceFS 默认使用本地的 `用户` 和 `UID` 映射,在分布式环境下使用时,为了避免权限问题,请参考[文档](../administration/sync_accounts_between_multiple_hosts.md)将需要使用的 `用户` 和 `UID` 同步到所有 Hadoop 节点。也可以通过定义一个全局的用户和用户组文件给集群共享读取,[查看详情](#其他配置)。 + +### 3. 文件系统 + +通过 JuiceFS Java 客户端为 Hadoop 生态提供存储,需要提前创建 JuiceFS 文件系统。部署 Java 客户端时,在配置文件中指定已创建文件系统的元数据引擎地址。 + +创建文件系统可以参考 [JuiceFS 快速上手指南](../getting-started/for_local.md)。 + +:::note 注意 +如果要在分布式环境中使用 JuiceFS,创建文件系统时,请合理规划要使用的对象存储和数据库,确保它们可以被每个集群节点正常访问。 +::: + +### 4. 内存资源 + +JuiceFS Hadoop Java SDK 最多需要额外使用 4 * [`juicefs.memory-size`](#io-配置) 的 off-heap 内存用来加速读写性能,默认情况下,最多需要额外 1.2GB 内存(取决于写入负载)。 + +## 客户端编译 + +:::note 注意 +不论为哪个系统环境编译客户端,编译后的 JAR 文件都为相同的名称,且只能部署在匹配的系统环境中,例如在 Linux 中编译则只能用于 Linux 环境。另外,由于编译的包依赖 glibc,建议尽量使用低版本的系统进行编译,这样可以获得更好的兼容性。 +::: + +编译依赖以下工具: + +- [Go](https://golang.org/) 1.15+(中国用户建议使用 [Goproxy China 镜像加速](https://github.com/goproxy/goproxy.cn)) +- JDK 8+ +- [Maven](https://maven.apache.org/) 3.3+(中国用户建议使用[阿里云镜像加速](https://maven.aliyun.com)) +- git +- make +- GCC 5.4+ + +### Linux 和 macOS + +克隆仓库: + +```shell +$ git clone https://github.com/juicedata/juicefs.git +``` + +进入目录,执行编译: + +:::note 注意 +如果使用 Ceph 的 RADOS 作为 JuiceFS 的存储引擎,需要先安装 `librados-dev` 包并且在[编译 `libjfs.so`](https://github.com/juicedata/juicefs/blob/main/sdk/java/libjfs/Makefile#L22) 时加上 `-tags ceph`。 +::: + +```shell +$ cd juicefs/sdk/java +$ make +``` + +编译完成后,可以在 `sdk/java/target` 目录中找到编译好的 `JAR` 文件,包括两个版本: + +- 包含第三方依赖的包:`juicefs-hadoop-X.Y.Z.jar` +- 不包含第三方依赖的包:`original-juicefs-hadoop-X.Y.Z.jar` + +建议使用包含第三方依赖的版本。 + +### Windows + +用于 Windows 环境的客户端需要在 Linux 或 macOS 系统上通过交叉编译的方式获得,编译依赖 [mingw-w64](https://www.mingw-w64.org/),需要提前安装。 + +与编译面向 Linux 和 macOS 客户端的步骤相同,比如在 Ubuntu 系统上,先安装 `mingw-w64` 包,解决依赖问题: + +```shell +$ sudo apt install mingw-w64 +``` + +克隆并进入 JuiceFS 源代码目录,执行以下代码进行编译: + +```shell +$ cd juicefs/sdk/java +$ make win +``` + +## 部署客户端 + +让 Hadoop 生态各组件能够正确识别 JuiceFS,需要进行以下配置: + +1. 将编译好的 JAR 文件和 `$JAVA_HOME/lib/tools.jar` 放置到组件的 `classpath` 内,常见大数据平台和组件的安装路径见下表。 +2. 将 JuiceFS 相关配置写入配置文件(通常是 `core-site.xml`),详见[客户端配置参数](#客户端配置参数)。 + +建议将 JAR 文件放置在一个统一的位置,其他位置通过符号链接进行调用。 + +### 大数据平台 + +| 名称 | 安装路径 | +| ---- | ---- | +| CDH | `/opt/cloudera/parcels/CDH/lib/hadoop/lib`

`/opt/cloudera/parcels/CDH/spark/jars`<br/>`/var/lib/impala` |
+| HDP | `/usr/hdp/current/hadoop-client/lib`<br/>`/usr/hdp/current/hive-client/auxlib`<br/>`/usr/hdp/current/spark2-client/jars` |
+| Amazon EMR | `/usr/lib/hadoop/lib`<br/>`/usr/lib/spark/jars`<br/>`/usr/lib/hive/auxlib` |
+| 阿里云 EMR | `/opt/apps/ecm/service/hadoop/*/package/hadoop*/share/hadoop/common/lib`<br/>`/opt/apps/ecm/service/spark/*/package/spark*/jars`<br/>`/opt/apps/ecm/service/presto/*/package/presto*/plugin/hive-hadoop2`<br/>`/opt/apps/ecm/service/hive/*/package/apache-hive*/lib`<br/>`/opt/apps/ecm/service/impala/*/package/impala*/lib` |
+| 腾讯云 EMR | `/usr/local/service/hadoop/share/hadoop/common/lib`<br/>`/usr/local/service/presto/plugin/hive-hadoop2`<br/>`/usr/local/service/spark/jars`<br/>`/usr/local/service/hive/auxlib` |
+| UCloud UHadoop | `/home/hadoop/share/hadoop/common/lib`<br/>`/home/hadoop/hive/auxlib`<br/>`/home/hadoop/spark/jars`<br/>`/home/hadoop/presto/plugin/hive-hadoop2` |
+| 百度云 EMR | `/opt/bmr/hadoop/share/hadoop/common/lib`<br/>`/opt/bmr/hive/auxlib`<br/>
`/opt/bmr/spark2/jars` | + +### 社区开源组件 + +| 名称 | 安装路径 | +| ---- | ---- | +| Spark | `${SPARK_HOME}/jars` | +| Presto | `${PRESTO_HOME}/plugin/hive-hadoop2` | +| Flink | `${FLINK_HOME}/lib` | + +### 客户端配置参数 + +请参考以下表格设置 JuiceFS 文件系统相关参数,并写入配置文件,一般是 `core-site.xml`。 + +#### 核心配置 + +| 配置项 | 默认值 | 描述 | +| -------------------------------- | ---------------------------- | ------------------------------------------------------------ | +| `fs.jfs.impl` | `io.juicefs.JuiceFileSystem` | 指定要使用的存储实现,默认使用 `jfs://` 作为 scheme。如想要使用其它 scheme(例如 `cfs://`),则修改为 `fs.cfs.impl` 即可。无论使用的 scheme 是什么,访问的都是 JuiceFS 中的数据。 | +| `fs.AbstractFileSystem.jfs.impl` | `io.juicefs.JuiceFS` | 指定要使用的存储实现,默认使用 `jfs://` 作为 scheme。如想要使用其它 scheme(例如 `cfs://`),则修改为 `fs.AbstractFileSystem.cfs.impl` 即可。无论使用的 scheme 是什么,访问的都是 JuiceFS 中的数据。 | +| `juicefs.meta` | | 指定预先创建好的 JuiceFS 文件系统的元数据引擎地址。可以通过 `juicefs.{vol_name}.meta` 格式为客户端同时配置多个文件系统。具体请参考[「多文件系统配置」](#多文件系统配置)。 | + +#### 缓存配置 + +| 配置项 | 默认值 | 描述 | +| ---------------------------- | ------ | ------------------------------------------------------------ | +| `juicefs.cache-dir` | | 设置本地缓存目录,可以指定多个文件夹,用冒号 `:` 分隔,也可以使用通配符(比如 `*` )。**请预先创建好这些目录,并给予 `0777` 权限,便于多个应用共享缓存数据。** | +| `juicefs.cache-size` | 0 | 设置本地缓存目录的容量,单位 MiB,默认为 0,即不开启缓存。如果配置了多个缓存目录,该值代表所有缓存目录容量的总和。 | +| `juicefs.cache-full-block` | `true` | 是否缓存所有读取的数据块,`false` 表示只缓存随机读的数据块。 | +| `juicefs.free-space` | 0.1 | 本地缓存目录的最小可用空间比例,默认保留 10% 剩余空间。 | +| `juicefs.attr-cache` | 0 | 目录和文件属性缓存的过期时间(单位:秒) | +| `juicefs.entry-cache` | 0 | 文件项缓存的过期时间(单位:秒) | +| `juicefs.dir-entry-cache` | 0 | 目录项缓存的过期时间(单位:秒) | +| `juicefs.discover-nodes-url` | | 指定发现集群节点列表的方式,每 10 分钟刷新一次。

YARN:`yarn`<br/>Spark Standalone:`http://spark-master:web-ui-port/json/`<br/>Spark ThriftServer:`http://thrift-server:4040/api/v1/applications/`<br/>
Presto:`http://coordinator:discovery-uri-port/v1/service/presto/` | + +#### I/O 配置 + +| 配置项 | 默认值 | 描述 | +| ------------------------ | ------ | --------------------------------------- | +| `juicefs.max-uploads` | 20 | 上传数据的最大连接数 | +| `juicefs.max-deletes` | 2 | 删除数据的最大连接数 | +| `juicefs.get-timeout` | 5 | 下载一个对象的超时时间,单位为秒。 | +| `juicefs.put-timeout` | 60 | 上传一个对象的超时时间,单位为秒。 | +| `juicefs.memory-size` | 300 | 读写数据的缓冲区最大空间,单位为 MiB。 | +| `juicefs.prefetch` | 1 | 预读数据块的线程数 | +| `juicefs.upload-limit` | 0 | 上传带宽限制,单位为 Mbps,默认不限制。 | +| `juicefs.download-limit` | 0 | 下载带宽限制,单位为 Mbps,默认不限制。 | + +#### 其他配置 + +| 配置项 | 默认值 | 描述 | +| ------------------------- | ------- | ------------------------------------------------------------ | +| `juicefs.bucket` | | 为对象存储指定跟格式化时不同的访问地址 | +| `juicefs.debug` | `false` | 是否开启 debug 日志 | +| `juicefs.access-log` | | 访问日志的路径。需要所有应用都有写权限,可以配置为 `/tmp/juicefs.access.log`。该文件会自动轮转,保留最近 7 个文件。 | +| `juicefs.superuser` | `hdfs` | 超级用户 | +| `juicefs.users` | `null` | 用户名以及 UID 列表文件的地址,比如 `jfs://name/etc/users`。文件格式为 `:`,一行一个用户。 | +| `juicefs.groups` | `null` | 用户组、GID 以及组成员列表文件的地址,比如 `jfs://name/etc/groups`。文件格式为 `::,`,一行一个用户组。 | +| `juicefs.umask` | `null` | 创建文件和目录的 umask 值(如 `0022`),如果没有此配置,默认值是 `fs.permissions.umask-mode`。 | +| `juicefs.push-gateway` | | [Prometheus Pushgateway](https://github.com/prometheus/pushgateway) 地址,格式为 `:`。 | +| `juicefs.push-interval` | 10 | 推送数据到 Prometheus 的时间间隔,单位为秒。 | +| `juicefs.push-auth` | | [Prometheus 基本认证](https://prometheus.io/docs/guides/basic-auth)信息,格式为 `:`。 | +| `juicefs.fast-resolve` | `true` | 是否开启快速元数据查找(通过 Redis Lua 脚本实现) | +| `juicefs.no-usage-report` | `false` | 是否上报数据。仅上版本号等使用量数据,不包含任何用户信息。 | + +#### 多文件系统配置 + +当需要同时使用多个 JuiceFS 文件系统时,上述所有配置项均可对特定文件系统进行指定,只需要将文件系统名字放在配置项的中间,比如下面示例中的 `jfs1` 和 `jfs2`: + +```xml + + juicefs.jfs1.meta + redis://jfs1.host:port/1 + + + juicefs.jfs2.meta + redis://jfs2.host:port/1 + +``` + +#### 配置示例 + +以下是一个常用的配置示例,请替换 `juicefs.meta` 配置中的 `{HOST}`、`{PORT}` 和 `{DB}` 变量为实际的值。 + +```xml + + fs.jfs.impl + io.juicefs.JuiceFileSystem + + + fs.AbstractFileSystem.jfs.impl + io.juicefs.JuiceFS + + + juicefs.meta + redis://{HOST}:{PORT}/{DB} + + + juicefs.cache-dir + /data*/jfs + + + juicefs.cache-size + 1024 + + + juicefs.access-log + /tmp/juicefs.access.log + +``` + +## Hadoop 环境配置 + +请参照前述各项配置表,将配置参数加入到 Hadoop 配置文件 `core-site.xml` 中。 + +### CDH6 + +如果使用的是 CDH 6 版本,除了修改 `core-site` 外,还需要通过 YARN 服务界面修改 `mapreduce.application.classpath`,增加: + +```shell +$HADOOP_COMMON_HOME/lib/juicefs-hadoop.jar +``` + +### HDP + +除了修改 `core-site` 外,还需要通过 MapReduce2 服务界面修改配置 `mapreduce.application.classpath`,在末尾增加(变量无需替换): + +```shell +/usr/hdp/${hdp.version}/hadoop/lib/juicefs-hadoop.jar +``` + +### Flink + +将配置参数加入 `conf/flink-conf.yaml`。如果只是在 Flink 中使用 JuiceFS, 可以不在 Hadoop 环境配置 JuiceFS,只需要配置 Flink 客户端即可。 + +### Hudi + +:::note 注意 +目前最新版 Hudi(v0.10.0)还不支持 JuiceFS,你需要自行编译最新 master 分支。 +::: + +请参考[「Hudi 官方文档」](https://hudi.apache.org/docs/next/jfs_hoodie)了解如何配置 JuiceFS。 + +### 重启服务 + +当需要使用以下组件访问 JuiceFS 数据时,需要重启相关服务。 + +:::note 注意 +在重启之前需要保证 JuiceFS 配置已经写入配置文件,通常可以查看机器上各组件配置的 `core-site.xml` 里面是否有 JuiceFS 相关配置。 +::: + +| 组件名 | 服务名 | +| ------ | -------------------------- | +| Hive | HiveServer
Metastore |
+| Spark | ThriftServer |
+| Presto | Coordinator<br/>Worker |
+| Impala | Catalog Server<br/>Daemon |
+| HBase | Master<br/>
RegionServer | + +HDFS、Hue、ZooKeeper 等服务无需重启。 + +若访问 JuiceFS 出现 `Class io.juicefs.JuiceFileSystem not found` 或 `No FilesSystem for scheme: jfs` 错误,请参考 [FAQ](#faq)。 + +## 环境验证 + +JuiceFS Java 客户端部署完成以后,可以采用以下方式验证部署是否成功。 + +### Hadoop + +```bash +$ hadoop fs -ls jfs://{JFS_NAME}/ +``` + +:::info 说明 +这里的 `JFS_NAME` 是创建 JuiceFS 文件系统时指定的名称。 +::: + +### Hive + +```sql +CREATE TABLE IF NOT EXISTS person +( + name STRING, + age INT +) LOCATION 'jfs://{JFS_NAME}/tmp/person'; +``` + +## 监控指标收集 + +请查看[「监控」](../administration/monitoring.md)文档了解如何收集及展示 JuiceFS 监控指标 + +## 基准测试 + +以下提供了一系列方法,使用 JuiceFS 客户端内置的压测工具,对已经成功部署了客户端环境进行性能测试。 + + +### 1. 本地测试 + +#### 元数据性能 + +- **create** + + ```shell + hadoop jar juicefs-hadoop.jar nnbench create -files 10000 -baseDir jfs://{JFS_NAME}/tmp/benchmarks/NNBench -local + ``` + + 此命令会 create 10000 个空文件 + +- **open** + + ```shell + hadoop jar juicefs-hadoop.jar nnbench open -files 10000 -baseDir jfs://{JFS_NAME}/tmp/benchmarks/NNBench -local + ``` + + 此命令会 open 10000 个文件,并不读取数据 + +- **rename** + + ```shell + hadoop jar juicefs-hadoop.jar nnbench rename -files 10000 -baseDir jfs://{JFS_NAME}/tmp/benchmarks/NNBench -local + ``` + +- **delete** + + ```shell + hadoop jar juicefs-hadoop.jar nnbench delete -files 10000 -baseDir jfs://{JFS_NAME}/tmp/benchmarks/NNBench -local + ``` + +- **参考值** + + | 操作 | TPS | 时延(ms) | + | ------ | ---- | ---- | + | create | 644 | 1.55 | + | open | 3467 | 0.29 | + | rename | 483 | 2.07 | + | delete | 506 | 1.97 | + +#### I/O 性能 + +- **顺序写** + + ```shell + hadoop jar juicefs-hadoop.jar dfsio -write -size 20000 -baseDir jfs://{JFS_NAME}/tmp/benchmarks/DFSIO -local + ``` + +- **顺序读** + + ```shell + hadoop jar juicefs-hadoop.jar dfsio -read -size 20000 -baseDir jfs://{JFS_NAME}/tmp/benchmarks/DFSIO -local + ``` + + 如果多次运行此命令,可能会出现数据被缓存到了系统缓存而导致读取速度非常快,只需清除 JuiceFS 的本地磁盘缓存即可 + +- **参考值** + + | 操作 | 吞吐(MB/s) | + | ------ | ---- | + | write | 647 | + | read | 111 | + +如果机器的网络带宽比较低,则一般能达到网络带宽瓶颈 + +### 2. 
分布式测试 + +以下命令会启动 MapReduce 分布式任务程序对元数据和 IO 性能进行测试,测试时需要保证集群有足够的资源能够同时启动所需的 map 任务。 + +本项测试使用的计算资源: + +- **服务器**:3 台 4 核 32 GB 内存的云服务器,突发带宽 5Gbit/s。 +- **数据库**:阿里云 Redis 5.0 社区 4G 主从版 + +#### 元数据性能 + +- **create** + + ```shell + hadoop jar juicefs-hadoop.jar nnbench create -maps 10 -threads 10 -files 1000 -baseDir jfs://{JFS_NAME}/tmp/benchmarks/NNBench + ``` + + 此命令会启动 10 个 map task,每个 task 有 10 个线程,每个线程会创建 1000 个空文件,总共 100000 个空文件 + +- **open** + + ```shell + hadoop jar juicefs-hadoop.jar nnbench open -maps 10 -threads 10 -files 1000 -baseDir jfs://{JFS_NAME}/tmp/benchmarks/NNBench + ``` + + 此命令会启动 10 个 map task,每个 task 有 10 个线程,每个线程会 open 1000 个文件,总共 open 100000 个文件 + +- **rename** + + ```shell + hadoop jar juicefs-hadoop.jar nnbench rename -maps 10 -threads 10 -files 1000 -baseDir jfs://{JFS_NAME}/tmp/benchmarks/NNBench + ``` + + 此命令会启动 10 个 map task,每个 task 有 10 个线程,每个线程会 rename 1000 个文件,总共 rename 100000 个文件 + +- **delete** + + ```shell + hadoop jar juicefs-hadoop.jar nnbench delete -maps 10 -threads 10 -files 1000 -baseDir jfs://{JFS_NAME}/tmp/benchmarks/NNBench + ``` + + 此命令会启动 10 个 map task,每个 task 有 10 个线程,每个线程会 delete 1000 个文件,总共 delete 100000 个文件 + +- **参考值** + + - 10 并发 + + | 操作 | IOPS | 时延(ms) | + | ------ | ---- | ---- | + | create | 4178 | 2.2 | + | open | 9407 | 0.8 | + | rename | 3197 | 2.9 | + | delete | 3060 | 3.0 | + + - 100 并发 + + | 操作 | IOPS | 时延(ms) | + | ------ | ---- | ---- | + | create | 11773 | 7.9 | + | open | 34083 | 2.4 | + | rename | 8995 | 10.8 | + | delete | 7191 | 13.6 | + +#### I/O 性能 + +- **连续写** + + ```shell + hadoop jar juicefs-hadoop.jar dfsio -write -maps 10 -size 10000 -baseDir jfs://{JFS_NAME}/tmp/benchmarks/DFSIO + ``` + + 此命令会启动 10 个 map task,每个 task 写入 10000MB 的数据 + +- **连续读** + + ```shell + hadoop jar juicefs-hadoop.jar dfsio -read -maps 10 -size 10000 -baseDir jfs://{JFS_NAME}/tmp/benchmarks/DFSIO + ``` + + 此命令会启动 10 个 map task,每个 task 读取 10000MB 的数据 + + +- **参考值** + + | 操作 | 平均吞吐(MB/s) | 总吞吐(MB/s) | + | ------ | ---- | ---- | + | write | 198 | 1835 | + | read | 124 | 1234 | + +### 3. TPC-DS + +测试数据集 100GB 规模,测试 Parquet 和 ORC 两种文件格式。 + +本次测试仅测试前 10 个查询。 + +使用 Spark Thrift JDBC/ODBC Server 开启 Spark 常驻进程,然后通过 Beeline 连接提交任务。 + +#### 测试硬件 + +| 节点类型 | 机器型号 | CPU | 内存 | 磁盘 | 数量 | +| ------ | ------------------- | ---- | ------ | ---------------------------------- | ---- | +| Master | 阿里云 ecs.r6.xlarge | 4 | 32GiB | 系统盘:100GiB | 1 | +| Core | 阿里云 ecs.r6.xlarge | 4 | 32GiB | 系统盘:100GiB
数据盘:500GiB 高效云盘 x 2 | 3 | + +#### 软件配置 + +##### Spark Thrift JDBC/ODBC Server + +```shell +${SPARK_HOME}/sbin/start-thriftserver.sh \ + --master yarn \ + --driver-memory 8g \ + --executor-memory 10g \ + --executor-cores 3 \ + --num-executors 3 \ + --conf spark.locality.wait=100 \ + --conf spark.sql.crossJoin.enabled=true \ + --hiveconf hive.server2.thrift.port=10001 +``` + +##### JuiceFS 缓存配置 + +Core 节点的 2 块数据盘挂载在 `/data01` 和 `/data02` 目录下,`core-site.xml` 配置如下: + +```xml + + juicefs.cache-size + 200000 + + + juicefs.cache-dir + /data*/jfscache + + + juicefs.cache-full-block + false + + + juicefs.discover-nodes-url + yarn + + + juicefs.attr-cache + 3 + + + juicefs.entry-cache + 3 + + + juicefs.dir-entry-cache + 3 + +``` + +#### 测试 + +任务提交的命令如下: + +```shell +${SPARK_HOME}/bin/beeline -u jdbc:hive2://localhost:10001/${DATABASE} \ + -n hadoop \ + -f query{i}.sql +``` + +#### 结果 + +JuiceFS 可以使用本地磁盘作为缓存加速数据访问,以下数据是分别使用 Redis 和 TiKV 作为 JuiceFS 的元数据引擎跑 4 次后的结果(单位秒)。 + +##### ORC + +| Queries | JuiceFS (Redis) | JuiceFS (TiKV) | HDFS | +| ------- | --------------- | -------------- | ---- | +| q1 | 20 | 20 | 20 | +| q2 | 28 | 33 | 26 | +| q3 | 24 | 27 | 28 | +| q4 | 300 | 309 | 290 | +| q5 | 116 | 117 | 91 | +| q6 | 37 | 42 | 41 | +| q7 | 24 | 28 | 23 | +| q8 | 13 | 15 | 16 | +| q9 | 87 | 112 | 89 | +| q10 | 23 | 24 | 22 | + +![orc](../images/spark_ql_orc.png) + +##### Parquet + +| Queries | JuiceFS (Redis) | JuiceFS (TiKV) | HDFS | +| ------- | --------------- | -------------- | ---- | +| q1 | 33 | 35 | 39 | +| q2 | 28 | 32 | 31 | +| q3 | 23 | 25 | 24 | +| q4 | 273 | 284 | 266 | +| q5 | 96 | 107 | 94 | +| q6 | 36 | 35 | 42 | +| q7 | 28 | 30 | 24 | +| q8 | 11 | 12 | 14 | +| q9 | 85 | 97 | 77 | +| q10 | 24 | 28 | 38 | + +![parquet](../images/spark_sql_parquet.png) + + +## FAQ + +### 1. 出现 `Class io.juicefs.JuiceFileSystem not found` 异常 + +出现这个异常的原因是 juicefs-hadoop.jar 没有被加载,可以用 `lsof -p {pid} | grep juicefs` 查看 JAR 文件是否被加载。需要检查 JAR 文件是否被正确地放置在各个组件的 classpath 里面,并且保证 JAR 文件有可读权限。 + +另外,在某些发行版 Hadoop 环境中,需要修改 `mapred-site.xml` 中的 `mapreduce.application.classpath` 参数,添加 juicefs-hadoop.jar 的路径。 + +### 2. 出现 `No FilesSystem for scheme: jfs` 异常 + +出现这个异常的原因是 `core-site.xml` 配置文件中的 JuiceFS 配置没有被读取到,需要检查组件配置的 `core-site.xml` 中是否有 JuiceFS 相关配置。 diff --git a/docs/zh_cn/deployment/how_to_use_on_kubernetes.md b/docs/zh_cn/deployment/how_to_use_on_kubernetes.md new file mode 100644 index 0000000..c6ddff7 --- /dev/null +++ b/docs/zh_cn/deployment/how_to_use_on_kubernetes.md @@ -0,0 +1,370 @@ +--- +sidebar_label: Kubernetes 使用 JuiceFS +sidebar_position: 2 +slug: /how_to_use_on_kubernetes +--- +# Kubernetes 使用 JuiceFS 持久化数据 + +JuiceFS 非常适合用作 Kubernetes 集群的存储层,目前有两种常见的用法。 + +## JuiceFS CSI Driver + +[JuiceFS CSI Driver](https://github.com/juicedata/juicefs-csi-driver) 遵循 [CSI](https://github.com/container-storage-interface/spec/blob/master/spec.md) 规范,实现了容器编排系统与 JuiceFS 文件系统之间的接口,支持动态配置 JuiceFS 卷提供给 Pod 使用。 + +### 版本要求 + +- Kubernetes 1.14+ + +### 安装 + +JuiceFS CSI Driver 有以下两种安装的方式。 + +#### 通过 Helm 安装 + +Helm 是 Kubernetes 的包管理器,Chart 是 Helm 管理的包。你可以把它看作是 Homebrew formula,APT dpkg,或 YUM RPM 在 Kubernetes 中的等价物。 + +该安装方式要求 Helm **3.1.0** 及以上版本,具体安装方法请参考[「Helm 安装指南」](https://github.com/helm/helm#install)。 + +1. 
准备一个设置存储类基本信息的配置文件,例如:`values.yaml`,复制并完善下列配置信息。其中,`backend` 部分是 JuiceFS 文件系统相关的信息,你可以参照 [JuiceFS 快速上手指南](../getting-started/for_local.md)了解相关内容。如果使用的是已经提前创建好的 JuiceFS 卷,则只需填写 `name` 和 `metaurl` 这两项即可。`mountPod` 部分可以对使用此驱动的 Pod 设置 CPU 和内存的资源配置。不需要的项可以删除,或者将它的值留空。 + +```yaml +storageClasses: +- name: juicefs-sc + enabled: true + reclaimPolicy: Retain + backend: + name: "test" + metaurl: "redis://juicefs.afyq4z.0001.use1.cache.amazonaws.com/3" + storage: "s3" + accessKey: "" + secretKey: "" + bucket: "https://juicefs-test.s3.us-east-1.amazonaws.com" + mountPod: + resources: + limits: + cpu: "" + memory: "" + requests: + cpu: "" + memory: "" +``` + +在支持「角色管理」的云平台,可以通过为 Kubernetes 节点分配「服务角色」,实现对象存储 API 免密钥访问。这种情况下无需设置配置文件中的 `accessKey` 和 `secretKey`。 + +2. 依次执行以下三条命令,通过 Helm 部署 JuiceFS CSI Driver。 + +```shell +$ helm repo add juicefs-csi-driver https://juicedata.github.io/juicefs-csi-driver/ +$ helm repo update +$ helm install juicefs-csi-driver juicefs-csi-driver/juicefs-csi-driver -n kube-system -f ./values.yaml +``` + +3. 检查部署状态 + +- **检查 Pods**:部署过程会启动一个名为 `juicefs-csi-controller` 的 `StatefulSet` 及一个 replica,以及一个名为 `juicefs-csi-node` 的 `DaemonSet`。执行命令 `kubectl -n kube-system get pods -l app.kubernetes.io/name=juicefs-csi-driver` 会看到有 `n+1` 个 pod 在运行,例如: + +```sh +$ kubectl -n kube-system get pods -l app.kubernetes.io/name=juicefs-csi-driver +NAME READY STATUS RESTARTS AGE +juicefs-csi-controller-0 3/3 Running 0 22m +juicefs-csi-node-v9tzb 3/3 Running 0 14m +``` + +- **检查 secret**:通过命令 `kubectl -n kube-system describe secret juicefs-sc-secret` 可以看到前面 `values.yaml` 配置文件中 `backend` 部分的 secret 信息。 + +```sh +$ kubectl -n kube-system describe secret juicefs-sc-secret +Name: juicefs-sc-secret +Namespace: kube-system +Labels: app.kubernetes.io/instance=juicefs-csi-driver + app.kubernetes.io/managed-by=Helm + app.kubernetes.io/name=juicefs-csi-driver + app.kubernetes.io/version=0.7.0 + helm.sh/chart=juicefs-csi-driver-0.1.0 +Annotations: meta.helm.sh/release-name: juicefs-csi-driver + meta.helm.sh/release-namespace: default + +Type: Opaque + +Data +==== +access-key: 0 bytes +bucket: 47 bytes +metaurl: 54 bytes +name: 4 bytes +secret-key: 0 bytes +storage: 2 bytes +``` + +- **检查存储类(storage class)**:`kubectl get sc juicefs-sc` 命令将会显示类如下面的存储类: + +```sh +$ kubectl get sc juicefs-sc +NAME PROVISIONER RECLAIMPOLICY VOLUMEBINDINGMODE ALLOWVOLUMEEXPANSION AGE +juicefs-sc csi.juicefs.com Retain Immediate false 69m +``` + +#### 通过 kubectl 安装 + +由于 Kubernetes 在版本变更过程中会废弃部分旧的 API,因此需要根据你使用 Kubernetes 版本选择适用的部署文件: + +**Kubernetes v1.18 及以上版本** + +```shell +$ kubectl apply -f https://raw.githubusercontent.com/juicedata/juicefs-csi-driver/master/deploy/k8s.yaml +``` + +**Kubernetes v1.18 以下版本** + +```shell +$ kubectl apply -f https://raw.githubusercontent.com/juicedata/juicefs-csi-driver/master/deploy/k8s_before_v1_18.yaml +``` + +**创建存储类** + +参考以下内容创建一个配置文件,例如:`juicefs-sc.yaml`,在 `stringData` 部分填写 JuiceFS 文件系统的配置信息: + +```yaml +apiVersion: v1 +kind: Secret +metadata: + name: juicefs-sc-secret + namespace: kube-system +type: Opaque +stringData: + name: "test" + metaurl: "redis://juicefs.afyq4z.0001.use1.cache.amazonaws.com/3" + storage: "s3" + bucket: "https://juicefs-test.s3.us-east-1.amazonaws.com" + access-key: "" + secret-key: "" +--- +apiVersion: storage.k8s.io/v1 +kind: StorageClass +metadata: + name: juicefs-sc +provisioner: csi.juicefs.com +reclaimPolicy: Retain +volumeBindingMode: Immediate +parameters: + csi.storage.k8s.io/node-publish-secret-name: juicefs-sc-secret + 
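  # 补充说明:node-publish 与 provisioner 两组参数均指向上文创建的 juicefs-sc-secret,
  # CSI 驱动分别在挂载卷(node publish)和动态创建卷(provision)时从该 Secret 读取文件系统配置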
csi.storage.k8s.io/node-publish-secret-namespace: kube-system + csi.storage.k8s.io/provisioner-secret-name: juicefs-sc-secret + csi.storage.k8s.io/provisioner-secret-namespace: kube-system +``` + +执行命令,部署存储类: + +```shell +$ kubectl apply -f ./juicefs-sc.yaml +``` + +另外,你也可以将上述配置文件中 Secret 部分抽离出来,通过 `kubectl` 在命令行上创建: + +```shell +$ kubectl -n kube-system create secret generic juicefs-sc-secret \ + --from-literal=name=test \ + --from-literal=metaurl=redis://juicefs.afyq4z.0001.use1.cache.amazonaws.com/3 \ + --from-literal=storage=s3 \ + --from-literal=bucket=https://juicefs-test.s3.us-east-1.amazonaws.com \ + --from-literal=access-key="" \ + --from-literal=secret-key="" +``` + +这样一来,存储类的配置文件 `juicefs-sc.yaml` 应该像下面这样: + +```yaml +apiVersion: storage.k8s.io/v1 +kind: StorageClass +metadata: + name: juicefs-sc +provisioner: csi.juicefs.com +reclaimPolicy: Retain +parameters: + csi.storage.k8s.io/node-publish-secret-name: juicefs-sc-secret + csi.storage.k8s.io/node-publish-secret-namespace: kube-system + csi.storage.k8s.io/provisioner-secret-name: juicefs-sc-secret + csi.storage.k8s.io/provisioner-secret-namespace: kube-system +``` + +然后通过 `kubectl apply` 部署存储类: + +```shell +$ kubectl apply -f ./juicefs-sc.yaml +``` + +### 使用 JuiceFS 为 Pod 提供存储 + +JuiceFS CSI Driver 同时支持静态和动态 PV,你既可以将提前创建的 PV 手动分配给 Pods,也可以在部署 Pods 时通过 PVC 动态的创建卷。 + +例如,可以使用下面的配置创建一个名为 `development.yaml` 的配置文件,它通过 PVC 为 Nginx 容器创建持久化卷,并挂载到了容器的 `/config` 目录: + +```yaml +apiVersion: v1 +kind: PersistentVolumeClaim +metadata: + name: web-pvc +spec: + accessModes: + - ReadWriteMany + resources: + requests: + storage: 10Pi + storageClassName: juicefs-sc +--- +apiVersion: apps/v1 +kind: Deployment +metadata: + name: nginx-run +spec: + selector: + matchLabels: + app: nginx + template: + metadata: + labels: + app: nginx + spec: + containers: + - name: nginx + image: linuxserver/nginx + ports: + - containerPort: 80 + volumeMounts: + - mountPath: /config + name: web-data + volumes: + - name: web-data + persistentVolumeClaim: + claimName: web-pvc +``` + +通过 `kubectl apply` 部署 Pods: + +``` +$ kubectl apply -f ./development.yaml +``` + +部署成功以后,查看 pods 状态: + +```shell +$ kubectl get pods +NAME READY STATUS RESTARTS AGE +nginx-run-7d6fb7d6df-cfsvp 1/1 Running 0 21m +``` + +我们可以简单的通过 `kubectl exec` 命令查看容器中的文件系统挂载情况: + +```shell +$ kubectl exec nginx-run-7d6fb7d6df-cfsvp -- df -Th +Filesystem Type Size Used Avail Use% Mounted on +overlay overlay 40G 7.0G 34G 18% / +tmpfs tmpfs 64M 0 64M 0% /dev +tmpfs tmpfs 3.8G 0 3.8G 0% /sys/fs/cgroup +JuiceFS:jfs fuse.juicefs 1.0P 180M 1.0P 1% /config +... 
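# (补充示例,Pod 名称沿用上文)可再写入一个测试文件,进一步验证挂载的 JuiceFS 卷可读写:
#   kubectl exec nginx-run-7d6fb7d6df-cfsvp -- sh -c 'echo ok > /config/test.txt && cat /config/test.txt'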
+``` + +从容器中返回的结果可以看到,完全符合预期,JuiceFS 卷已经挂载到了我们指定的 `/config` 目录。 + +像上面这样通过 PVC 动态创建 PV 时,JuiceFS 会在文件系统根目录创建与 PV 同名的目录并挂载到容器中。执行下列命令,可以查看集群中所有 PV: + +```shell +$ kubectl get pv -A +NAME CAPACITY ACCESS MODES RECLAIM POLICY STATUS CLAIM STORAGECLASS REASON AGE +pvc-b670c8a1-2962-497c-afa2-33bc8b8bb05d 10Pi RWX Retain Bound default/web-pvc juicefs-sc 34m +``` + +通过外部主机挂载同一个 JuiceFS 存储,可以看到当前正在使用的 PV 以及曾经创建的 PV。 + +![](../images/pv-on-juicefs.png) + +如果想了解更多关于 JuiceFS CSI Driver 的信息,请参考[项目主页](https://github.com/juicedata/juicefs-csi-driver)。 + +### 创建更多 JuiceFS 存储类 + +你可以根据实际需要重复前面的步骤,通过 JuiceFS CSI Driver 创建任意数量的存储类。但要注意修改存储类的名称以及 JuiceFS 文件系统的配置信息,避免与已创建的存储类冲突。例如,使用 Helm 时可以创建一个名为 `jfs2.yaml` 的配置文件: + +```yaml +storageClasses: +- name: jfs-sc2 + enabled: true + reclaimPolicy: Retain + backend: + name: "jfs-2" + metaurl: "redis://example.abc.0001.use1.cache.amazonaws.com/3" + storage: "s3" + accessKey: "" + secretKey: "" + bucket: "https://jfs2.s3.us-east-1.amazonaws.com" +``` + +执行 Helm 命令进行部署: + +```shell +$ helm repo add juicefs-csi-driver https://juicedata.github.io/juicefs-csi-driver/ +$ helm repo update +$ helm upgrade juicefs-csi-driver juicefs-csi-driver/juicefs-csi-driver --install -f ./jfs2.yaml +``` + +查看集群中存储类的情况: + +```shell +$ kubectl get sc +NAME PROVISIONER RECLAIMPOLICY VOLUMEBINDINGMODE ALLOWVOLUMEEXPANSION AGE +juicefs-sc csi.juicefs.com Retain Immediate false 88m +juicefs-sc2 csi.juicefs.com Retain Immediate false 13m +standard (default) k8s.io/minikube-hostpath Delete Immediate false 128m +``` + +### 监控 + +请查看[「监控」](../administration/monitoring.md)文档了解如何收集及展示 JuiceFS 监控指标 + +## 在容器中挂载 JuiceFS + +某些情况下,你可能需要在容器中直接挂载 JuiceFS 存储,这需要在容器中使用 JuiceFS 客户端,你可以参考以下 `Dockerfile` 样本将 JuiceFS 客户端集成到应用镜像: + +```dockerfile +FROM alpine:latest +LABEL maintainer="Juicedata " + +# Install JuiceFS client +RUN apk add --no-cache curl && \ + JFS_LATEST_TAG=$(curl -s https://api.github.com/repos/juicedata/juicefs/releases/latest | grep 'tag_name' | cut -d '"' -f 4 | tr -d 'v') && \ + wget "https://github.com/juicedata/juicefs/releases/download/v${JFS_LATEST_TAG}/juicefs-${JFS_LATEST_TAG}-linux-amd64.tar.gz" && \ + tar -zxf "juicefs-${JFS_LATEST_TAG}-linux-amd64.tar.gz" && \ + install juicefs /usr/bin && \ + rm juicefs "juicefs-${JFS_LATEST_TAG}-linux-amd64.tar.gz" && \ + rm -rf /var/cache/apk/* && \ + apk del curl + +ENTRYPOINT ["/usr/bin/juicefs", "mount"] +``` + +由于 JuiceFS 需要使用 FUSE 设备挂载文件系统,因此在创建 Pod 时需要允许容器在特权模式下运行: + +```yaml +apiVersion: apps/v1 +kind: Deployment +metadata: + name: nginx-run +spec: + selector: + matchLabels: + app: nginx + template: + metadata: + labels: + app: nginx + spec: + containers: + - name: nginx + image: linuxserver/nginx + ports: + - containerPort: 80 + securityContext: + privileged: true +``` + +> ⚠️ **风险提示**:容器启用 `privileged: true` 特权模式以后,就具备了访问宿主机所有设备的权限,即拥有了对宿主机内核的完全控制权限。使用不当会带来严重的安全隐患,请您在使用此方式之前进行充分的安全评估。 diff --git a/docs/zh_cn/deployment/juicefs_on_docker.md b/docs/zh_cn/deployment/juicefs_on_docker.md new file mode 100644 index 0000000..38b1942 --- /dev/null +++ b/docs/zh_cn/deployment/juicefs_on_docker.md @@ -0,0 +1,107 @@ +--- +sidebar_label: Docker 使用 JuiceFS +sidebar_position: 1 +slug: /juicefs_on_docker +--- +# 在 Docker 使用 JuiceFS + +目前有三种在 Docker 上使用 JuiceFS 存储的方法: + +## 1. 
卷映射 + +这种方法是将 JuiceFS 挂载点中的目录映射给 Docker 容器。比如, JuiceFS 文件系统挂载在 `/mnt/jfs` 目录,在创建容器时可以这样将 JuiceFS 存储映射到 Docker 容器: + +```sh +$ sudo docker run -d --name nginx \ + -v /mnt/jfs/html:/usr/share/nginx/html \ + -p 8080:80 \ + nginx +``` + +但需要注意,默认情况下,只有挂载 JuiceFS 存储的用户有存储的读写权限,当你需要将 JuiceFS 存储映射给 Docker 容器使用时,如果你没有使用 root 身份挂载 JuiceFS 存储,则需要先开启 FUSE 的 `user_allow_other` 选项,然后再添加 `-o allow_other` 选项重新挂载 JuiceFS 文件系统。 + +> **注意**:使用 root 用户身份或使用 sudo 挂载的 JuiceFS 存储,会自动添加 `allow_other` 选项,无需手动设置。 + +### FUSE 设置 + +默认情况下,`allow_other` 选项只允许 root 用户使用,为了让普通用户也有权限使用该挂载选项,需要修改 FUSE 的配置文件。 + +#### 修改配置文件 + +编辑 FUSE 的配置文件,通常是 `/etc/fuse.conf`: + +```sh +$ sudo nano /etc/fuse.conf +``` + +将配置文件中的 `user_allow_other` 前面的 `#` 注释符删掉,修改后类似下面这样: + +```conf +# /etc/fuse.conf - Configuration file for Filesystem in Userspace (FUSE) + +# Set the maximum number of FUSE mounts allowed to non-root users. +# The default is 1000. +#mount_max = 1000 + +# Allow non-root users to specify the allow_other or allow_root mount options. +user_allow_other +``` + +### 重新挂载 JuiceFS + +FUSE 的 `user_allow_other` 启用后,你需要重新挂载 JuiceFS 文件系统,使用 `-o` 选项设置 `allow_other`,例如: + +```sh +$ juicefs mount -d -o allow_other redis://:6379/1 /mnt/jfs +``` + +## 2. Docker Volume Plugin + +JuiceFS 也支持使用 [volume plugin](https://docs.docker.com/engine/extend/) 方式访问。 + +```sh +$ docker plugin install juicedata/juicefs +Plugin "juicedata/juicefs" is requesting the following privileges: + - network: [host] + - device: [/dev/fuse] + - capabilities: [CAP_SYS_ADMIN] +Do you grant the above permissions? [y/N] + +$ docker volume create -d juicedata/juicefs:latest -o name={{VOLUME_NAME}} -o metaurl={{META_URL}} -o access-key={{ACCESS_KEY}} -o secret-key={{SECRET_KEY}} jfsvolume +$ docker run -it -v jfsvolume:/opt busybox ls /opt +``` + +将上面 `{{VOLUME_NAME}}`、`{{META_URL}}`、`{{ACCESS_KEY}}`、`{{SECRET_KEY}}` 替换成你自己的文件系统配置。想要了解更多 JuiceFS 卷插件内容,可以访问 [juicedata/docker-volume-juicefs](https://github.com/juicedata/docker-volume-juicefs) 代码仓库。 + +## 3. 
在 Docker 容器中挂载 JuiceFS + +这种方法是将 JuiceFS 文件系统直接在 Docker 容器中进行挂载和使用,相比第一种方式,在容器中直接挂载 JuiceFS 可以缩小文件被误操作的几率。谁使用谁挂载,也让容器管理更清晰直观。 + +由于在容器中进行文件系统挂载需要将 JuiceFS 客户端拷贝到容器,在常规的容器管理过程中,需要把下载或拷贝 JuiceFS 客户端以及挂载文件系统的过程写入 Dockerfile,然后重新构建镜像。例如,你可以参考以下 Dockerfile,将 JuiceFS 客户端打包到 Alpine 镜像。 + +```dockerfile +FROM alpine:latest +LABEL maintainer="Juicedata " + +# Install JuiceFS client +RUN apk add --no-cache curl && \ + JFS_LATEST_TAG=$(curl -s https://api.github.com/repos/juicedata/juicefs/releases/latest | grep 'tag_name' | cut -d '"' -f 4 | tr -d 'v') && \ + wget "https://github.com/juicedata/juicefs/releases/download/v${JFS_LATEST_TAG}/juicefs-${JFS_LATEST_TAG}-linux-amd64.tar.gz" && \ + tar -zxf "juicefs-${JFS_LATEST_TAG}-linux-amd64.tar.gz" && \ + install juicefs /usr/bin && \ + rm juicefs "juicefs-${JFS_LATEST_TAG}-linux-amd64.tar.gz" && \ + rm -rf /var/cache/apk/* && \ + apk del curl + +ENTRYPOINT ["/usr/bin/juicefs", "mount"] +``` + +另外,由于在容器中使用 FUSE 需要相应的权限,在创建容器时,需要指定 `--privileged=true` 选项,比如: + +```sh +$ sudo docker run -d --name nginx \ + -v /mnt/jfs/html:/usr/share/nginx/html \ + -p 8080:80 \ + --privileged=true \ + nginx-with-jfs +``` diff --git a/docs/zh_cn/deployment/s3_gateway.md b/docs/zh_cn/deployment/s3_gateway.md new file mode 100644 index 0000000..12e99af --- /dev/null +++ b/docs/zh_cn/deployment/s3_gateway.md @@ -0,0 +1,250 @@ +--- +sidebar_label: 配置 JuiceFS S3 网关 +sidebar_position: 4 +slug: /s3_gateway +--- +# 启用 JuiceFS 的 S3 网关 + +JuiceFS 从 v0.11 开始引入了 S3 网关,这是一个通过 [MinIO S3 网关](https://docs.min.io/docs/minio-gateway-for-s3.html)实现的功能。它为 JuiceFS 中的文件提供跟 S3 兼容的 RESTful API,在不方便挂载的情况下能够用 s3cmd、AWS CLI、MinIO Client(mc)等工具管理 JuiceFS 上存储的文件。另外,S3 网关还提供了一个基于网页的文件管理器,用户使用浏览器就能对 JuiceFS 上的文件进行常规的增删管理。 + +因为 JuiceFS 会将文件分块存储到底层的对象存储中,不能直接使用底层对象存储的接口和界面来直接访问文件,S3 网关提供了类似底层对象存储的访问能力,架构图如下: + +![](../images/juicefs-s3-gateway-arch.png) + +## 先决条件 + +S3 网关是建立在 JuiceFS 文件系统之上的功能,如果你还没有 JuiceFS 文件系统,请先参考 [快速上手指南](../getting-started/for_local.md) 创建一个。 + +JuiceFS S3 网关是 v0.11 中引入的功能,请确保您拥有最新版本的 JuiceFS。 + +## 快速开始 + +使用 JuiceFS 的 `gateway` 子命令即可在当前主机启用 S3 网关。在开启功能之前,需要先设置 `MINIO_ROOT_USER` 和 `MINIO_ROOT_PASSWORD` 两个环境变量,即访问 S3 API 时认证身份用的 Access Key 和 Secret Key。可以简单的把它们视为 S3 网关的用户名和密码。例如: + +```shell +$ export MINIO_ROOT_USER=admin +$ export MINIO_ROOT_PASSWORD=12345678 +$ juicefs gateway redis://localhost:6379 localhost:9000 +``` + +以上三条命令中,前两条命令用于设置环境变量。注意,`MINIO_ROOT_USER` 的长度至少 3 个字符, `MINIO_ROOT_PASSWORD` 的长度至少 8 个字符(Windows 用户请改用 `set` 命令设置环境变量,例如:`set MINIO_ROOT_USER=admin`)。 + +最后一条命令用于启用 S3 网关,`gateway` 子命令至少需要提供两个参数,第一个是存储元数据的数据库 URL,第二个是 S3 网关监听的地址和端口。你可以根据需要在 `gateway` 子命令中添加[其他选项](../reference/command_reference.md#juicefs-gateway)优化 S3 网关,比如,可以将默认的本地缓存设置为 20 GiB。 + +```shell +$ juicefs gateway --cache-size 20480 redis://localhost:6379 localhost:9000 +``` + +在这个例子中,我们假设 JuiceFS 文件系统使用的是本地的 Redis 数据库。当 S3 网关启用时,在**当前主机**上可以使用 `http://localhost:9000` 这个地址访问到 S3 网关的管理界面。 + +![](../images/s3-gateway-file-manager.jpg) + +如果你希望通过局域网或互联网上的其他主机访问 S3 网关,则需要调整监听地址,例如: + +```shell +$ juicefs gateway redis://localhost:6379 0.0.0.0:9000 +``` + +这样一来,S3 网关将会默认接受所有网络请求。不同的位置的 S3 客户端可以使用不同的地址访问 S3 网关,例如: + +- S3 网关所在主机中的第三方客户端可以使用 `http://127.0.0.1:9000` 或 `http://localhost:9000` 进行访问; +- 与 S3 网关所在主机处于同一局域网的第三方客户端可以使用 `http://192.168.1.8:9000` 访问(假设启用 S3 网关的主机内网 IP 地址为 192.168.1.8); +- 通过互联网访问 S3 网关可以使用 `http://110.220.110.220:9000` 访问(假设启用 S3 网关的主机公网 IP 地址为 110.220.110.220)。 + +## 访问 S3 网关 + +各类支持 S3 API 的客户端、桌面程序、Web 程序等都可以访问 JuiceFS S3 网关。使用时请注意 S3 网关监听的地址和端口。 + +:::tip 提示 
+以下示例均为使用第三方客户端访问本地主机上运行的 S3 网关。在具体场景下,请根据实际情况调整访问 S3 网关的地址。 +::: + +### 使用 AWS CLI + +从 [https://aws.amazon.com/cli](https://aws.amazon.com/cli) 下载并安装 AWS CLI,然后进行配置: + +```bash +$ aws configure +AWS Access Key ID [None]: admin +AWS Secret Access Key [None]: 12345678 +Default region name [None]: +Default output format [None]: +``` + +程序会通过交互式的方式引导你完成新配置的添加,其中 `Access Key ID` 与 `MINIO_ROOT_USER` 相同,`Secret Access Key` 与 `MINIO_ROOT_PASSWORD` 相同,区域名称和输出格式请留空。 + +之后,即可使用 `aws s3` 命令访问 JuiceFS 存储,例如: + +```bash +# List buckets +$ aws --endpoint-url http://localhost:9000 s3 ls + +# List objects in bucket +$ aws --endpoint-url http://localhost:9000 s3 ls s3:// +``` + +### 使用 MinIO 客户端 + +首先参照 [MinIO 下载页面](https://min.io/download)安装 mc,然后添加一个新的 alias: + +```bash +$ mc alias set juicefs http://localhost:9000 admin 12345678 --api S3v4 +``` + +依照 mc 的命令格式,以上命令创建了一个别名为 `juicefs` 的配置。特别注意,命令中必须指定 API 版本,即 `--api "s3v4"`。 + +然后,你可以通过 mc 客户端自由的在本地磁盘与 JuiceFS 存储以及其他云存储之间进行文件和文件夹的复制、移动、增删等管理操作。 + +```shell +$ mc ls juicefs/jfs +[2021-10-20 11:59:00 CST] 130KiB avatar-2191932_1920.png +[2021-10-20 11:59:00 CST] 4.9KiB box-1297327.svg +[2021-10-20 11:59:00 CST] 21KiB cloud-4273197.svg +[2021-10-20 11:59:05 CST] 17KiB hero.svg +[2021-10-20 11:59:06 CST] 1.7MiB hugo-rocha-qFpnvZ_j9HU-unsplash.jpg +[2021-10-20 11:59:06 CST] 16KiB man-1352025.svg +[2021-10-20 11:59:06 CST] 1.3MiB man-1459246.ai +[2021-10-20 11:59:08 CST] 19KiB sign-up-accent-left.07ab168.svg +[2021-10-20 11:59:10 CST] 11MiB work-4997565.svg +``` + +## 在 Kubernetes 中部署 S3 网关 + +### 通过 kubectl 部署 + +首先创建 secret(以 Amazon S3 为例): + +```shell +export NAMESPACE=default +``` + +```shell +kubectl -n ${NAMESPACE} create secret generic juicefs-secret \ + --from-literal=name= \ + --from-literal=metaurl=redis://[:]@:6379[/] \ + --from-literal=storage=s3 \ + --from-literal=bucket=https://.s3..amazonaws.com \ + --from-literal=access-key= \ + --from-literal=secret-key= +``` + +其中: +- `name`:JuiceFS 文件系统名称 +- `metaurl`:元数据服务的访问 URL(比如 Redis)。更多信息参考[这篇文档](../reference/how_to_setup_metadata_engine.md)。 +- `storage`:对象存储类型,比如 `s3`、`gs`、`oss`。更多信息参考[这篇文档](../reference/how_to_setup_object_storage.md)。 +- `bucket`:Bucket URL。更多信息参考[这篇文档](../reference/how_to_setup_object_storage.md)。 +- `access-key`:对象存储的 access key。更多信息参考[这篇文档](../reference/how_to_setup_object_storage.md)。 +- `secret-key`:对象存储的 secret key。更多信息参考[这篇文档](../reference/how_to_setup_object_storage.md)。 + +然后下载 S3 网关[部署 YAML](https://github.com/juicedata/juicefs/blob/main/deploy/juicefs-s3-gateway.yaml) 并通过 `kubectl` 创建 `Deployment` 和 `Service` 资源。以下几点需要特别注意: + +- 请将以下命令的 `${NAMESPACE}` 替换为实际部署 S3 网关的 Kubernetes 名字空间,默认为 `kube-system`。 +- `Deployment` 的 `replicas` 默认为 1,请根据实际情况调整。 +- 默认使用 `juicedata/juicefs-csi-driver` 最新版镜像,其中已经集成了最新版 JuiceFS 客户端,具体集成的 JuiceFS 客户端版本请查看[这里](https://github.com/juicedata/juicefs-csi-driver/releases)。 +- `Deployment` 的 `initContainers` 会先尝试格式化 JuiceFS 文件系统,如果你已经提前格式化完毕,这一步不会影响现有 JuiceFS 文件系统。 +- S3 网关默认监听的端口号为 9000 +- S3 网关[启动选项](../reference/command_reference.md#juicefs-gateway)均为默认值,请根据实际需求调整。 +- `MINIO_ROOT_USER` 环境变量的值为 Secret 中的 `access-key`,`MINIO_ROOT_PASSWORD` 环境变量的值为 Secret 中的 `secret-key`。 + +```shell +curl -sSL https://raw.githubusercontent.com/juicedata/juicefs/main/deploy/juicefs-s3-gateway.yaml | sed "s@kube-system@${NAMESPACE}@g" | kubectl apply -f - +``` + +检查是否已经部署成功: + +```shell +# kubectl -n $NAMESPACE get po -o wide -l app.kubernetes.io/name=juicefs-s3-gateway +juicefs-s3-gateway-5c7d65c77f-gj69l 1/1 Running 0 37m 10.244.2.238 kube-node-3 +# kubectl -n 
$NAMESPACE get svc -l app.kubernetes.io/name=juicefs-s3-gateway +NAME TYPE CLUSTER-IP EXTERNAL-IP PORT(S) AGE +juicefs-s3-gateway ClusterIP 10.101.108.42 9000/TCP 142m +``` + +可以在应用 pod 中通过 `juicefs-s3-gateway.${NAMESPACE}.svc.cluster.local:9000` 域名或 `juicefs-s3-gateway` 的 pod IP 及端口号(例如 `10.244.2.238:9000`)访问 JuiceFS S3 网关。 + +若想通过 Ingress 访问,需要确保集群中已经部署了 Ingress Controller,参考 [Ingress Controller 部署文档](https://kubernetes.github.io/ingress-nginx/deploy/)。创建 `Ingress` 资源: + +```yaml +kubectl apply -f - <` 来访问 S3 网关(不需要带上 9000 端口号),如下: + +```shell +kubectl get services -n ingress-nginx +``` + +Ingress 的各个版本之间差异较大,更多使用方式请参考 [Ingress Controller 使用文档](https://kubernetes.github.io/ingress-nginx/user-guide/basic-usage/)。 + +### 通过 Helm 部署 + +1. 准备配置文件 + + 创建一个配置文件,例如:`values.yaml`,复制并完善下列配置信息。其中,`secret` 部分是 JuiceFS 文件系统相关的信息,你可以参照 [JuiceFS 快速上手指南](../getting-started/for_local.md) 了解相关内容。 + + ```yaml + secret: + name: "" + metaurl: "" + storage: "" + accessKey: "" + secretKey: "" + bucket: "" + ``` + + 若需要部署 Ingress,在 `values.yaml` 中再加上: + + ```yaml + ingress: + enables: true + ``` + +2. 部署 + + 依次执行以下三条命令,通过 Helm 部署 JuiceFS S3 网关(注意以下示例是部署到 `kube-system` 名字空间)。 + + ```sh + helm repo add juicefs-s3-gateway https://juicedata.github.io/charts/ + helm repo update + helm install juicefs-s3-gateway juicefs-s3-gateway/juicefs-s3-gateway -n kube-system -f ./values.yaml + ``` + +3. 检查部署状态 + + - **检查 Pods**:部署过程会启动一个名为 `juicefs-s3-gateway` 的 `Deployment`。执行命令 `kubectl -n kube-system get po -l app.kubernetes.io/name=juicefs-s3-gateway` 查看部署的 pod: + + ```sh + $ kubectl -n kube-system get po -l app.kubernetes.io/name=juicefs-s3-gateway + NAME READY STATUS RESTARTS AGE + juicefs-s3-gateway-5c69d574cc-t92b6 1/1 Running 0 136m + ``` + + - **检查 Service**:执行命令 `kubectl -n kube-system get svc -l app.kubernetes.io/name=juicefs-s3-gateway` 查看部署的 Service: + + ```shell + $ kubectl -n kube-system get svc -l app.kubernetes.io/name=juicefs-s3-gateway + NAME TYPE CLUSTER-IP EXTERNAL-IP PORT(S) AGE + juicefs-s3-gateway ClusterIP 10.101.108.42 9000/TCP 142m + ``` + +## 监控 + +请查看[「监控」](../administration/monitoring.md)文档了解如何收集及展示 JuiceFS 监控指标 diff --git a/docs/zh_cn/development/contributing_guide.md b/docs/zh_cn/development/contributing_guide.md new file mode 100644 index 0000000..f14ea70 --- /dev/null +++ b/docs/zh_cn/development/contributing_guide.md @@ -0,0 +1,8 @@ +--- +sidebar_label: 贡献指南 +sidebar_position: 1 +--- +# 贡献指南 + +:::note 注意 +文档正在编写 \ No newline at end of file diff --git a/docs/zh_cn/development/format.md b/docs/zh_cn/development/format.md new file mode 100644 index 0000000..89e56cd --- /dev/null +++ b/docs/zh_cn/development/format.md @@ -0,0 +1,8 @@ +--- +sidebar_label: 存储格式 +sidebar_position: 3 +--- +# 存储格式 + +:::note 注意 +文档正在编写 \ No newline at end of file diff --git a/docs/zh_cn/development/io_processing.md b/docs/zh_cn/development/io_processing.md new file mode 100644 index 0000000..48505dc --- /dev/null +++ b/docs/zh_cn/development/io_processing.md @@ -0,0 +1,53 @@ +--- +sidebar_label: 读写请求处理流程 +sidebar_position: 2 +slug: /internals/io_processing +--- +# JuiceFS 读写请求处理流程介绍 + +## 写入流程 + +JuiceFS 对大文件会做多级拆分(参见 [JuiceFS 如何存储文件](../reference/how_juicefs_store_files.md)),以提高读写效率。在处理写请求时,JuiceFS 先将数据写入 Client 的内存缓冲区,并在其中按 Chunk/Slice 的形式进行管理。Chunk 是根据文件内 offset 按 64 MiB 大小拆分的连续逻辑单元,不同 Chunk 之间完全隔离。每个 Chunk 内会根据应用写请求的实际情况进一步拆分成 Slices;当新的写请求与已有的 Slice 连续或有重叠时,会直接在该 Slice 上进行更新,否则就创建新的 Slice。Slice 是启动数据持久化的逻辑单元,其在 flush 时会先将数据按照默认 4 MiB 大小拆分成一个或多个连续的 Blocks,并上传到对象存储,每个 Block 对应一个 Object;然后再更新一次元数据,写入新的 Slice 
信息。显然,在应用顺序写情况下,只需要**一个**不停增长的 Slice,最后仅 flush 一次即可;此时能最大化发挥出对象存储的写入性能。以一次简单的 [JuiceFS 基准测试](../benchmark/performance_evaluation_guide.md)为例,其第一阶段是使用 1 MiB IO 顺序写 1 GiB 文件,数据在各个组件中的形式如下图所示: + +![write](../images/internals-write.png) + +> **注意**:图中的压缩和加密默认未开启。欲启用相关功能需要在 format 文件系统的时候添加 `--compress value` 或 `--encrypt-rsa-key value` 选项。 + +这里再放一张测试过程中用 `stats` 命令记录的指标图,可以更直观地看到相关信息: + +![stats](../images/internals-stats.png) + +上图中第 1 阶段: + +- 对象存储写入的平均 IO 大小为 `object.put / object.put_c = 4 MiB`,等于 Block 的默认大小 +- 元数据事务数与对象存储写入数比例大概为 `meta.txn : object.put_c ~= 1 : 16`,对应 Slice flush 需要的 1 次元数据修改和 16 次对象存储上传,同时也说明了每次 flush 写入的数据量为 4 MiB * 16 = 64 MiB,即 Chunk 的默认大小 +- FUSE 层的平均请求大小为约 `fuse.write / fuse.ops ~= 128 KiB`,与其默认的请求大小限制一致 + +相较于顺序写来说,大文件内随机写的情况要复杂许多;每个 Chunk 内可能存在**多个不连续**的 Slice,使得一方面数据对象难以达到 4 MiB 大小,另一方面元数据需要多次更新。同时,当一个 Chunk 内已写入的 Slices 过多时,会触发 Compaction 来尝试合并与清理这些 Slices,这又会进一步增大系统的负担。因此,JuiceFS 在此类场景下会比顺序写有较明显的性能下降。 + +小文件的写入通常是在文件关闭时被上传到对象存储,对应 IO 大小一般就是文件大小。从上面指标图的第 3 阶段(创建 128 KiB 小文件)中也可以看到: + +- 对象存储 PUT 的大小就是 128 KiB +- 元数据事务数大致是 PUT 计数的两倍,对应每个文件的一次 Create 和一次 Write + +值得一提的是,对于这种不足一个 Block 的对象,JuiceFS 在上传的同时还会尝试写入到本地 Cache(由 `--cache-dir` 指定,可以是内存或硬盘),以期能提升后续可能的读请求速度。从指标图中也可以看到,创建小文件时 blockcache 下有同等的写入带宽,而在读取时(第 4 阶段)大部分均在 Cache 命中,这使得小文件的读取速度看起来特别快。 + +由于写请求写入 Client 内存缓冲区即可返回,因此通常来说 JuiceFS 的 Write 时延非常低(几十微秒级别),真正上传到对象存储的动作由内部自动触发(单个 Slice 过大,Slice 数量过多,缓冲时间过长等)或应用主动触发(关闭文件、调用 `fsync` 等)。缓冲区中的数据只有在被持久化后才能释放,因此当写入并发比较大或者对象存储性能不足时,有可能占满缓冲区而导致写阻塞。具体而言,缓冲区的大小由挂载参数 `--buffer-size` 指定,默认为 300 MiB;其实时值可以在指标图的 usage.buf 一列中看到。当使用量超过阈值时,JuiceFS Client 会主动为 Write 添加约 10ms 等待时间以减缓写入速度;若已用量超过阈值两倍,则会导致新的写入暂停直至缓冲区得到释放。因此,在观察到 Write 时延上升以及 Buffer 长时间超过阈值时,通常需要尝试设置更大的 `--buffer-size`。另外,通过增大 `--max-uploads` 参数(上传到对象存储的最大并发数,默认为 20)也有可能提升写入到对象存储的带宽,从而加快缓冲区的释放。 + +### 回写(Writeback)模式 + +当对数据的一致性和可靠性要求并不高时,还可以在挂载时添加 `--writeback` 以进一步提升系统性能。回写模式开启后,Slice flush 仅需写到本地 Staging 目录(与 Cache 共享)即可返回,数据由后台线程异步上传到对象存储。请注意,JuiceFS 的回写模式与通常理解的先写内存不同,是需要将数据写入本地 Cache 目录的(具体的行为根据 Cache 目录所在硬件和本地文件系统而定)。换个角度理解,此时本地目录就是对象存储的缓存层。 + +回写模式开启后,还会默认跳过对上传对象的大小检查,激进地尽量将所有数据都保留在 Cache 目录。这在一些会产生大量中间文件的场景(如软件编译等)特别有用。此外,JuiceFS v0.17 版本还新增了 `--upload-delay` 参数,用来延缓数据上传到对象存储的时间,以更激进地方式将其缓存在本地。如果在等待的时间内数据被应用删除,则无需再上传到对象存储,既提升了性能也节省了成本。同时相较于本地硬盘而言,JuiceFS 提供了后端保障,在 Cache 目录容量不足时依然会自动将数据上传,确保在应用侧不会因此而感知到错误。这个功能在应对 Spark shuffle 等有临时存储需求的场景时非常有效。 + +## 读取流程 + +JuiceFS 在处理读请求时,一般会按照 4 MiB Block 对齐的方式去对象存储读取,实现一定的预读功能。同时,读取到的数据会写入本地 Cache 目录,以备后用(如指标图中的第 2 阶段,blockcache 有很高的写入带宽)。显然,在顺序读时,这些提前获取的数据都会被后续的请求访问到,Cache 命中率非常高,因此也能充分发挥出对象存储的读取性能。此时数据在各个组件中的流动如下图所示: + +![read](../images/internals-read.png) + +> **注意**:读取的对象到达 JuiceFS Client 后会先解密再解压缩,与写入时相反。当然,如果未启用相关功能则对应流程会直接跳过。 + +做大文件内随机小 IO 读取时,JuiceFS 的这种策略则效率不高,反而会因为读放大和本地 Cache 的频繁写入与驱逐使得系统资源的实际利用率降低。不幸的是,此类场景下一般的缓存策略很难有足够高的收益。此时可考虑的一个方向是尽可能提升缓存的整体容量,以期达到能几乎完全缓存所需数据的效果;另一个方向则可以直接将缓存关闭(设置 `--cache-size 0`),并尽可能提高对象存储的读取性能。 + +小文件的读取则比较简单,通常就是在一次请求里读取完整个文件。由于小文件写入时会直接被缓存起来,因此类似 JuiceFS bench 这种写入后不久就读取的访问模式基本都会在本地 Cache 目录命中,性能非常可观。 diff --git a/docs/zh_cn/development/metadata-design/_kv.md b/docs/zh_cn/development/metadata-design/_kv.md new file mode 100644 index 0000000..58dc61d --- /dev/null +++ b/docs/zh_cn/development/metadata-design/_kv.md @@ -0,0 +1,5 @@ +--- +sidebar_label: 分布式 K/V 存储 +sidebar_position: 3 +--- +# 元数据设计 - 分布式 K/V 存储 \ No newline at end of file diff --git a/docs/zh_cn/development/metadata-design/_redis.md b/docs/zh_cn/development/metadata-design/_redis.md new file mode 100644 index 0000000..cdc1a1b --- /dev/null +++ 
b/docs/zh_cn/development/metadata-design/_redis.md @@ -0,0 +1,5 @@ +--- +sidebar_label: Redis +sidebar_position: 1 +--- +# 元数据设计 - Redis \ No newline at end of file diff --git a/docs/zh_cn/development/metadata-design/_sql.md b/docs/zh_cn/development/metadata-design/_sql.md new file mode 100644 index 0000000..ae057b9 --- /dev/null +++ b/docs/zh_cn/development/metadata-design/_sql.md @@ -0,0 +1,5 @@ +--- +sidebar_label: SQL 引擎 +sidebar_position: 2 +--- +# 元数据设计 - SQL 引擎 \ No newline at end of file diff --git a/docs/zh_cn/faq.md b/docs/zh_cn/faq.md new file mode 100644 index 0000000..4d2e3a0 --- /dev/null +++ b/docs/zh_cn/faq.md @@ -0,0 +1,110 @@ +# FAQ + +## 为什么不支持某个对象存储? + +已经支持了绝大部分对象存储,参考这个[列表](reference/how_to_setup_object_storage.md#支持的存储服务)。如果它跟 S3 兼容的话,也可以当成 S3 来使用。否则,请创建一个 issue 来增加支持。 + +## 是否可以使用 Redis 集群版? + +不可以。JuiceFS 使用了 Redis 的[事务功能](https://redis.io/topics/transactions)来保证元数据操作的原子性,而分布式版还不支持分布式事务。哨兵节点或者其它的 Redis 高可用方法是需要的。 + +请查看[「Redis 最佳实践」](administration/metadata/redis_best_practices.md)了解更多信息。 + +## JuiceFS 与 XXX 的区别是什么? + +请查看[「同类技术对比」](comparison/juicefs_vs_alluxio.md)文档了解更多信息。 + +## JuiceFS 的性能如何? + +JuiceFS 是一个分布式文件系统,元数据访问的延时取决于挂载点到服务端之间 1 到 2 个网络来回(通常 1-3 ms),数据访问的延时取决于对象存储的延时 (通常 20-100 ms)。顺序读写的吞吐量可以到 50MiB/s 至 2800MiB/s(查看 [fio 测试结果](benchmark/fio.md)),取决于网络带宽以及数据是否容易被压缩。 + +JuiceFS 内置多级缓存(主动失效),一旦缓存预热好,访问的延时和吞吐量非常接近单机文件系统的性能(FUSE 会带来少量的开销)。 + +## JuiceFS 支持随机读写吗? + +支持,包括通过 mmap 等进行的随机读写。目前 JuiceFS 主要是对顺序读写进行了大量优化,对随机读写的优化也在进行中。如果想要更好的随机读性能,建议关闭压缩([`--compress none`](reference/command_reference.md#juicefs-format))。 + +## 数据更新什么时候会对其它客户端可见? + +所有的元数据更新都是立即对其它客户端可见。JuiceFS 保证关闭再打开(close-to-open)一致性,请查看[「一致性」](administration/cache_management.md#一致性)了解更多信息。 + +通过 `write()` 新写入的数据会缓存在内核和客户端中,可以被当前机器的其它进程看到,其它机器暂时看不到。 + +调用 `fsync()`、`fdatasync()` 或者 `close()` 来强制将数据上传到对象存储并更新元数据,或者数秒钟自动刷新后,其它客户端才能看到更新,这也是绝大多数分布式文件系统采取的策略。 + +请查看[「客户端写缓存」](administration/cache_management.md#客户端写缓存)了解更多信息。 + +## 怎么快速地拷贝大量小文件到 JuiceFS? + +请在挂载时加上 [`--writeback` 选项](reference/command_reference.md#juicefs-mount),它会先把数据写入本机的缓存,然后再异步上传到对象存储,会比直接上传到对象存储快很多倍。 + +请查看[「客户端写缓存」](administration/cache_management.md#客户端写缓存)了解更多信息。 + +## 可以用 `root` 以外的用户挂载吗? + +可以,JuiceFS 可以由任何用户挂载。默认的缓存目录是 `$HOME/.juicefs/cache`(macOS)或者 `/var/jfsCache`(Linux),请确保该用户对这个目录有写权限,或者切换到其它有权限的目录。 + +请查看[「客户端读缓存」](administration/cache_management.md#客户端读缓存)了解更多信息。 + +## 怎么卸载 JuiceFS 文件系统? + +请使用 [`juicefs umount`](reference/command_reference.md#juicefs-umount) 命令卸载。 + +## 怎么升级 JuiceFS 客户端? + +首先请卸载 JuiceFS 文件系统,然后使用新版本的客户端重新挂载。 + +## `docker: Error response from daemon: error while creating mount source path 'XXX': mkdir XXX: file exists.` + +当你使用 [Docker bind mounts](https://docs.docker.com/storage/bind-mounts) 把宿主机上的一个目录挂载到容器中时,你可能会遇到这个错误。这是因为使用了非 root 用户执行了 `juicefs mount` 命令,进而导致 Docker 没有权限访问这个目录。 + +这个问题有两种解决方法: + +1. 用 root 用户执行 `juicefs mount` 命令 +2. 
修改 FUSE 的配置文件以及增加 `allow_other` 挂载选项,请查看[这个文档](reference/fuse_mount_options.md#allow_other)了解更多信息。 + +## `/go/pkg/tool/linux_amd64/link: running gcc failed: exit status 1` 或者 `/go/pkg/tool/linux_amd64/compile: signal: killed` + +这个错误有可能是因为 GCC 版本过低导致,请尝试升级 GCC 到 5.4 及以上版本。 + +## `format: ERR wrong number of arguments for 'auth' command` + +这个错误意味着你使用的 Redis 版本小于 6.0.0 同时在执行 `juicefs format` 命令时指定了 username 参数。只有 Redis 6.0.0 版本以后才支持指定 username,因此你需要省略 URL 中的 username 参数,例如 `redis://:password@host:6379/1`。 + +## `fuse: fuse: exec: "/bin/fusermount": stat /bin/fusermount: no such file or directory` + +这个错误意味着使用了非 root 用户执行 `juicefs mount` 命令,并且 `fusermount` 这个命令也找不到。 + +这个问题有两种解决方法: + +1. 用 root 用户执行 `juicefs mount` 命令 +2. 安装 `fuse` 包(例如 `apt-get install fuse`、`yum install fuse`) + +## `fuse: fuse: fork/exec /usr/bin/fusermount: permission denied` + +这个错误意味着当前用户没有执行 `fusermount` 命令的权限。例如,你可以通过下面的命令检查 `fusermount` 命令的权限: + +```sh +$ ls -l /usr/bin/fusermount +-rwsr-x---. 1 root fuse 27968 Dec 7 2011 /usr/bin/fusermount +``` + +上面的例子表示只有 root 用户和 `fuse` 用户组的用户有权限执行。另一个例子: + +```sh +$ ls -l /usr/bin/fusermount +-rwsr-xr-x 1 root root 32096 Oct 30 2018 /usr/bin/fusermount +``` + +上面的例子表示所有用户都有权限执行。 + +## 为什么同一个用户在主机 X 上有权限访问 JuiceFS 的文件,在主机 Y 上访问该文件却没有权限? + +该用户在主机 X 和主机 Y 上的 UID 或者 GID 不一样。使用 `id` 命令可以显示用户的 UID 和 GID: + +```bash +$ id alice +uid=1201(alice) gid=500(staff) groups=500(staff) +``` + +阅读文档[「多主机间同步账户」](administration/sync_accounts_between_multiple_hosts.md)解决这个问题。 diff --git a/docs/zh_cn/getting-started/_choose_metadata_engine.md b/docs/zh_cn/getting-started/_choose_metadata_engine.md new file mode 100644 index 0000000..367f5a1 --- /dev/null +++ b/docs/zh_cn/getting-started/_choose_metadata_engine.md @@ -0,0 +1,6 @@ +--- +sidebar_label: 如何选择元数据引擎 +sidebar_position: 4 +--- + +# 如何选择元数据引擎 \ No newline at end of file diff --git a/docs/zh_cn/getting-started/_prerequisites.md b/docs/zh_cn/getting-started/_prerequisites.md new file mode 100644 index 0000000..e37e791 --- /dev/null +++ b/docs/zh_cn/getting-started/_prerequisites.md @@ -0,0 +1,5 @@ +--- +sidebar_label: 环境要求 +sidebar_position: 1 +--- +# 环境要求 diff --git a/docs/zh_cn/getting-started/_quick_start_guide.md b/docs/zh_cn/getting-started/_quick_start_guide.md new file mode 100644 index 0000000..3fdaada --- /dev/null +++ b/docs/zh_cn/getting-started/_quick_start_guide.md @@ -0,0 +1,244 @@ +--- +sidebar_label: 快速上手指南 +sidebar_position: 4 +slug: /quick_start_guide +--- + + +# JuiceFS 快速上手指南 + +创建 JuiceFS 文件系统,需要以下 3 个方面的准备: + +1. 准备 Redis 数据库 +2. 准备对象存储 +3. 下载安装 JuiceFS 客户端 + +:::tip 提示 +还不了解 JuiceFS?可以先查阅 [JuiceFS 是什么?](../introduction/introduction.md) +::: + +## 1. 准备 Redis 数据库 + +你可以很容易的在云计算平台购买到各种配置的云 Redis 数据库,但如果你只是想要快速评估 JuiceFS,可以使用 Docker 快速的在本地电脑上运行一个 Redis 数据库实例: + +```shell +$ sudo docker run -d --name redis \ + -v redis-data:/data \ + -p 6379:6379 \ + --restart unless-stopped \ + redis redis-server --appendonly yes +``` + +容器创建成功以后,可使用 `redis://127.0.0.1:6379` 访问 Redis 数据库。 + +:::info 说明 +以上命令将 Redis 的数据持久化在 Docker 的 `redis-data` 数据卷当中,你可以按需修改数据持久化的存储位置。 +::: + +:::caution 注意 +以上命令创建的 Redis 数据库实例没有启用身份认证,且暴露了主机的 `6379` 端口,如果你要通过互联网访问这个数据库实例,请参考 [Redis Security](https://redis.io/topics/security) 中的建议。 +::: + +有关 Redis 数据库相关的更多内容,[点此查看](../reference/how_to_setup_metadata_engine.md#redis)。 + +## 2. 
准备对象存储 + +和 Redis 数据库一样,几乎所有的公有云计算平台都提供对象存储服务。因为 JuiceFS 支持几乎所有主流平台的对象存储服务,因此你可以根据个人偏好自由选择。你可以查看我们的 [对象存储支持列表和设置指南](../reference/how_to_setup_object_storage.md),其中列出了 JuiceFS 目前支持的所有对象存储服务,以及具体的使用方法。 + +当然,如果你只是想要快速评估 JuiceFS,使用 Docker 可以很轻松的在本地电脑运行一个 MinIO 对象存储实例: + +```shell +$ sudo docker run -d --name minio \ + -p 9000:9000 \ + -p 9900:9900 \ + -v $PWD/minio-data:/data \ + --restart unless-stopped \ + minio/minio server /data --console-address ":9900" +``` + +容器创建成功以后使用以下地址访问: + +- **MinIO 管理界面**:http://127.0.0.1:9900 +- **MinIO API**:http://127.0.0.1:9000 + +对象存储初始的 Access Key 和 Secret Key 均为 `minioadmin`。 + +:::info 说明 +最新的 MinIO 集成了新版控制台界面,以上命令通过 `--console-address ":9900"` 为控制台设置并映射了 `9900` 端口。另外,还将 MinIO 对象存储的数据路径映射到了当前目录下的 `minio-data` 文件夹中,你可以按需修改这些参数。 +::: + +## 3. 安装 JuiceFS 客户端 + +JuiceFS 同时支持 Linux、Windows、macOS 等操作系统及各种处理器架构,你可以在 [这里下载](https://github.com/juicedata/juicefs/releases/latest) 最新的预编译的二进制程序,请参考[这个文档](installation.md#安装预编译客户端)根据实际使用的系统和处理器架构选择对应的版本。 + +以 x86 架构的 Linux 系统为例,下载文件名包含 `linux-amd64` 的压缩包: + +```shell +$ JFS_LATEST_TAG=$(curl -s https://api.github.com/repos/juicedata/juicefs/releases/latest | grep 'tag_name' | cut -d '"' -f 4 | tr -d 'v') +$ wget "https://github.com/juicedata/juicefs/releases/download/v${JFS_LATEST_TAG}/juicefs-${JFS_LATEST_TAG}-linux-amd64.tar.gz" +``` + +解压并安装: + +```shell +$ tar -zxf "juicefs-${JFS_LATEST_TAG}-linux-amd64.tar.gz" +$ sudo install juicefs /usr/local/bin +``` + +:::tip 提示 +你也可以从源代码[「手动编译 JuiceFS 客户端」](installation.md#手动编译客户端) +::: + +## 4. 创建 JuiceFS 文件系统 + +创建 JuiceFS 文件系统要使用 `format` 子命令,需要同时指定用来存储元数据的 Redis 数据库和用来存储实际数据的对象存储。 + +以下命令将创建一个名为 `pics` 的 JuiceFS 文件系统,使用 Redis 中的 `1` 号数据库存储元数据,使用 MinIO 中创建的 `pics` 存储桶存储实际数据。 + +```shell +$ juicefs format \ + --storage minio \ + --bucket http://127.0.0.1:9000/pics \ + --access-key minioadmin \ + --secret-key minioadmin \ + redis://127.0.0.1:6379/1 \ + pics +``` + +执行命令后,会看到类似下面的内容输出,说明 JuiceFS 文件系统创建成功了。 + +```shell +2021/04/29 23:01:18.352256 juicefs[34223] : Meta address: redis://127.0.0.1:6379/1 +2021/04/29 23:01:18.354252 juicefs[34223] : Ping redis: 132.185µs +2021/04/29 23:01:18.354758 juicefs[34223] : Data use minio://127.0.0.1:9000/pics/pics/ +2021/04/29 23:01:18.361674 juicefs[34223] : Volume is formatted as {Name:pics UUID:9c0fab76-efd0-43fd-a81e-ae0916e2fc90 Storage:minio Bucket:http://127.0.0.1:9000/pics AccessKey:minioadmin SecretKey:removed BlockSize:4096 Compression:none Partitions:0 EncryptKey:} +``` + +可以通过 `juicefs format -h` 命令,获得创建文件系统的完整帮助信息。 + +:::info 说明 +你可以根据需要,创建无限多个 JuiceFS 文件系统。但需要注意的是,每个 Redis 数据库中只能创建一个文件系统。比如要再创建一个名为 `memory` 的文件系统时,可以使用 Redis 中的 2 号数据库,即 `redis://127.0.0.1:6379/2` 。 +::: + +:::info 说明 +如果不指定 `--storage` 选项,JuiceFS 客户端会使用本地磁盘作为数据存储。使用本地存储时,JuiceFS 只能在本地单机使用,无法被网络内其他客户端挂载,[点此](../reference/how_to_setup_object_storage.md#本地磁盘)查看详情。 +::: + +## 5. 
挂载 JuiceFS 文件系统 + +JuiceFS 文件系统创建完成以后,接下来就可以把它挂载到操作系统上使用了。以下命令将 `pics` 文件系统挂载到 `/mnt/jfs` 目录中。 + +```shell +$ sudo juicefs mount -d redis://127.0.0.1:6379/1 /mnt/jfs +``` + +:::tip 提示 +挂载 JuiceFS 文件系统时,不需要显式指定文件系统的名称,只要填写正确的 Redis 服务器地址和数据库编号即可。 +::: + +执行命令后,会看到类似下面的内容输出,说明 JuiceFS 文件系统已经成功挂载到系统上了。 + +```shell +2021/04/29 23:22:25.838419 juicefs[37999] : Meta address: redis://127.0.0.1:6379/1 +2021/04/29 23:22:25.839184 juicefs[37999] : Ping redis: 67.625µs +2021/04/29 23:22:25.839399 juicefs[37999] : Data use minio://127.0.0.1:9000/pics/pics/ +2021/04/29 23:22:25.839554 juicefs[37999] : Cache: /var/jfsCache/9c0fab76-efd0-43fd-a81e-ae0916e2fc90 capacity: 1024 MB +2021/04/29 23:22:26.340509 juicefs[37999] : OK, pics is ready at /mnt/jfs +``` + +挂载完成以后就可以在 `/mnt/jfs` 目录中存取文件了,你可以执行 `df` 命令查看 JuiceFS 文件系统的挂载情况: + +```shell +$ df -Th +文件系统 类型 容量 已用 可用 已用% 挂载点 +JuiceFS:pics fuse.juicefs 1.0P 64K 1.0P 1% /mnt/jfs +``` + +:::info 说明 +默认情况下, JuiceFS 的缓存位于 `/var/jfsCache` 目录,为了获得该目录的读写权限,这里使用了 `sudo` 命令,以管理员权限挂载的 JuiceFS 文件系统。普通用户在读写 `/mnt/jfs` 时,需要为用户赋予该目录的操作权限。 +::: + +## 6. 开机自动挂载 JuiceFS + +将 `juicefs` 客户端重命名为 `mount.juicefs` 并复制到 `/sbin/` 目录: + +```shell +$ sudo cp /usr/local/bin/juicefs /sbin/mount.juicefs +``` + +:::info 说明 +执行以上命令之前,我们假设 `juicefs` 客户端程序已经在 `/usr/local/bin` 目录。你也可以直接从下载的客户端压缩包中再解压一份 `juicefs` 程序出来,按上述要求重命名并复制到 `/sbin/` 目录。 +::: + +编辑 `/etc/fstab` 配置文件,另起新行,参照以下格式添加一条记录: + +``` + juicefs _netdev[,] 0 0 +``` + +- 请将 `` 替换成实际的 Redis 数据库地址,格式为 `redis://:@:/`,例如:`redis://localhost:6379/1`。 +- 请将 `` 替换成文件系统实际的挂载点,例如:`/jfs`。 +- 如果需要,请将 `[,]` 替换为实际要设置的 [挂载选项](../reference/command_reference.md#juicefs-mount),多个选项之间用逗号分隔。 + +**例如:** + +``` +redis://localhost:6379/1 /jfs juicefs _netdev,max-uploads=50,writeback,cache-size=2048 0 0 +``` + +:::caution 注意 +默认情况下,CentOS 6 在系统启动时不会挂载网络文件系统,你需要执行命令开启网络文件系统的自动挂载支持: +::: + +```bash +$ sudo chkconfig --add netfs +``` + +## 7. 卸载文件系统 + +如果你需要卸载 JuiceFS 文件系统,可以先执行 `df` 命令查看系统中已挂载的文件系统信息: + +```shell +$ sudo df -Th + +文件系统 类型 容量 已用 可用 已用% 挂载点 +... +JuiceFS:pics fuse.juicefs 1.0P 1.1G 1.0P 1% /mnt/jfs +``` + +通过命令输出,可以看到,文件系统 `pics` 挂载点为 `/mnt/jfs`,执行 `umount` 子命令卸载: + +```shell +$ sudo juicefs umount /mnt/jfs +``` + +:::tip 提示 +执行 `juicefs umount -h` 命令,可以获取卸载命令的详细帮助信息。 +::: + +### 卸载失败 + +如果执行命令后,文件系统卸载失败,提示 `Device or resource busy`: + +```shell +2021-05-09 22:42:55.757097 I | fusermount: failed to unmount /mnt/jfs: Device or resource busy +exit status 1 +``` + +发生这种情况,可能是因为某些程序正在读写文件系统中的文件。为了确保数据安全,你应该首先排查是哪些程序正在与文件系统中的文件进行交互(例如通过 `lsof` 命令),并尝试结束他们之间的交互动作,然后再重新执行卸载命令。 + +:::caution 注意 +以下内容包含的命令可能会导致文件损坏、丢失,请务必谨慎操作! 
+::: + +当然,在你能够确保数据安全的前提下,也可以在卸载命令中添加 `--force` 或 `-f` 参数,强制卸载文件系统: + +```shell +$ sudo juicefs umount --force /mnt/jfs +``` + +也可以使用 `fusermount` 命令卸载文件系统: + +```shell +$ sudo fusermount -u /mnt/jfs +``` diff --git a/docs/zh_cn/getting-started/for_distributed.md b/docs/zh_cn/getting-started/for_distributed.md new file mode 100644 index 0000000..3e2fc87 --- /dev/null +++ b/docs/zh_cn/getting-started/for_distributed.md @@ -0,0 +1,180 @@ +--- +sidebar_label: 快速上手(分布式模式) +sidebar_position: 3 +--- + +# JuiceFS 分布式模式快速上手指南 + +上一篇文档[「JuiceFS 单机模式快速上手指南」](for_local.md)通过采用「对象存储」和「SQLite」数据库的组合,实现了一个可以在任意主机上挂载的文件系统。得益于对象存储是可以被网络上任何有权限的计算机访问的特点,我们只需要把 SQLite 数据库文件复制到任何想要访问该存储的计算机,就可以实现在不同计算机上访问同一个 JuiceFS 文件系统。 + +很显然,想要依靠在计算机之间复制 SQLite 数据库的方式进行文件系统共享,虽然可行,但文件的实时性是得不到保证的。受限于 SQLite 这种单文件数据库无法被多个计算机同时读写访问的情况,为了能够让一个文件系统可以在分布式环境中被多个计算机同时挂载读写,我们需要采用支持通过网络访问的数据库,比如 Redis、PostgreSQL、MySQL 等。 + +本文以上一篇文档为基础,进一步将数据库从单用户的「SQLite」替换成多用户的「云数据库」,从而实现可以在网络上任何一台计算机上进行挂载读写的分布式文件系统。 + +## 基于网络的数据库 + +这里所谓的「基于网络的数据库」是指允许多个用户通过网络同时访问的数据库,从这个角度出发,可以简单的把数据库分成: + +1. **单机数据库**:数据库是单个文件,通常只能单机访问,如 SQLite,Microsoft Access 等; +2. **基于网络的数据库**:数据库通常是复杂的多文件结构,提供基于网络的访问接口,支持多用户同时访问,如 Redis、PostgreSQL 等。 + +JuiceFS 目前支持的基于网络的数据库有: + +- **键值数据库**:Redis、TiKV +- **关系型数据库**:PostgreSQL、MySQL、MariaDB + +不同的数据库性能和稳定性表现也各不相同,比如 Redis 是内存型键值数据库,性能极为出色,但可靠性相对较弱。PostgreSQL 是关系型数据库,相比之下性能没有内存型强悍,但它的可靠性要更强。 + +有关数据库选择方面的内容,我们会专门编写文档进行介绍。 + +## 云数据库 + +云计算平台通常都有种类丰富的云数据库提供,比如 Amazon RDS 提供各类关系型数据库的版本,Amazon ElastiCache 提供兼容 Redis 的内存型数据库产品。经过简单的初始化设置就可以创建出多副本、高可用的数据库集群。 + +当然,如果愿意,你可以自己在服务器上搭建数据库。 + +简单起见,这里以阿里云数据库 Redis 版为例介绍。对于基于网络的数据库来说,最基本的是以下 2 项信息: + +1. **数据库地址**:数据库的访问地址,云平台可能会针对内外网提供不同的链接; +2. **用户名和密码**:用于访问数据库时的身份验证信息。 + +## 上手实践 + +### 1. 安装客户端 + +在所有需要挂载文件系统的计算机上安装 JuiceFS 客户端,详情参照[安装 & 升级](installation.md)。 + +### 2. 准备对象存储 + +以下是以阿里云 OSS 为例的伪样本,你可以改用其他对象存储,详情参考 [JuiceFS 支持的存储](../reference/how_to_setup_object_storage.md#支持的存储服务)。 + +- **Bucket Endpoint**:`https://myjfs.oss-cn-shanghai.aliyuncs.com` +- **Access Key ID**:`ABCDEFGHIJKLMNopqXYZ` +- **Access Key Secret**:`ZYXwvutsrqpoNMLkJiHgfeDCBA` + +### 3. 准备数据库 + +以下是以阿里云数据库 Redis 版为例的伪样本,你可以改用其他类型的数据库,详情参考 [JuiceFS 支持的数据库](../reference/how_to_setup_metadata_engine.md)。 + +- **数据库地址**:`myjfs-sh-abc.redis.rds.aliyuncs.com:6379` +- **数据库用户名**:`tom` +- **数据库密码**:`mypassword` + +在 JuiceFS 中使用 Redis 数据库的格式如下: + +``` +redis://:@:6379/1 +``` + +:::tip 提示 +Redis 6.0 之前的版本没有用户名,请省略 URL 中的 `` 部分,例如 `redis://:mypassword@myjfs-sh-abc.redis.rds.aliyuncs.com:6379/1`(请注意密码前面的冒号是分隔符,需要保留)。 +::: + +### 4. 创建文件系统 + +以下命令使用「对象存储」和「Redis」数据库的组合创建了一个支持跨网络、多机同时挂载、共享读写的文件系统。 + +```shell +juicefs format \ + --storage oss \ + --bucket https://myjfs.oss-cn-shanghai.aliyuncs.com \ + --access-key ABCDEFGHIJKLMNopqXYZ \ + --secret-key ZYXwvutsrqpoNMLkJiHgfeDCBA \ + redis://tom:mypassword@myjfs-sh-abc.redis.rds.aliyuncs.com:6379/1 \ + myjfs +``` + +文件系统创建完成后,终端将返回类似下面的内容: + +```shell +2021/12/16 16:37:14.264445 juicefs[22290] : Meta address: redis://@myjfs-sh-abc.redis.rds.aliyuncs.com:6379/1 +2021/12/16 16:37:14.277632 juicefs[22290] : maxmemory_policy is "volatile-lru", please set it to 'noeviction'. 
+2021/12/16 16:37:14.281432 juicefs[22290] : Ping redis: 3.609453ms +2021/12/16 16:37:14.527879 juicefs[22290] : Data uses oss://myjfs/myjfs/ +2021/12/16 16:37:14.593450 juicefs[22290] : Volume is formatted as {Name:myjfs UUID:4ad0bb86-6ef5-4861-9ce2-a16ac5dea81b Storage:oss Bucket:https://myjfs AccessKey:ABCDEFGHIJKLMNopqXYZ SecretKey:removed BlockSize:4096 Compression:none Shards:0 Partitions:0 Capacity:0 Inodes:0 EncryptKey:} +``` + +:::info 说明 +文件系统一经创建,相关的信息包括名称、对象存储、访问密钥等信息会完整的记录到数据库中。在当前的示例中,文件系统的信息被记录在 Redis 数据库中,因此在任何一台计算机上,只要拥有数据库地址、用户名和密码信息,就可以挂载读写该文件系统。 +::: + +### 5. 挂载文件系统 + +由于这个文件系统的「数据」和「元数据」都存储在基于网络的云服务中,因此在任何安装了 JuiceFS 客户端的计算机上都可以同时挂载该文件系统进行共享读写。例如: + +```shell +juicefs mount redis://tom:mypassword@myjfs-sh-abc.redis.rds.aliyuncs.com:6379/1 mnt +``` + +#### 数据强一致性保证 + +对于多客户端同时挂载读写同一个文件系统的情况,JuiceFS 提供「关闭再打开(close-to-open)」一致性保证,即当两个及以上客户端同时读写相同的文件时,客户端 A 的修改在客户端 B 不一定能立即看到。但是,一旦这个文件在客户端 A 写入完成并关闭,之后在任何一个客户端重新打开该文件都可以保证能访问到最新写入的数据,不论是否在同一个节点。 + +#### 调大缓存提升性能 + +由于「对象存储」是基于网络的存储服务,不可避免会产生访问延时。为了解决这个问题,JuiceFS 提供并默认启用了缓存机制,即划拨一部分本地存储作为数据与对象存储之间的一个缓冲层,读取文件时会异步地将数据缓存到本地存储,详情请查阅[「缓存」](../administration/cache_management.md)。 + +缓存机制让 JuiceFS 可以高效处理海量数据的读写任务,默认情况下,JuiceFS 会在 `$HOME/.juicefs/cache` 或 `/var/jfsCache` 目录设置 100GiB 的缓存。在速度更快的 SSD 上设置更大的缓存空间可以有效提升 JuiceFS 的读写性能。 + +你可以使用 `--cache-dir` 调整缓存目录的位置,使用 `--cache-size` 调整缓存空间的大小,例如: + +```shell +juicefs mount + --background \ + --cache-dir /mycache \ + --cache-size 512000 \ + redis://tom:mypassword@myjfs-sh-abc.redis.rds.aliyuncs.com:6379/1 mnt +``` + +:::note 注意 +JuiceFS 进程需要具有读写 `--cache-dir` 目录的权限。 +::: + +上述命令将缓存目录设置在了 `/mycache` 目录,并指定缓存空间为 500GiB。 + +#### 开机自动挂载 + +以 Linux 系统为例,假设客户端位于 `/usr/local/bin` 目录。将 JuiceFS 客户端重命名为 `mount.juicefs` 并复制到 `/sbin` 目录: + +```shell +sudo cp /usr/local/bin/juicefs /sbin/mount.juicefs +``` + +编辑 `/etc/fstab` 配置文件,遵照 fstab 的规则添加一条新记录: + +``` +redis://tom:mypassword@myjfs-sh-abc.redis.rds.aliyuncs.com:6379/1 /mnt/myjfs juicefs _netdev,max-uploads=50,writeback,cache-size=512000 0 0 +``` + +:::note 注意 +默认情况下,CentOS 6 在系统启动时不会挂载网络文件系统,你需要执行命令开启网络文件系统的自动挂载支持:`sudo chkconfig --add netfs` +::: + +### 6. 卸载文件系统 + +你可以通过 `juicefs umount` 命令卸载 JuiceFS 文件系统(假设挂载点路径是 `mnt`): + +```shell +juicefs umount mnt +``` + +#### 卸载失败 + +如果执行命令后,文件系统卸载失败,提示 `Device or resource busy`: + +```shell +2021-05-09 22:42:55.757097 I | fusermount: failed to unmount mnt: Device or resource busy +exit status 1 +``` + +发生这种情况,可能是因为某些程序正在读写文件系统中的文件。为了确保数据安全,你应该首先排查是哪些程序正在与文件系统中的文件进行交互(例如通过 `lsof` 命令),并尝试结束它们之间的交互动作,然后再重新执行卸载命令。 + +:::caution 注意 +以下内容包含的命令可能会导致文件损坏、丢失,请务必谨慎操作! 
+::: + +当然,在你能够确保数据安全的前提下,也可以在卸载命令中添加 `--force` 或 `-f` 参数,强制卸载文件系统: + +```shell +juicefs umount --force mnt +``` diff --git a/docs/zh_cn/getting-started/for_local.md b/docs/zh_cn/getting-started/for_local.md new file mode 100644 index 0000000..2feedbd --- /dev/null +++ b/docs/zh_cn/getting-started/for_local.md @@ -0,0 +1,175 @@ +--- +sidebar_label: 快速上手(单机模式) +sidebar_position: 2 +slug: /quick_start_guide +--- + +# JuiceFS 单机模式快速上手指南 + +JuiceFS 文件系统由[「对象存储」](../reference/how_to_setup_object_storage.md)和[「数据库」](../reference/how_to_setup_metadata_engine.md)共同驱动。除了对象存储,还支持使用本地磁盘、WebDAV 和 HDFS 等作为底层存储。因此,可以使用本地磁盘和 SQLite 数据库快速创建一个单机文件系统用以了解和体验 JuiceFS。 + +## 安装客户端 + +详情请参照[安装 & 升级](installation.md)。 + +不论你使用什么操作系统,当在终端输入并执行 `juicefs` 并返回了程序的帮助信息,就说明你成功安装了 JuiceFS 客户端。 + +## 创建文件系统 + +### 基本概念 + +创建文件系统使用客户端提供的 [`format`](../reference/command_reference.md#juicefs-format) 命令,一般格式为: + +```shell +juicefs format [command options] META-URL NAME +``` + +可见,格式化文件系统需要提供 3 种信息: + +1. **[command options]**:设定文件系统的存储介质,留空则**默认使用本地磁盘**作为存储介质,路径为 `"$HOME/.juicefs/local"` 或 `"/var/jfs"`; +2. **META-URL**:用来设置元数据存储,即数据库相关的信息,通常是数据库的 URL 或文件路径; +3. **NAME**:是文件系统的名称。 + +:::tip 提示 +JuiceFS 支持丰富的存储介质和元数据存储引擎,查看 [JuiceFS 支持的存储介质](../reference/how_to_setup_object_storage.md) 和 [JuiceFS 支持的元数据存储引擎](../reference/how_to_setup_metadata_engine.md)。 +::: + +### 上手实践 + +以 Linux 系统为例,以下命令创建了一个名为 `myjfs` 的文件系统。 + +```shell +juicefs format sqlite3://myjfs.db myjfs +``` + +创建完成将返回类似下面的输出: + +```shell {1,4} +2021/12/14 18:26:37.666618 juicefs[40362] : Meta address: sqlite3://myjfs.db +[xorm] [info] 2021/12/14 18:26:37.667504 PING DATABASE sqlite3 +2021/12/14 18:26:37.674147 juicefs[40362] : The latency to database is too high: 7.257333ms +2021/12/14 18:26:37.675713 juicefs[40362] : Data use file:///Users/herald/.juicefs/local/myjfs/ +2021/12/14 18:26:37.689683 juicefs[40362] : Volume is formatted as {Name:myjfs UUID:d5bdf7ea-472c-4640-98a6-6f56aea13982 Storage:file Bucket:/Users/herald/.juicefs/local/ AccessKey: SecretKey: BlockSize:4096 Compression:none Shards:0 Partitions:0 Capacity:0 Inodes:0 EncryptKey:} +``` + +从返回的信息中可以看到,该文件系统使用 SQLite 作为元数据存储引擎,数据库文件位于当前目录,文件名为 `myjfs.db`,保存了 `myjfs` 文件系统的所有信息。它构建了完善的表结构,将用作所有数据的元信息的存储。 + +![](../images/sqlite-info.png) + +由于没有指定任何存储相关的选项,客户端默认使用本地磁盘作为存储介质,根据返回的信息, `myjfs` 的存储路径为 `file:///Users/herald/.juicefs/local/myjfs/`,即当前用户家目录下的 `.juicefs/local/myjfs/`。 + +## 挂载文件系统 + +### 基本概念 + +挂载文件系统使用客户端提供的 [`mount`](../reference/command_reference.md#juicefs-mount) 命令,一般格式为: + +```shell +juicefs mount [command options] META-URL MOUNTPOINT +``` + +与创建文件系统的命令类似,挂载文件系统需要提供以下信息: + +1. **[command options]**:用来指定文件系统相关的选项,例如:`-d` 可以实现后台挂载; +2. **META-URL**:用来设置元数据存储。即数据库相关的信息,通常是数据库的 URL 或文件路径; +3. 
**MOUNTPOINT**:指定文件系统的挂载点。 + +:::tip 提示 +Windows 系统的挂载点(MOUNTPOINT)应该使用尚未占用的盘符,比如:`Z:`、`Y:`。 +::: + +### 上手实践 + +:::note 注意 +由于 SQLite 是单文件数据库,挂载时要注意数据库文件的的路径,JuiceFS 同时支持相对路径和绝对路径。 +::: + +以下命令将 `myjfs` 文件系统挂载到当前目录下的 `mnt` 文件夹: + +```shell +juicefs mount sqlite3://myjfs.db mnt +``` + +![](../images/sqlite-mount-local.png) + +默认情况下,客户端会在前台挂载文件系统。就像你在上图中看到的那样,程序会一直运行在当前终端进程中,使用 Ctrl + C 组合键或关闭终端窗口,文件系统会被卸载。 + +为了让文件系统可以在后台保持挂载,你可以在挂载时指定 `-d` 或 `--background` 选项,即让客户端在守护进程中挂载文件系统。 + +```shell +juicefs mount sqlite3://myjfs.db mnt -d +``` + +接下来,任何存入挂载点 `mnt` 的文件,都会按照 [JuiceFS 的文件存储格式](../introduction/architecture.md#如何存储文件)被拆分成特定的「数据块」并存入 `$HOME/.juicefs/local/myjfs` 目录中,相对应的「元数据」会全部存储在 `myjfs.db` 数据库中。 + +最后执行以下命令可以将挂载点 `mnt` 卸载: + +```shell +juicefs umount mnt +``` + +## 更进一步 + +前面介绍的内容通常只适用于快速在本地体验和了解,帮助你对 JucieFS 的工作方式建立基本的认识。我们可以在前面内容的基础上更进一步,仍然使用 SQLite 存储元数据,把本地存储换成「对象存储」,做一个更有实用价值的方案。 + +### 对象存储 + +对象存储是一种基于 HTTP 协议的,提供简单访问 API 的网络存储服务。它的结构扁平,易于扩展,价格相对低廉,非常适合存储海量的非结构化数据。几乎所有主流的云计算平台都有提供对象存储服务,如亚马逊 S3、阿里云 OSS、Backblaze B2 等。 + +JuiceFS 支持几乎所有的对象存储服务,查看「[JuiceFS 支持的存储介质](../reference/how_to_setup_object_storage.md)」。 + +一般来说,创建对象存储通常只需要 2 个环节: + +1. 创建 `Bucket` 存储桶,拿到 Endpoint 地址; +2. 创建 `Access Key ID` 和 `Access Key Secret`,即对象存储 API 的访问密钥。 + +以阿里云 OSS 为例,创建好的资源大概像下面这样: + +- **Bucket Endpoint**:`https://myjfs.oss-cn-shanghai.aliyuncs.com` +- **Access Key ID**:`ABCDEFGHIJKLMNopqXYZ` +- **Access Key Secret**:`ZYXwvutsrqpoNMLkJiHgfeDCBA` + +:::note 注意 +创建对象存储时的过程各个平台会略有差别,建议查看云平台的帮助手册操作。另外,有些平台可能会针对内外网提供不同的 Endpoint 地址,由于本文要从本地访问对象存储,因此请选择使用面向外网访问的地址。 +::: + +### 上手实践 + +接下来使用 SQLite 和阿里云 OSS 对象存储创建一个 JuiceFS 文件系统: + +:::note 注意 +如果 `myjfs.db` 文件已经存在,请先删除它再执行以下命令。 +::: + +```shell +juicefs format --storage oss \ + --bucket https://myjfs.oss-cn-shanghai.aliyuncs.com \ + --access-key ABCDEFGHIJKLMNopqXYZ \ + --secret-key ZYXwvutsrqpoNMLkJiHgfeDCBA \ + sqlite3://myjfs.db myjfs +``` + +在上述命令中,数据库和文件系统名称保持不变,增加了对象存储相关的信息: + +- `--storage`:设置存储类型,比如 oss、s3 等; +- `--bucket`:设置对象存储的 Endpoint 地址; +- `--access-key`:设置对象存储 API 访问密钥 Access Key ID; +- `--secret-key`:设置对象存储 API 访问密钥 Access Key Secret。 + +:::note 注意 +请使用你自己的对象存储信息替换上述命令中的信息。 +::: + +创建完成即可进行挂载: + +```shell +juicefs mount sqlite3://myjfs.db mnt +``` + +可以看到,挂载命令与使用本地存储时完全一样,因为 JuiceFS 已经把对象存储相关的信息写入了 `myjfs.db` 数据库,挂载时无需重复提供。 + +相比使用本地磁盘,SQLite 和对象存储的组合实用价值更高。从应用的角度看,这种形式等同于将容量几乎无限的对象存储接入到了本地计算机,让你可以像使用本地磁盘那样使用云存储。 + +进一步的,该文件系统的所有数据都存储在云端的对象存储,因此可以把 `myjfs.db` 数据库复制到其他安装了 JuiceFS 客户端的计算机上进行挂载和读写。也就是说,任何一台计算机只要能够读取到存储了元数据的数据库,那么它就能够挂载读写该文件系统。 + +很显然,SQLite 这种单文件数据库很难实现被多台计算机同时访问。如果把 SQLite 改为 Redis、PostgreSQL、MySQL 等能够通过网络被多台计算机同时读写访问的数据库,那么就可以实现 JuiceFS 文件系统的分布式挂载读写。 diff --git a/docs/zh_cn/getting-started/installation.md b/docs/zh_cn/getting-started/installation.md new file mode 100644 index 0000000..5df821c --- /dev/null +++ b/docs/zh_cn/getting-started/installation.md @@ -0,0 +1,285 @@ +--- +sidebar_label: 安装 & 升级 +sidebar_position: 1 +slug: /installation +--- + +# 安装与升级 + +JuiceFS 有良好的跨平台能力,支持在几乎所有主流架构的各类操作系统上运行,包括且不限于 Linux、macOS、Windows、BSD 等。 + +JuiceFS 客户端只有一个二进制文件,你可以下载预编译的版本直接解压使用,也可以用源代码手动编译。 + +## 安装预编译客户端 + +你可以在 [GitHub](https://github.com/juicedata/juicefs/releases) 找到最新版客户端下载地址,每个版本的下载列表中都提供了面向不同 CPU 架构和操作系统的预编译版本,请注意识别选择,例如: + +| 文件名 | 说明 | +| ------------------------------------ | ---------------------------- | +| `juicefs-x.x.x-darwin-amd64.tar.gz` | 面向 Intel 芯片的 macOS 系统 | +| `juicefs-x.x.x-linux-amd64.tar.gz` | 面向 x86 架构 Linux 发行版 | +| `juicefs-x.x.x-linux-arm64.tar.gz` | 面向 ARM 架构的 Linux 发行版 | +| 
`juicefs-x.x.x-windows-amd64.tar.gz` | 面向 x86 架构的 Windows 系统 | +| `juicefs-hadoop-x.x.x-linux-amd64.jar` | 面向 x86 架构 Linux 发行版的 Hadoop Java SDK | + +:::tip 提示 +对于 M1 系列芯片的 macOS 系统,可以使用 `darwin-amd64` 版本客户端依赖 [Rosetta 2](https://support.apple.com/zh-cn/HT211861) 使用,也可以参考 [手动编译客户端](#手动编译客户端) 编译原生版本。 +::: + +### Linux 发行版 + +以 x86 架构的 Linux 系统为例,下载文件名包含 `linux-amd64` 的压缩包,在终端依次执行以下命令。 + +1. 获取最新的版本号 + + ```shell + JFS_LATEST_TAG=$(curl -s https://api.github.com/repos/juicedata/juicefs/releases/latest | grep 'tag_name' | cut -d '"' -f 4 | tr -d 'v') + ``` + +2. 下载客户端到当前目录 + + ```shell + wget "https://github.com/juicedata/juicefs/releases/download/v${JFS_LATEST_TAG}/juicefs-${JFS_LATEST_TAG}-linux-amd64.tar.gz" + ``` + +3. 解压安装包 + + ```shell + tar -zxf "juicefs-${JFS_LATEST_TAG}-linux-amd64.tar.gz" + ``` + +4. 安装客户端 + + ```shell + sudo install juicefs /usr/local/bin + ``` + +完成上述 4 个步骤,在终端执行 `juicefs` 命令,返回帮助信息,则说明客户端安装成功。 + +:::info 说明 +如果终端提示 `command not found`,可能是因为 `/usr/local/bin` 不在你的系统 `PATH` 环境变量中,可以执行 `echo $PATH` 查看系统设置了哪些可执行路径,根据返回结果选择一个恰当的路径,调整并重新执行第 4 步的安装命令。 +::: + +### Windows 系统 + +在 Windows 系统使用 JuiceFS 的方法有两种: + +1. [使用预编译的 Windows 客户端](#预编译的-windows-客户端) +2. [在 WSL 中使用 Linux 版客户端](#在-wsl-中使用-linux-版客户端) + +#### 预编译的 Windows 客户端 + +JuiceFS 的 Windows 客户端也是一个独立的二进制程序,下载解压即可直接运行使用。 + +1. 安装依赖程序 + + 由于 Windows 没有原生支持 FUSE 接口,首先需要下载安装 [WinFsp](http://www.secfs.net/winfsp/) 才能实现对 FUSE 的支持。 + + :::tip 提示 + **[WinFsp](https://github.com/billziss-gh/winfsp)** 是一个开源的 Windows 文件系统代理,它提供了一个 FUSE 仿真层,使得 JuiceFS 客户端可以将文件系统挂载到 Windows 系统中使用。 + ::: + +2. 安装客户端 + + 以 Windows 10 系统为例,下载文件名包含 `windows-amd64` 的压缩包,解压后得到 `juicefs.exe` 即是 JuiceFS 的客户端程序。 + + 为了便于使用,可以在 `C:\` 盘根目录创建一个名为 `juicefs` 的文件夹,把 `juicefs.exe` 解压到该文件夹中。然后将 `C:\juicefs` 文件夹路径添加到系统的环境变量,重启系统让设置生效以后,可直接使用使用系统自带的 `命令提示符` 或 `PowerShell` 等终端程序运行 `juicefs` 命令。 + + ![Windows ENV path](../images/windows-path.png) + +#### 在 WSL 中使用 Linux 版客户端 + +[WSL](https://docs.microsoft.com/zh-cn/windows/wsl/about) 全称 Windows Subsystem for Linux,即 Windows 的 Linux 子系统,从 Windows 10 版本 2004 以上或 Windows 11 开始支持该功能。它可以让你在 Windows 系统中运行原生的 GNU/Linux 的大多数命令行工具、实用工具和应用程序且不会产生传统虚拟机或双启动设置开销。 + +详情查看「[在 WSL 中使用 JuiceFS](../tutorials/juicefs_on_wsl.md)」 + +### macOS 系统 + +由于 macOS 默认不支持 FUSE 接口,需要先安装 [macFUSE](https://osxfuse.github.io/) 实现对 FUSE 的支持。 + +:::tip 提示 +[macFUSE](https://github.com/osxfuse/osxfuse) 是一个开源的文件系统增强工具,它让 macOS 可以挂载第三方的文件系统,使得 JuiceFS 客户端可以将文件系统挂载到 macOS 系统中使用。 +::: + +#### Homebrew 安装 + +如果你的系统安装了 [Homebrew](https://brew.sh/) 包管理器,可以执行以下命令安装 JuiceFS 客户端: + +```shell +brew tap juicedata/homebrew-tap +brew install juicefs +``` + +#### 预编译二进制程序 + +你也可以下载文件名包含 `darwin-amd64` 的二进制程序,解压后使用 `install` 命令将程序安装到系统的任意可执行路径,例如: + +```shell +sudo install juicefs /usr/local/bin +``` + +### Docker 容器 + +对于要在 Docker 容器中使用 JuiceFS 的情况,这里提供一份构建 JuiceFS 客户端镜像的 `Dockerfile`,可以以此为基础单独构建 JuiceFS 客户端镜像或与其他应用打包在一起使用。 + +```dockerfile +FROM ubuntu:20.04 + +RUN apt update && apt install -y curl fuse && \ + apt-get autoremove && \ + apt-get clean && \ + rm -rf \ + /tmp/* \ + /var/lib/apt/lists/* \ + /var/tmp/* + +RUN set -x && \ + mkdir /juicefs && \ + cd /juicefs && \ + JFS_LATEST_TAG=$(curl -s https://api.github.com/repos/juicedata/juicefs/releases/latest | grep 'tag_name' | cut -d '"' -f 4 | tr -d 'v') && \ + curl -s -L "https://github.com/juicedata/juicefs/releases/download/v${JFS_LATEST_TAG}/juicefs-${JFS_LATEST_TAG}-linux-amd64.tar.gz" \ + | tar -zx && \ + install juicefs /usr/bin && \ + cd .. 
&& \ + rm -rf /juicefs + +CMD [ "juicefs" ] +``` + +## 手动编译客户端 + +如果预编译的客户端中没有适用于你的版本,比如 FreeBSD 或 M1 芯片的 macOS,这时可以采用手动编译的方式编译适合你的 JuiceFS 客户端。 + +另外,手动编译客户端可以让你优先体验到 JuiceFS 开发中的各种新功能,但这需要你具备一定的软件编译相关的基础知识。 + +### 类 Unix 客户端 + +编译面向 Linux、macOS、BSD 等类 Unix 系统的客户端需要满足以下依赖: + +- [Go](https://golang.org) 1.16+ +- GCC 5.4+ + +:::tip 提示 +对于中国地区用户,为了加快获取 Go 模块的速度,建议通过 `GOPROXY` 环境变量设置国内的镜像服务器。例如:[Goproxy China](https://github.com/goproxy/goproxy.cn)。 +::: + +1. 克隆源码 + + ```shell + git clone https://github.com/juicedata/juicefs.git + ``` + +2. 进入源代码目录 + + ```shell + cd juicefs + ``` + +3. 切换分支 + + 源代码默认使用 `main` 分支,你可以切换到任何正式发布的版本,比如切换到 `v0.17.4`: + + ```shell + git checkout v0.17.4 + ``` + + :::caution 注意 + 开发分支经常涉及较大的变化,请不要将「开发分支」编译的客户端用于生产环境。 + ::: + +4. 执行编译 + + ```shell + make + ``` + + 编译好的 `juicefs` 二进制程序位于当前目录。 + +### 在 Windows 下编译 + +在 Windows 系统中编译 JuiceFS 客户端,需要安装 [Go](https://golang.org) 1.16+ 和 GCC 5.4+。 + +由于 GCC 没有原生 Windows 客户端,因此需要使用第三方提供的版本,可以使用 [MinGW-w64](https://sourceforge.net/projects/mingw-w64/) 或 [Cygwin](https://www.cygwin.com/),这里以 MinGW-w64 为例介绍。 + +下载 MinGW-w64 并将其内的 `bin` 目录添加到系统环境变量。 + +1. 克隆并进入项目目录: + + ```shell + git clone https://github.com/juicedata/juicefs.git && cd juicefs + ``` + +2. 复制 winfsp 头文件 + + ```shell + mkdir "C:\WinFsp\inc\fuse" + ``` + + ```shell + copy .\hack\winfsp_headers\* C:\WinFsp\inc\fuse\ + ``` + + ```shell + dir "C:\WinFsp\inc\fuse" + ``` + + ```shell + set CGO_CFLAGS=-IC:/WinFsp/inc/fuse + ``` + + ```shell + go env -w CGO_CFLAGS=-IC:/WinFsp/inc/fuse + ``` + +3. 编译客户端 + + ```shell + go build -ldflags="-s -w" -o juicefs.exe ./cmd + ``` + +### 在 Linux 中交叉编译 Windows 客户端 + +为 Windows 编译特定版本客户端的过程与[类 Unix 客户端](#类-unix-客户端)基本一致,可以直接在 Linux 系统中进行编译,但除了 `go` 和 `gcc` 必须安装以外,还需要安装: + +- [mingw-w64](https://www.mingw-w64.org/downloads/) + +安装 Linux 发行版包管理器提供的最新版本即可,例如 Ubuntu 20.04+ 可以直接安装: + +```shell +sudo apt install mingw-w64 +``` + +编译 Windows 客户端: + +```shell +make juicefs.exe +``` + +编译好的客户端是一个名为 `juicefs.exe` 的二进制文件,位于当前目录。 + +## 客户端升级 + +JuiceFS 客户端只有一个二进制程序,升级新版只需用新版程序替换旧版程序即可。 + +- **使用预编译客户端**:可以参照本文档中相应系统的安装方法,下载最新的客户端,覆盖旧版客户端即可。 +- **手动编译客户端**:可以拉取最新的源代码重新编译,覆盖旧版客户端即可。 + +:::caution 注意 +对于使用旧版 JuiceFS 客户端已经挂载好的文件系统,需要先[卸载文件系统](for_distributed.md#6-卸载文件系统),然后用新版 JuiceFS 客户端重新挂载。 +::: + +## 卸载客户端 + +JuiceFS 客户端只有一个二进制文件,只需找到程序所在位置删除即可。例如,参照本文档 Linux 系统安装的客户端,执行以下命令卸载客户端: + +```shell +sudo rm /usr/local/bin/juicefs +``` + +你还可以通过 `which` 命令查看程序所在位置: + +```shell +which juicefs +``` + +命令返回的路径即 JuiceFS 客户端在你系统上的安装位置。其他操作系统卸载方法依此类推。 diff --git a/docs/zh_cn/images/baiduyun.png b/docs/zh_cn/images/baiduyun.png new file mode 100644 index 0000000..a8c4a5e Binary files /dev/null and b/docs/zh_cn/images/baiduyun.png differ diff --git a/docs/zh_cn/images/baoyinxiaofei.png b/docs/zh_cn/images/baoyinxiaofei.png new file mode 100644 index 0000000..8cc54cb Binary files /dev/null and b/docs/zh_cn/images/baoyinxiaofei.png differ diff --git a/docs/zh_cn/images/bench-guide-bench.png b/docs/zh_cn/images/bench-guide-bench.png new file mode 100644 index 0000000..e079ad5 Binary files /dev/null and b/docs/zh_cn/images/bench-guide-bench.png differ diff --git a/docs/zh_cn/images/bench-guide-profile.png b/docs/zh_cn/images/bench-guide-profile.png new file mode 100644 index 0000000..2656bdf Binary files /dev/null and b/docs/zh_cn/images/bench-guide-profile.png differ diff --git a/docs/zh_cn/images/bench-guide-stats.png b/docs/zh_cn/images/bench-guide-stats.png new file mode 100644 index 0000000..2ea225f Binary files /dev/null and 
b/docs/zh_cn/images/bench-guide-stats.png differ diff --git a/docs/zh_cn/images/bigo.png b/docs/zh_cn/images/bigo.png new file mode 100644 index 0000000..8beea05 Binary files /dev/null and b/docs/zh_cn/images/bigo.png differ diff --git a/docs/zh_cn/images/cos-bucket-url.png b/docs/zh_cn/images/cos-bucket-url.png new file mode 100644 index 0000000..d8298c4 Binary files /dev/null and b/docs/zh_cn/images/cos-bucket-url.png differ diff --git a/docs/zh_cn/images/digitalocean-redis-guide.png b/docs/zh_cn/images/digitalocean-redis-guide.png new file mode 100644 index 0000000..00d410f Binary files /dev/null and b/docs/zh_cn/images/digitalocean-redis-guide.png differ diff --git a/docs/zh_cn/images/digitalocean-redis-url.png b/docs/zh_cn/images/digitalocean-redis-url.png new file mode 100644 index 0000000..8c41f67 Binary files /dev/null and b/docs/zh_cn/images/digitalocean-redis-url.png differ diff --git a/docs/zh_cn/images/dingdong.png b/docs/zh_cn/images/dingdong.png new file mode 100644 index 0000000..b362f00 Binary files /dev/null and b/docs/zh_cn/images/dingdong.png differ diff --git a/docs/zh_cn/images/encryption.png b/docs/zh_cn/images/encryption.png new file mode 100644 index 0000000..a285b19 Binary files /dev/null and b/docs/zh_cn/images/encryption.png differ diff --git a/docs/zh_cn/images/grafana_dashboard.png b/docs/zh_cn/images/grafana_dashboard.png new file mode 100644 index 0000000..c1074c1 Binary files /dev/null and b/docs/zh_cn/images/grafana_dashboard.png differ diff --git a/docs/zh_cn/images/hangtianhongtu.png b/docs/zh_cn/images/hangtianhongtu.png new file mode 100644 index 0000000..8294587 Binary files /dev/null and b/docs/zh_cn/images/hangtianhongtu.png differ diff --git a/docs/zh_cn/images/how-juicefs-stores-files-new.png b/docs/zh_cn/images/how-juicefs-stores-files-new.png new file mode 100644 index 0000000..cb09bad Binary files /dev/null and b/docs/zh_cn/images/how-juicefs-stores-files-new.png differ diff --git a/docs/zh_cn/images/how-juicefs-stores-files-redis.png b/docs/zh_cn/images/how-juicefs-stores-files-redis.png new file mode 100644 index 0000000..df29721 Binary files /dev/null and b/docs/zh_cn/images/how-juicefs-stores-files-redis.png differ diff --git a/docs/zh_cn/images/how-juicefs-stores-files.png b/docs/zh_cn/images/how-juicefs-stores-files.png new file mode 100644 index 0000000..12b94c0 Binary files /dev/null and b/docs/zh_cn/images/how-juicefs-stores-files.png differ diff --git a/docs/zh_cn/images/internals-read.png b/docs/zh_cn/images/internals-read.png new file mode 100644 index 0000000..48ca6a5 Binary files /dev/null and b/docs/zh_cn/images/internals-read.png differ diff --git a/docs/zh_cn/images/internals-stats.png b/docs/zh_cn/images/internals-stats.png new file mode 100644 index 0000000..8f026e8 Binary files /dev/null and b/docs/zh_cn/images/internals-stats.png differ diff --git a/docs/zh_cn/images/internals-write.png b/docs/zh_cn/images/internals-write.png new file mode 100644 index 0000000..1936a60 Binary files /dev/null and b/docs/zh_cn/images/internals-write.png differ diff --git a/docs/zh_cn/images/juicefs-aliyun.png b/docs/zh_cn/images/juicefs-aliyun.png new file mode 100644 index 0000000..7d2c340 Binary files /dev/null and b/docs/zh_cn/images/juicefs-aliyun.png differ diff --git a/docs/zh_cn/images/juicefs-arch-new.png b/docs/zh_cn/images/juicefs-arch-new.png new file mode 100644 index 0000000..cf065c0 Binary files /dev/null and b/docs/zh_cn/images/juicefs-arch-new.png differ diff --git a/docs/zh_cn/images/juicefs-arch.png 
b/docs/zh_cn/images/juicefs-arch.png new file mode 100644 index 0000000..b36611f Binary files /dev/null and b/docs/zh_cn/images/juicefs-arch.png differ diff --git a/docs/zh_cn/images/juicefs-bench.png b/docs/zh_cn/images/juicefs-bench.png new file mode 100644 index 0000000..63739c2 Binary files /dev/null and b/docs/zh_cn/images/juicefs-bench.png differ diff --git a/docs/zh_cn/images/juicefs-logo.png b/docs/zh_cn/images/juicefs-logo.png new file mode 100644 index 0000000..ff6a13e Binary files /dev/null and b/docs/zh_cn/images/juicefs-logo.png differ diff --git a/docs/zh_cn/images/juicefs-on-windows-new.png b/docs/zh_cn/images/juicefs-on-windows-new.png new file mode 100644 index 0000000..e1f1979 Binary files /dev/null and b/docs/zh_cn/images/juicefs-on-windows-new.png differ diff --git a/docs/zh_cn/images/juicefs-on-windows.png b/docs/zh_cn/images/juicefs-on-windows.png new file mode 100644 index 0000000..427fe74 Binary files /dev/null and b/docs/zh_cn/images/juicefs-on-windows.png differ diff --git a/docs/zh_cn/images/juicefs-profiling.gif b/docs/zh_cn/images/juicefs-profiling.gif new file mode 100644 index 0000000..3db752f Binary files /dev/null and b/docs/zh_cn/images/juicefs-profiling.gif differ diff --git a/docs/zh_cn/images/juicefs-qcloud.png b/docs/zh_cn/images/juicefs-qcloud.png new file mode 100644 index 0000000..f088029 Binary files /dev/null and b/docs/zh_cn/images/juicefs-qcloud.png differ diff --git a/docs/zh_cn/images/juicefs-s3-gateway-arch.png b/docs/zh_cn/images/juicefs-s3-gateway-arch.png new file mode 100644 index 0000000..36511e5 Binary files /dev/null and b/docs/zh_cn/images/juicefs-s3-gateway-arch.png differ diff --git a/docs/zh_cn/images/juicefs-storage-format-new.png b/docs/zh_cn/images/juicefs-storage-format-new.png new file mode 100644 index 0000000..684d399 Binary files /dev/null and b/docs/zh_cn/images/juicefs-storage-format-new.png differ diff --git a/docs/zh_cn/images/juicefs-storage-format.png b/docs/zh_cn/images/juicefs-storage-format.png new file mode 100644 index 0000000..adfe433 Binary files /dev/null and b/docs/zh_cn/images/juicefs-storage-format.png differ diff --git a/docs/zh_cn/images/juicefs_stats_watcher.png b/docs/zh_cn/images/juicefs_stats_watcher.png new file mode 100644 index 0000000..5b0bb21 Binary files /dev/null and b/docs/zh_cn/images/juicefs_stats_watcher.png differ diff --git a/docs/zh_cn/images/k3s-nginx-welcome.png b/docs/zh_cn/images/k3s-nginx-welcome.png new file mode 100644 index 0000000..15e6ae6 Binary files /dev/null and b/docs/zh_cn/images/k3s-nginx-welcome.png differ diff --git a/docs/zh_cn/images/kubesphere_app_shop.png b/docs/zh_cn/images/kubesphere_app_shop.png new file mode 100644 index 0000000..9796282 Binary files /dev/null and b/docs/zh_cn/images/kubesphere_app_shop.png differ diff --git a/docs/zh_cn/images/kubesphere_app_template.png b/docs/zh_cn/images/kubesphere_app_template.png new file mode 100644 index 0000000..69abc48 Binary files /dev/null and b/docs/zh_cn/images/kubesphere_app_template.png differ diff --git a/docs/zh_cn/images/kubesphere_create_minio.png b/docs/zh_cn/images/kubesphere_create_minio.png new file mode 100644 index 0000000..ed02a25 Binary files /dev/null and b/docs/zh_cn/images/kubesphere_create_minio.png differ diff --git a/docs/zh_cn/images/kubesphere_create_secret.png b/docs/zh_cn/images/kubesphere_create_secret.png new file mode 100644 index 0000000..3aa88fd Binary files /dev/null and b/docs/zh_cn/images/kubesphere_create_secret.png differ diff --git a/docs/zh_cn/images/kubesphere_deployment.png 
b/docs/zh_cn/images/kubesphere_deployment.png new file mode 100644 index 0000000..db07a06 Binary files /dev/null and b/docs/zh_cn/images/kubesphere_deployment.png differ diff --git a/docs/zh_cn/images/kubesphere_install_csi.png b/docs/zh_cn/images/kubesphere_install_csi.png new file mode 100644 index 0000000..6f864fc Binary files /dev/null and b/docs/zh_cn/images/kubesphere_install_csi.png differ diff --git a/docs/zh_cn/images/kubesphere_minio.png b/docs/zh_cn/images/kubesphere_minio.png new file mode 100644 index 0000000..4812423 Binary files /dev/null and b/docs/zh_cn/images/kubesphere_minio.png differ diff --git a/docs/zh_cn/images/kubesphere_org_space.png b/docs/zh_cn/images/kubesphere_org_space.png new file mode 100644 index 0000000..fb68089 Binary files /dev/null and b/docs/zh_cn/images/kubesphere_org_space.png differ diff --git a/docs/zh_cn/images/kubesphere_pod.png b/docs/zh_cn/images/kubesphere_pod.png new file mode 100644 index 0000000..030b4b1 Binary files /dev/null and b/docs/zh_cn/images/kubesphere_pod.png differ diff --git a/docs/zh_cn/images/kubesphere_pvc.png b/docs/zh_cn/images/kubesphere_pvc.png new file mode 100644 index 0000000..79a1a41 Binary files /dev/null and b/docs/zh_cn/images/kubesphere_pvc.png differ diff --git a/docs/zh_cn/images/kubesphere_redis.png b/docs/zh_cn/images/kubesphere_redis.png new file mode 100644 index 0000000..ab39e7c Binary files /dev/null and b/docs/zh_cn/images/kubesphere_redis.png differ diff --git a/docs/zh_cn/images/kubesphere_sc_create.png b/docs/zh_cn/images/kubesphere_sc_create.png new file mode 100644 index 0000000..6f725e2 Binary files /dev/null and b/docs/zh_cn/images/kubesphere_sc_create.png differ diff --git a/docs/zh_cn/images/kubesphere_sc_update.png b/docs/zh_cn/images/kubesphere_sc_update.png new file mode 100644 index 0000000..e05e217 Binary files /dev/null and b/docs/zh_cn/images/kubesphere_sc_update.png differ diff --git a/docs/zh_cn/images/kubesphere_shop_juicefs.jpg b/docs/zh_cn/images/kubesphere_shop_juicefs.jpg new file mode 100644 index 0000000..cf57a4a Binary files /dev/null and b/docs/zh_cn/images/kubesphere_shop_juicefs.jpg differ diff --git a/docs/zh_cn/images/kubesphere_update_csi.png b/docs/zh_cn/images/kubesphere_update_csi.png new file mode 100644 index 0000000..203c9a4 Binary files /dev/null and b/docs/zh_cn/images/kubesphere_update_csi.png differ diff --git a/docs/zh_cn/images/kubesphere_update_secret.png b/docs/zh_cn/images/kubesphere_update_secret.png new file mode 100644 index 0000000..c19e630 Binary files /dev/null and b/docs/zh_cn/images/kubesphere_update_secret.png differ diff --git a/docs/zh_cn/images/kubesphere_workload.png b/docs/zh_cn/images/kubesphere_workload.png new file mode 100644 index 0000000..abf6957 Binary files /dev/null and b/docs/zh_cn/images/kubesphere_workload.png differ diff --git a/docs/zh_cn/images/lixiang.png b/docs/zh_cn/images/lixiang.png new file mode 100644 index 0000000..1714573 Binary files /dev/null and b/docs/zh_cn/images/lixiang.png differ diff --git a/docs/zh_cn/images/meta-auto-backup-list.png b/docs/zh_cn/images/meta-auto-backup-list.png new file mode 100644 index 0000000..eeaa384 Binary files /dev/null and b/docs/zh_cn/images/meta-auto-backup-list.png differ diff --git a/docs/zh_cn/images/metadata-benchmark.svg b/docs/zh_cn/images/metadata-benchmark.svg new file mode 100644 index 0000000..83273aa --- /dev/null +++ b/docs/zh_cn/images/metadata-benchmark.svg @@ -0,0 +1 @@ + \ No newline at end of file diff --git a/docs/zh_cn/images/mi.png b/docs/zh_cn/images/mi.png new 
file mode 100644 index 0000000..d8bc210 Binary files /dev/null and b/docs/zh_cn/images/mi.png differ diff --git a/docs/zh_cn/images/minio-browser.png b/docs/zh_cn/images/minio-browser.png new file mode 100644 index 0000000..11d2dcd Binary files /dev/null and b/docs/zh_cn/images/minio-browser.png differ diff --git a/docs/zh_cn/images/pv-on-juicefs.png b/docs/zh_cn/images/pv-on-juicefs.png new file mode 100644 index 0000000..87d57a0 Binary files /dev/null and b/docs/zh_cn/images/pv-on-juicefs.png differ diff --git a/docs/zh_cn/images/qcloud-redis-network.png b/docs/zh_cn/images/qcloud-redis-network.png new file mode 100644 index 0000000..17e328c Binary files /dev/null and b/docs/zh_cn/images/qcloud-redis-network.png differ diff --git a/docs/zh_cn/images/qcloud.png b/docs/zh_cn/images/qcloud.png new file mode 100644 index 0000000..0605b23 Binary files /dev/null and b/docs/zh_cn/images/qcloud.png differ diff --git a/docs/zh_cn/images/rancher-chart-info.jpg b/docs/zh_cn/images/rancher-chart-info.jpg new file mode 100644 index 0000000..95abb78 Binary files /dev/null and b/docs/zh_cn/images/rancher-chart-info.jpg differ diff --git a/docs/zh_cn/images/rancher-chart-installed.jpg b/docs/zh_cn/images/rancher-chart-installed.jpg new file mode 100644 index 0000000..63334f4 Binary files /dev/null and b/docs/zh_cn/images/rancher-chart-installed.jpg differ diff --git a/docs/zh_cn/images/rancher-chart-search.jpg b/docs/zh_cn/images/rancher-chart-search.jpg new file mode 100644 index 0000000..aac3ce1 Binary files /dev/null and b/docs/zh_cn/images/rancher-chart-search.jpg differ diff --git a/docs/zh_cn/images/rancher-cluster-create.jpg b/docs/zh_cn/images/rancher-cluster-create.jpg new file mode 100644 index 0000000..ecbcbff Binary files /dev/null and b/docs/zh_cn/images/rancher-cluster-create.jpg differ diff --git a/docs/zh_cn/images/rancher-cluster-options.jpg b/docs/zh_cn/images/rancher-cluster-options.jpg new file mode 100644 index 0000000..864753d Binary files /dev/null and b/docs/zh_cn/images/rancher-cluster-options.jpg differ diff --git a/docs/zh_cn/images/rancher-clusters.jpg b/docs/zh_cn/images/rancher-clusters.jpg new file mode 100644 index 0000000..7a0075d Binary files /dev/null and b/docs/zh_cn/images/rancher-clusters.jpg differ diff --git a/docs/zh_cn/images/rancher-new-repo.jpg b/docs/zh_cn/images/rancher-new-repo.jpg new file mode 100644 index 0000000..6a646ae Binary files /dev/null and b/docs/zh_cn/images/rancher-new-repo.jpg differ diff --git a/docs/zh_cn/images/rancher-pvc.jpg b/docs/zh_cn/images/rancher-pvc.jpg new file mode 100644 index 0000000..6c68d6b Binary files /dev/null and b/docs/zh_cn/images/rancher-pvc.jpg differ diff --git a/docs/zh_cn/images/rancher-repos.jpg b/docs/zh_cn/images/rancher-repos.jpg new file mode 100644 index 0000000..ab126b6 Binary files /dev/null and b/docs/zh_cn/images/rancher-repos.jpg differ diff --git a/docs/zh_cn/images/rancher-welcome.jpeg b/docs/zh_cn/images/rancher-welcome.jpeg new file mode 100644 index 0000000..bd77dd1 Binary files /dev/null and b/docs/zh_cn/images/rancher-welcome.jpeg differ diff --git a/docs/zh_cn/images/repo-diagram.svg b/docs/zh_cn/images/repo-diagram.svg new file mode 100644 index 0000000..9cde737 --- /dev/null +++ b/docs/zh_cn/images/repo-diagram.svg @@ -0,0 +1 @@ 
+sdk/javasdk/javapkgpkghackhackdocsdocscmdcmdsrcsrclibjfslibjfsconfconfwinfspwinfspvfsvfsutilsutilssyncsyncobjectobjectmetametafusefusefsfscompresscompresschunkchunkwinfsp_headerswinfsp_headerszh_cnzh_cnimagesimagesenentesttestmainmainimagesimagesimagesimagesjava/io/juicefsjava/io/juicefsjava/io/juicefsjava/io/juicefscontractcontractutilsutilsbenchbench.builder.cmd.fs.gitignore.go.h.hack.java.json.md.meta.mod.properties.sh.svg.xml.yaml.ymleach dot sized by file size \ No newline at end of file diff --git a/docs/zh_cn/images/s3-gateway-file-manager.jpg b/docs/zh_cn/images/s3-gateway-file-manager.jpg new file mode 100644 index 0000000..e5b862f Binary files /dev/null and b/docs/zh_cn/images/s3-gateway-file-manager.jpg differ diff --git a/docs/zh_cn/images/s3ql-bin.jpg b/docs/zh_cn/images/s3ql-bin.jpg new file mode 100644 index 0000000..3397f19 Binary files /dev/null and b/docs/zh_cn/images/s3ql-bin.jpg differ diff --git a/docs/zh_cn/images/sequential-read-write-benchmark.svg b/docs/zh_cn/images/sequential-read-write-benchmark.svg new file mode 100644 index 0000000..4826a70 --- /dev/null +++ b/docs/zh_cn/images/sequential-read-write-benchmark.svg @@ -0,0 +1 @@ + \ No newline at end of file diff --git a/docs/zh_cn/images/sf.png b/docs/zh_cn/images/sf.png new file mode 100644 index 0000000..dc7a51f Binary files /dev/null and b/docs/zh_cn/images/sf.png differ diff --git a/docs/zh_cn/images/shopee.png b/docs/zh_cn/images/shopee.png new file mode 100644 index 0000000..51f50e1 Binary files /dev/null and b/docs/zh_cn/images/shopee.png differ diff --git a/docs/zh_cn/images/spark_ql_orc.png b/docs/zh_cn/images/spark_ql_orc.png new file mode 100644 index 0000000..1af8b40 Binary files /dev/null and b/docs/zh_cn/images/spark_ql_orc.png differ diff --git a/docs/zh_cn/images/spark_sql_parquet.png b/docs/zh_cn/images/spark_sql_parquet.png new file mode 100644 index 0000000..1ebe06f Binary files /dev/null and b/docs/zh_cn/images/spark_sql_parquet.png differ diff --git a/docs/zh_cn/images/sqlite-info.png b/docs/zh_cn/images/sqlite-info.png new file mode 100644 index 0000000..ed1f29d Binary files /dev/null and b/docs/zh_cn/images/sqlite-info.png differ diff --git a/docs/zh_cn/images/sqlite-mount-local.png b/docs/zh_cn/images/sqlite-mount-local.png new file mode 100644 index 0000000..90fe649 Binary files /dev/null and b/docs/zh_cn/images/sqlite-mount-local.png differ diff --git a/docs/zh_cn/images/windows-mount-startup.png b/docs/zh_cn/images/windows-mount-startup.png new file mode 100644 index 0000000..5bd57b4 Binary files /dev/null and b/docs/zh_cn/images/windows-mount-startup.png differ diff --git a/docs/zh_cn/images/windows-path-en.png b/docs/zh_cn/images/windows-path-en.png new file mode 100644 index 0000000..ec57747 Binary files /dev/null and b/docs/zh_cn/images/windows-path-en.png differ diff --git a/docs/zh_cn/images/windows-path.png b/docs/zh_cn/images/windows-path.png new file mode 100644 index 0000000..3eef636 Binary files /dev/null and b/docs/zh_cn/images/windows-path.png differ diff --git a/docs/zh_cn/images/windows-run-startup.png b/docs/zh_cn/images/windows-run-startup.png new file mode 100644 index 0000000..e46be46 Binary files /dev/null and b/docs/zh_cn/images/windows-run-startup.png differ diff --git a/docs/zh_cn/images/wsl/access-jfs-from-win.png b/docs/zh_cn/images/wsl/access-jfs-from-win.png new file mode 100644 index 0000000..6a55698 Binary files /dev/null and b/docs/zh_cn/images/wsl/access-jfs-from-win.png differ diff --git a/docs/zh_cn/images/wsl/init.png b/docs/zh_cn/images/wsl/init.png 
new file mode 100644 index 0000000..3c1a862 Binary files /dev/null and b/docs/zh_cn/images/wsl/init.png differ diff --git a/docs/zh_cn/images/wsl/mount-point.png b/docs/zh_cn/images/wsl/mount-point.png new file mode 100644 index 0000000..ff69305 Binary files /dev/null and b/docs/zh_cn/images/wsl/mount-point.png differ diff --git a/docs/zh_cn/images/wsl/startmenu.png b/docs/zh_cn/images/wsl/startmenu.png new file mode 100644 index 0000000..4831c2d Binary files /dev/null and b/docs/zh_cn/images/wsl/startmenu.png differ diff --git a/docs/zh_cn/images/wsl/windows-to-linux.png b/docs/zh_cn/images/wsl/windows-to-linux.png new file mode 100644 index 0000000..6dba6ac Binary files /dev/null and b/docs/zh_cn/images/wsl/windows-to-linux.png differ diff --git a/docs/zh_cn/images/wsl/winver.png b/docs/zh_cn/images/wsl/winver.png new file mode 100644 index 0000000..0a9a282 Binary files /dev/null and b/docs/zh_cn/images/wsl/winver.png differ diff --git a/docs/zh_cn/images/wsl/zone-identifier.png b/docs/zh_cn/images/wsl/zone-identifier.png new file mode 100644 index 0000000..51d1881 Binary files /dev/null and b/docs/zh_cn/images/wsl/zone-identifier.png differ diff --git a/docs/zh_cn/introduction/_case.md b/docs/zh_cn/introduction/_case.md new file mode 100644 index 0000000..8468b5d --- /dev/null +++ b/docs/zh_cn/introduction/_case.md @@ -0,0 +1,45 @@ +--- +sidebar_label: 应用场景 & 限制 +sidebar_position: 2 +slug: /case +--- +# JuiceFS 应用场景 & 限制 + +JuiceFS 广泛适用于各种数据存储和共享场景,本页汇总来自世界各地的 JuiceFS 应用案例,欢迎所有社区用户共同来维护这份案例列表。 + +## 数据备份、迁移与恢复 + +- [利用 JuiceFS 把 MySQL 备份验证性能提升 10 倍](https://juicefs.com/blog/cn/posts/optimize-xtrabackup-prepare-by-oplog/) +- [跨云数据搬迁利器:Juicesync](https://juicefs.com/blog/cn/posts/juicesync/) +- [下厨房基于 JuiceFS 的 MySQL 备份实践](https://juicefs.com/blog/cn/posts/xiachufang-mysql-backup-practice-on-juicefs/) +- [如何用 JuiceFS 归档备份 Nginx 日志](https://juicefs.com/blog/cn/posts/backup-nginx-logs-on-juicefs/) + +## 大数据 + +- [JuiceFS 如何帮助趣头条超大规模 HDFS 降负载](https://juicefs.com/blog/cn/posts/qutoutiao-big-data-platform-user-case/) +- [环球易购数据平台如何做到既提速又省钱?](https://juicefs.com/blog/cn/posts/globalegrow-big-data-platform-user-case/) +- [JuiceFS 在大搜车数据平台的实践](https://juicefs.com/blog/cn/posts/juicefs-practice-in-souche/) +- [使用 AWS Cloudformation 在 Amazon EMR 中一分钟配置 JuiceFS](https://aws.amazon.com/cn/blogs/china/use-aws-cloudformation-to-configure-juicefs-in-amazon-emr-in-one-minute/) +- [使用 JuiceFS 在云上优化 Kylin 4.0 的存储性能](https://juicefs.com/blog/cn/posts/optimize-kylin-on-juicefs/) +- [ClickHouse 存算分离架构探索](https://juicefs.com/blog/cn/posts/clickhouse-disaggregated-storage-and-compute-practice/) + +## AI + +- [如何借助 JuiceFS 为 AI 模型训练提速 7 倍](https://juicefs.com/blog/cn/posts/how-to-use-juicefs-to-speed-up-ai-model-training-by-7-times/) + +## 数据共享 + +- [基于 JuiceFS 搭建 Milvus 分布式集群](https://juicefs.com/blog/cn/posts/build-milvus-distributed-cluster-based-on-juicefs) +- [如何解决 NAS 单点故障还顺便省了 90% 的成本?](https://juicefs.com/blog/cn/posts/modao-replace-nas-with-juicefs/) + +## 内容收录 + +如果你也想把自己的 JuiceFS 应用方案添加到这份案例列表中,可以采用以下几种投稿方式: + +### 1. GitHub 投稿 + +你可以通过 GitHub 创建本仓库的分支,将你的案例网页链接添加到相应的分类中,提交 Pull Request 申请,等待审核和分支合并。 + +### 2. 
社交媒体投稿 + +你可以加入 JuiceFS 官方的 [Slack 频道](https://juicefs.slack.com/),任何一位工作人员都可以接洽案例投稿事宜。 diff --git a/docs/zh_cn/introduction/architecture.md b/docs/zh_cn/introduction/architecture.md new file mode 100644 index 0000000..c920777 --- /dev/null +++ b/docs/zh_cn/introduction/architecture.md @@ -0,0 +1,49 @@ +--- +sidebar_label: 技术架构 +sidebar_position: 2 +slug: /architecture +--- +# JuiceFS 技术架构 + +本文介绍 JuiceFS 的核心架构,以及 JuiceFS 存储文件的原理。 + +## 核心架构 + +JuiceFS 文件系统由三个部分组成: + +- **JuiceFS 客户端**:协调对象存储和元数据存储引擎,以及 POSIX、Hadoop、Kubernetes CSI Driver、S3 Gateway 等文件系统接口的实现; +- **数据存储**:存储数据本身,支持本地磁盘、公有云或私有云对象存储、HDFS 等介质; +- **元数据引擎**:存储数据对应的元数据(metadata)包含文件名、文件大小、权限组、创建修改时间和目录结构,支持 Redis、MySQL、TiKV 等多种引擎; + +![image](../images/juicefs-arch-new.png) + +作为文件系统,JuiceFS 会分别处理数据及其对应的元数据,数据会被存储在对象存储中,元数据会被存储在元数据服务引擎中。 + +在 **数据存储** 方面,JuiceFS 支持几乎所有的公有云对象存储,同时也支持 OpenStack Swift、Ceph、MinIO 等私有化的对象存储。 + +在 **元数据存储** 方面,JuiceFS 采用多引擎设计,目前已支持 Redis、TiKV、MySQL/MariaDB、PostgreSQL、SQLite 等作为元数据服务引擎,也将陆续实现更多元数据存储引擎。欢迎 [提交 Issue](https://github.com/juicedata/juicefs/issues) 反馈你的需求。 + +在 **文件系统接口** 实现方面: + +- 通过 **FUSE**,JuiceFS 文件系统能够以 POSIX 兼容的方式挂载到服务器,将海量云端存储直接当做本地存储来使用。 +- 通过 **Hadoop Java SDK**,JuiceFS 文件系统能够直接替代 HDFS,为 Hadoop 提供低成本的海量存储。 +- 通过 **Kubernetes CSI Driver**,JuiceFS 文件系统能够直接为 Kubernetes 提供海量存储。 +- 通过 **S3 Gateway**,使用 S3 作为存储层的应用可直接接入,同时可使用 AWS CLI、s3cmd、MinIO client 等工具访问 JuiceFS 文件系统。 + +## 如何存储文件 + +文件系统作为用户和硬盘之间交互的媒介,它让文件可以妥善的被存储在硬盘上。如你所知,Windows 常用的文件系统有 FAT32、NTFS,Linux 常用的文件系统有 Ext4、XFS、Btrfs 等,每一种文件系统都有其独特的组织和管理文件的方式,它决定了文件系统的存储能力和性能等特征。 + +JuiceFS 作为一个文件系统也不例外,它的强一致性、高性能等特征离不开它独特的文件管理模式。 + +与传统文件系统只能使用本地磁盘存储数据和对应的元数据的模式不同,JuiceFS 会将数据格式化以后存储在对象存储(云存储),同时会将数据对应的元数据存储在 Redis 等数据库中。 + +任何存入 JuiceFS 的文件都会被拆分成固定大小的 **"Chunk"**,默认的容量上限是 64 MiB。每个 Chunk 由一个或多个 **"Slice"** 组成,Slice 的长度不固定,取决于文件写入的方式。每个 Slice 又会被进一步拆分成固定大小的 **"Block"**,默认为 4 MiB。最后,这些 Block 会被存储到对象存储。与此同时,JuiceFS 会将每个文件以及它的 Chunks、Slices、Blocks 等元数据信息存储在元数据引擎中。 + +![JuiceFS storage format](../images/juicefs-storage-format-new.png) + +使用 JuiceFS,文件最终会被拆分成 Chunks、Slices 和 Blocks 存储在对象存储。因此,你会发现在对象存储平台的文件浏览器中找不到存入 JuiceFS 的源文件,存储桶中只有一个 chunks 目录和一堆数字编号的目录和文件。不要惊慌,这正是 JuiceFS 文件系统高性能运作的秘诀! + +![How JuiceFS stores your files](../images/how-juicefs-stores-files-new.png) + +除了挂载文件系统以外,你还可以使用 [JuiceFS S3 网关](../deployment/s3_gateway.md),这样既可以使用 S3 兼容的客户端,也可以使用内置的基于网页的文件管理器访问 JuiceFS 存储的文件。 diff --git a/docs/zh_cn/introduction/introduction.md b/docs/zh_cn/introduction/introduction.md new file mode 100644 index 0000000..0533cb2 --- /dev/null +++ b/docs/zh_cn/introduction/introduction.md @@ -0,0 +1,65 @@ +--- +title: JuiceFS 是什么? +sidebar_label: JuiceFS 是什么? +sidebar_position: 1 +slug: . +--- +# + +![JuiceFS LOGO](../images/juicefs-logo.png) + +**JuiceFS** 是一款面向云原生设计的高性能共享文件系统,在 Apache 2.0 开源协议下发布。提供完备的 [POSIX](https://en.wikipedia.org/wiki/POSIX) 兼容性,可将几乎所有对象存储接入本地作为海量本地磁盘使用,亦可同时在跨平台、跨地区的不同主机上挂载读写。 + +JuiceFS 采用「数据」与「元数据」分离存储的架构,从而实现文件系统的分布式设计。使用 JuiceFS 存储数据,数据本身会被持久化在[对象存储](../reference/how_to_setup_object_storage.md#支持的存储服务)(例如,Amazon S3),相对应的元数据可以按需持久化在 Redis、MySQL、TiKV、SQLite 等多种[数据库](../reference/how_to_setup_metadata_engine.md)中。 + +JuiceFS 提供了丰富的 API,适用于各种形式数据的管理、分析、归档、备份,可以在不修改代码的前提下无缝对接大数据、机器学习、人工智能等应用平台,为其提供海量、弹性、低价的高性能存储。运维人员不用再为可用性、灾难恢复、监控、扩容等工作烦恼,专注于业务开发,提升研发效率。同时运维细节的简化,也让运维团队更容易向 DevOps 团队转型。 + +## 核心特性 + +1. **POSIX 兼容**:像本地文件系统一样使用,无缝对接已有应用,无业务侵入性; +2. **HDFS 兼容**:完整兼容 [HDFS API](../deployment/hadoop_java_sdk.md),提供更强的元数据性能; +3. **S3 兼容**:提供 [S3 网关](../deployment/s3_gateway.md) 实现 S3 协议兼容的访问接口; +4. 
**云原生**:通过 [CSI Driver](../deployment/how_to_use_on_kubernetes.md) 轻松地在 Kubernetes 中使用 JuiceFS; +5. **分布式设计**:同一文件系统可在上千台服务器同时挂载,高性能并发读写,共享数据; +6. **强一致性**:确认的文件修改会在所有服务器上立即可见,保证强一致性; +7. **强悍性能**:毫秒级延迟,近乎无限的吞吐量(取决于对象存储规模),查看[性能测试结果](../benchmark/benchmark.md); +8. **数据安全**:支持传输中加密(encryption in transit)和静态加密(encryption at rest),[查看详情](../security/encrypt.md); +9. **文件锁**:支持 BSD 锁(flock)和 POSIX 锁(fcntl); +10. **数据压缩**:支持 [LZ4](https://lz4.github.io/lz4) 和 [Zstandard](https://facebook.github.io/zstd) 压缩算法,节省存储空间。 + +## 技术架构 + +JuiceFS 文件系统由三个部分组成: + +1. **JuiceFS 客户端**:协调对象存储和元数据存储引擎,以及 POSIX、Hadoop、Kubernetes CSI Driver、S3 Gateway 等文件系统接口的实现; +2. **数据存储**:存储数据本身,支持本地磁盘、公有云或私有云对象存储、HDFS 等介质; +3. **元数据引擎**:存储数据对应的元数据(metadata)包含文件名、文件大小、权限组、创建修改时间和目录结构等,支持 Redis、MySQL、TiKV 等多种引擎; + +![image](../images/juicefs-arch-new.png) + +作为文件系统,JuiceFS 会分别处理数据及其对应的元数据,数据会被存储在对象存储中,元数据会被存储在元数据引擎中。 + +在 **数据存储** 方面,JuiceFS 支持几乎所有的公有云对象存储,同时也支持 OpenStack Swift、Ceph、MinIO 等支持私有化部署的开源对象存储。 + +在 **元数据存储** 方面,JuiceFS 采用多引擎设计,目前已支持 Redis、TiKV、MySQL/MariaDB、PostgreSQL、SQLite 等作为元数据服务引擎,也将陆续实现更多元数据存储引擎。欢迎 [提交 Issue](https://github.com/juicedata/juicefs/issues) 反馈你的需求。 + +在 **文件系统接口** 实现方面: + +- 通过 **FUSE**,JuiceFS 文件系统能够以 POSIX 兼容的方式挂载到服务器,将海量云端存储直接当做本地存储来使用。 +- 通过 **Hadoop Java SDK**,JuiceFS 文件系统能够直接替代 HDFS,为 Hadoop 提供低成本的海量存储。 +- 通过 **Kubernetes CSI Driver**,JuiceFS 文件系统能够直接为 Kubernetes 提供海量存储。 +- 通过 **S3 Gateway**,使用 S3 作为存储层的应用可直接接入,同时可使用 AWS CLI、s3cmd、MinIO client 等工具访问 JuiceFS 文件系统。 + +## 应用场景 + +JuiceFS 为海量数据存储设计,可以作为很多分布式文件系统和网络文件系统的替代,特别是以下场景: + +- **大数据分析**:HDFS 兼容,没有任何特殊 API 侵入业务;与主流计算引擎(Spark、Presto、Hive 等)无缝衔接;无限扩展的存储空间;运维成本几乎为 0;完善的缓存机制,高于对象存储性能数倍。 +- **机器学习**:POSIX 兼容,可以支持所有机器学习、深度学习框架;共享能力提升团队管理、使用数据效率。 +- **容器集群中的持久卷**:Kubernetes CSI 支持;持久存储并与容器生存期独立;强一致性保证数据正确;接管数据存储需求,保证服务的无状态化。 +- **共享工作区**:可以在任意主机挂载;没有客户端并发读写限制;POSIX 兼容已有的数据流和脚本操作。 +- **数据备份**:在无限平滑扩展的存储空间备份各种数据,结合共享挂载功能,可以将多主机数据汇总至一处,做统一备份。 + +## 数据隐私 + +JuiceFS 是开源软件,你可以在 [GitHub](https://github.com/juicedata/juicefs) 找到完整的源代码。在使用 JuiceFS 存储数据时,数据会按照一定的规则被拆分成数据块并保存在你自己定义的对象存储或其它存储介质中,数据所对应的元数据则存储在你自己定义的数据库中。 diff --git a/docs/zh_cn/mount_at_boot.md b/docs/zh_cn/mount_at_boot.md new file mode 100644 index 0000000..fc9503a --- /dev/null +++ b/docs/zh_cn/mount_at_boot.md @@ -0,0 +1,90 @@ +# Mount JuiceFS at Boot + +This is a guide about how to mount JuiceFS automatically at boot. + +## Linux + +Copy `juicefs` as `/sbin/mount.juicefs`, then edit `/etc/fstab` with following line: + +``` + juicefs _netdev[,] 0 0 +``` + +The format of `` is `redis://:@:/`, e.g. `redis://localhost:6379/1`. And replace `` with specific path you wanna mount JuiceFS to, e.g. `/jfs`. If you need set [mount options](reference/command_reference.md#juicefs-mount), replace `[,]` with comma separated options list. The following line is an example: + +``` +redis://localhost:6379/1 /jfs juicefs _netdev,max-uploads=50,writeback,cache-size=2048 0 0 +``` + +**Note: By default, CentOS 6 will NOT mount network file system after boot, run following command to enable it:** + +```bash +$ sudo chkconfig --add netfs +``` + +## macOS + +Create a file named `io.juicefs..plist` under `~/Library/LaunchAgents`. Replace `` with JuiceFS volume name. 
Add following contents to the file (again, replace `NAME`, `PATH-TO-JUICEFS`, `META-URL` and `MOUNTPOINT` with appropriate value): + +```xml + + + + + Label + io.juicefs.NAME + ProgramArguments + + PATH-TO-JUICEFS + mount + META-URL + MOUNTPOINT + + RunAtLoad + + + +``` + +Use following commands to load the file created in the previous step and test whether the loading is successful. **Please ensure Redis server is already running.** + +```bash +$ launchctl load ~/Library/LaunchAgents/io.juicefs..plist +$ launchctl start ~/Library/LaunchAgents/io.juicefs. +$ ls +``` + +If mount failed, you can add following configuration to `io.juicefs..plist` file for debug purpose: + +```xml + StandardOutPath + /tmp/juicefs.out + StandardErrorPath + /tmp/juicefs.err +``` + +Use following commands to reload the latest configuration and inspect the output: + +```bash +$ launchctl unload ~/Library/LaunchAgents/io.juicefs..plist +$ launchctl load ~/Library/LaunchAgents/io.juicefs..plist +$ cat /tmp/juicefs.out +$ cat /tmp/juicefs.err +``` + +If you install Redis server by Homebrew, you could use following command to start it at boot: + +```bash +$ brew services start redis +``` + +Then add following configuration to `io.juicefs..plist` file for ensure Redis server is loaded: + +```xml + KeepAlive + + OtherJobEnabled + homebrew.mxcl.redis + +``` + diff --git a/docs/zh_cn/reference/command_reference.md b/docs/zh_cn/reference/command_reference.md new file mode 100644 index 0000000..777b1b8 --- /dev/null +++ b/docs/zh_cn/reference/command_reference.md @@ -0,0 +1,741 @@ +--- +sidebar_label: 命令参考 +sidebar_position: 1 +slug: /command_reference +--- +# JuiceFS 命令参考 + +有许多命令可帮助您管理文件系统,该页面提供了有关这些命令的详细参考。 + +## 概览 + +在终端输入 `juicefs` 并执行,你就会看到所有可用的命令。另外,你可以在每个命令后面添加 `-h/--help` 标记获得该命令的详细帮助信息。 + +```shell +$ juicefs -h +NAME: + juicefs - A POSIX file system built on Redis and object storage. + +USAGE: + juicefs [global options] command [command options] [arguments...] 
+ +VERSION: + 1.0-dev (2021-12-27 3462bdbf) + +COMMANDS: + format format a volume + mount mount a volume + umount unmount a volume + gateway S3-compatible gateway + sync sync between two storage + rmr remove directories recursively + info show internal information for paths or inodes + bench run benchmark to read/write/stat big/small files + gc collect any leaked objects + fsck Check consistency of file system + profile analyze access log + stats show runtime statistics + status show status of JuiceFS + warmup build cache for target directories/files + dump dump metadata into a JSON file + load load metadata from a previously dumped JSON file + config change config of a volume + destroy destroy an existing volume + help, h Shows a list of commands or help for one command + +GLOBAL OPTIONS: + --verbose, --debug, -v enable debug log (default: false) + --quiet, -q only warning and errors (default: false) + --trace enable trace log (default: false) + --no-agent Disable pprof (:6060) and gops (:6070) agent (default: false) + --help, -h show help (default: false) + --version, -V print only the version (default: false) + +COPYRIGHT: + Apache License 2.0 +``` + +:::tip 提示 +如果 `juicefs` 不在 `$PATH` 中,你需要指定程序所在的路径才能执行。例如,`juicefs` 如果在当前目录中,则可以使用 `./juicefs`。为了方便使用,建议将 `juicefs` 添加到 `$PATH` 中。可以参考 [安装&升级](../getting-started/installation.md) 了解安装相关内容。 +::: + +:::note 注意 +如果命令选项是布尔(boolean)类型,例如 `--debug` ,无需设置任何值,只要在命令中添加 `--debug` 即代表启用该功能,反之则代表不启用。 +::: + +## 自动补全 + +:::note 注意 +此特性需要使用 0.15.2 及以上版本的 JuiceFS。它基于 `github.com/urfave/cli/v2` 实现,更多信息请参见[这里](https://github.com/urfave/cli/blob/master/docs/v2/manual.md#enabling)。 +::: + +通过加载 `hack/autocomplete` 下的对应脚本可以启用命令的自动补全,例如: + +### Bash + +```shell +source hack/autocomplete/bash_autocomplete +``` + +### Zsh + +```shell +source hack/autocomplete/zsh_autocomplete +``` + +请注意自动补全功能仅对当前会话有效。如果你希望对所有新会话都启用此功能,请将 `source` 命令添加到 `.bashrc` 或 `.zshrc` 中: + +```shell +echo "source path/to/bash_autocomplete" >> ~/.bashrc +``` + +或 + +```shell +echo "source path/to/zsh_autocomplete" >> ~/.zshrc +``` + +另外,如果你是在 Linux 系统上使用 bash,也可以直接将脚本拷贝到 `/etc/bash_completion.d` 目录并将其重命名为 `juicefs`: + +```shell +sudo cp hack/autocomplete/bash_autocomplete /etc/bash_completion.d/juicefs +``` + +```shell +source /etc/bash_completion.d/juicefs +``` + +## 命令列表 + +### juicefs format + +#### 描述 + +格式化文件系统;这是使用新文件系统的第一步。 + +#### 使用 + +``` +juicefs format [command options] META-URL NAME +``` + +- **META-URL**:用于元数据存储的数据库 URL,详情查看「[JuiceFS 支持的元数据引擎](how_to_setup_metadata_engine.md)」。 +- **NAME**:文件系统名称 + +#### 选项 + +`--block-size value`
+块大小;单位为 KiB (默认: 4096) + +`--capacity value`
+容量配额;单位为 GiB (默认: 不限制) + +`--inodes value`
+文件数配额 (默认: 不限制) + +`--compress value`
+压缩算法 (lz4, zstd, none) (默认: "none") + +`--shards value`
+将数据块根据名字哈希存入 N 个桶中 (默认: 0) + +`--storage value`
+对象存储类型 (例如 s3, gcs, oss, cos) (默认: "file") + +`--bucket value`
+存储数据的桶路径 (默认: `"$HOME/.juicefs/local"` 或 `"/var/jfs"`) + +`--access-key value`
+对象存储的 Access key (env `ACCESS_KEY`) + +`--secret-key value`
+对象存储的 Secret key (env `SECRET_KEY`) + +`--encrypt-rsa-key value`
+RSA 私钥的路径 (PEM) + +`--trash-days value`
+文件被自动清理前在回收站内保留的天数 (默认: 1) + +`--force`
+强制覆盖当前的格式化配置 (默认: false) + +`--no-update`
+不要修改已有的格式化配置 (默认: false) + +### juicefs mount + +#### 描述 + +挂载一个已经格式化的文件系统。 + +#### 使用 + +``` +juicefs mount [command options] META-URL MOUNTPOINT +``` + +- **META-URL**:用于元数据存储的数据库 URL,详情查看「[JuiceFS 支持的元数据引擎](how_to_setup_metadata_engine.md)」。 +- **MOUNTPOINT**:文件系统挂载点,例如:`/mnt/jfs`、`Z:`。 + +#### 选项 + +`--metrics value`
+监控数据导出地址 (默认: "127.0.0.1:9567") + +`--consul value`
+Consul 注册中心地址 (默认: "127.0.0.1:8500") + +`--no-usage-report`
+不发送使用量信息 (默认: false) + +`-d, --background`
+后台运行 (默认: false) + +`--no-syslog`
+禁用系统日志 (默认: false) + +`--log value`
+后台运行时日志文件的位置 (默认: `$HOME/.juicefs/juicefs.log` 或 `/var/log/juicefs.log`) + +`-o value`
+其他 FUSE 选项 (参见[此文档](../reference/fuse_mount_options.md)来了解更多信息) + +`--attr-cache value`
+属性缓存过期时间;单位为秒 (默认: 1) + +`--entry-cache value`
+文件项缓存过期时间;单位为秒 (默认: 1) + +`--dir-entry-cache value`
+目录项缓存过期时间;单位为秒 (默认: 1) + +`--enable-xattr`
+启用扩展属性 (xattr) 功能 (默认: false) + +`--bucket value`
+为当前挂载点指定访问对象存储的 endpoint + +`--get-timeout value`
+下载一个对象的超时时间;单位为秒 (默认: 60) + +`--put-timeout value`
+上传一个对象的超时时间;单位为秒 (默认: 60) + +`--io-retries value`
+网络异常时的重试次数 (默认: 30) + +`--max-uploads value`
+上传对象的连接数 (默认: 20) + +`--max-deletes value`
+删除对象的连接数 (默认: 2) + +`--buffer-size value`
+读写缓存的总大小;单位为 MiB (默认: 300) + +`--upload-limit value`
+上传带宽限制,单位为 Mbps (默认: 0) + +`--download-limit value`
+下载带宽限制,单位为 Mbps (默认: 0) + +`--prefetch value`
+并发预读 N 个块 (默认: 1) + +`--writeback`
+后台异步上传对象 (默认: false) + +`--cache-dir value`
+本地缓存目录路径;使用冒号分隔多个路径 (默认: `"$HOME/.juicefs/cache"` 或 `"/var/jfsCache"`) + +`--cache-size value`
+缓存对象的总大小;单位为 MiB (默认: 102400) + +`--free-space-ratio value`
+最小剩余空间比例 (默认: 0.1) + +`--cache-partial-only`
+仅缓存随机小块读 (默认: false) + +`--read-only`
+只读模式 (默认: false) + +`--open-cache value`
+打开的文件的缓存过期时间(0 代表关闭这个特性);单位为秒 (默认: 0) + +`--subdir value`
+将某个子目录挂载为根 (默认: "") + +### juicefs umount + +#### 描述 + +卸载一个文件系统。 + +#### 使用 + +``` +juicefs umount [command options] MOUNTPOINT +``` + +#### 选项 + +`-f, --force`
+强制卸载一个忙碌的文件系统 (默认: false) + +### juicefs gateway + +#### 描述 + +启动一个 S3 兼容的网关。 + +#### 使用 + +``` +juicefs gateway [command options] META-URL ADDRESS +``` + +- **META-URL**:用于元数据存储的数据库 URL,详情查看「[JuiceFS 支持的元数据引擎](how_to_setup_metadata_engine.md)」。 +- **ADDRESS**:S3 网关地址和监听的端口,例如:`localhost:9000` + +#### 选项 + +`--bucket value`
+为当前网关指定访问对象存储的 endpoint + +`--get-timeout value`
+下载一个对象的超时时间;单位为秒 (默认: 60) + +`--put-timeout value`
+上传一个对象的超时时间;单位为秒 (默认: 60) + +`--io-retries value`
+网络异常时的重试次数 (默认: 30) + +`--max-uploads value`
+上传对象的连接数 (默认: 20) + +`--max-deletes value`
+删除对象的连接数 (默认: 2) + +`--buffer-size value`
+读写缓存的总大小;单位为 MiB (默认: 300) + +`--upload-limit value`
+上传带宽限制,单位为 Mbps (默认: 0) + +`--download-limit value`
+下载带宽限制,单位为 Mbps (默认: 0) + +`--prefetch value`
+并发预读 N 个块 (默认: 1) + +`--writeback`
+后台异步上传对象 (默认: false) + +`--cache-dir value`
+本地缓存目录路径;使用冒号分隔多个路径 (默认: `"$HOME/.juicefs/cache"` 或 `/var/jfsCache`) + +`--cache-size value`
+缓存对象的总大小;单位为 MiB (默认: 102400) + +`--free-space-ratio value`
+最小剩余空间比例 (默认: 0.1) + +`--cache-partial-only`
+仅缓存随机小块读 (默认: false) + +`--read-only`
+只读模式 (默认: false) + +`--open-cache value`
+打开的文件的缓存过期时间(0 代表关闭这个特性);单位为秒 (默认: 0) + +`--subdir value`
+将某个子目录挂载为根 (默认: "") + +`--attr-cache value`
+属性缓存过期时间;单位为秒 (默认: 1) + +`--entry-cache value`
+文件项缓存过期时间;单位为秒 (默认: 0) + +`--dir-entry-cache value`
+目录项缓存过期时间;单位为秒 (默认: 1) + +`--access-log value`
+访问日志的路径 + +`--metrics value`
+监控数据导出地址 (默认: "127.0.0.1:9567") + +`--no-usage-report`
+不发送使用量信息 (默认: false) + +`--no-banner`
+禁用 MinIO 的启动信息 (默认: false) + +`--multi-buckets`
+使用第一级目录作为存储桶 (默认: false) + +`--keep-etag`
+保留对象上传时的 ETag (默认: false) + +### juicefs sync + +#### 描述 + +在两个存储系统之间同步数据。 + +#### 使用 + +``` +juicefs sync [command options] SRC DST +``` + +- **SRC**:源路径 +- **DST**:目标路径 + +源路径和目标路径的格式均为 `[NAME://][ACCESS_KEY:SECRET_KEY@]BUCKET[.ENDPOINT][/PREFIX]`,其中: + +- `NAME`:JuiceFS 支持的数据存储类型(如 `s3`、`oss`),请参考[文档](how_to_setup_object_storage.md#支持的存储服务)。 +- `ACCESS_KEY` 和 `SECRET_KEY`:访问数据存储所需的密钥信息,请参考[文档](how_to_setup_object_storage.md#access-key-和-secret-key)。 +- `BUCKET[.ENDPOINT]`:数据存储服务的访问地址,不同存储类型格式可能不同,具体请参考[文档](how_to_setup_object_storage.md#支持的存储服务)。 +- `[/PREFIX]`:可选,源路径和目标路径的前缀,可用于限定只同步某些路径中的数据。 + +#### 选项 + +`--start KEY, -s KEY`
+同步的第一个对象名 + +`--end KEY, -e KEY`
+同步的最后一个对象名 + +`--threads value, -p value`
+并发线程数 (默认: 10) + +`--http-port PORT`
+监听的 HTTP 端口 (默认: 6070) + +`--update, -u`
+当源文件更新时修改已存在的文件 (默认: false) + +`--force-update, -f`
+强制修改已存在的文件 (默认: false) + +`--perms`
+保留权限设置 (默认: false) + +`--dirs`
+同步目录 (默认: false) + +`--dry`
+模拟运行,不实际拷贝文件 (默认: false) + +`--delete-src, --deleteSrc`
+同步后删除源存储的对象 (默认: false) + +`--delete-dst, --deleteDst`
+删除目标存储下的不相关对象 (默认: false) + +`--exclude PATTERN`
+跳过包含 PATTERN (POSIX 正则表达式) 的对象名 + +`--include PATTERN`
+仅同步包含 PATTERN (POSIX 正则表达式) 的对象名 + +`--manager value`
+管理者地址 + +`--worker value`
+工作节点列表 (使用逗号分隔) + +`--bwlimit value`
+限制最大带宽;单位为 Mbps (0 表示不限制) (默认: 0) + +`--no-https`
+不要使用 HTTPS (默认: false) + +`--check-all`
+验证源路径和目标路径中所有文件的数据完整性 (默认: false) + +`--check-new`
+验证新拷贝文件的数据完整性 (默认: false) + +### juicefs rmr + +#### 描述 + +递归删除指定目录下的所有文件。 + +#### 使用 + +``` +juicefs rmr PATH ... +``` + +### juicefs info + +#### 描述 + +显示指定路径或 inode 的内部信息。 + +#### 使用 + +``` +juicefs info [command options] PATH or INODE +``` + +#### 选项 + +`--inode, -i`
+使用 inode 号而不是路径 (当前目录必须在 JuiceFS 挂载点内) (默认: false) + +`--recursive, -r`
+递归获取所有子目录的概要信息(注意:当指定一个目录结构很复杂的路径时可能会耗时很长) (默认: false) + +### juicefs bench + +#### 描述 + +对指定的路径做基准测试,包括对大文件和小文件的读/写/获取属性操作。 + +#### 使用 + +``` +juicefs bench [command options] PATH +``` + +#### 选项 + +`--block-size value`
+块大小;单位为 MiB (默认: 1) + +`--big-file-size value`
+大文件大小;单位为 MiB (默认: 1024) + +`--small-file-size value`
+小文件大小;单位为 MiB (默认: 0.1) + +`--small-file-count value`
+小文件数量 (默认: 100) + +`--threads value, -p value`
+并发线程数 (默认: 1) + +### juicefs gc + +#### 描述 + +收集泄漏的对象。 + +#### 使用 + +``` +juicefs gc [command options] META-URL +``` + +#### 选项 + +`--delete`
+删除泄漏的对象 (默认: false) + +`--compact`
+整理所有文件的碎片 (默认: false) + +`--threads value`
+用于删除泄漏对象的线程数 (默认: 10) + +### juicefs fsck + +#### 描述 + +检查文件系统一致性。 + +#### 使用 + +``` +juicefs fsck [command options] META-URL +``` + +### juicefs profile + +#### 描述 + +分析[访问日志](../administration/fault_diagnosis_and_analysis.md#访问日志)。 + +#### 使用 + +``` +juicefs profile [command options] MOUNTPOINT/LOGFILE +``` + +#### 选项 + +`--uid value, -u value`
+仅跟踪指定 UIDs (用逗号分隔) + +`--gid value, -g value`
+仅跟踪指定 GIDs (用逗号分隔) + +`--pid value, -p value`
+仅跟踪指定 PIDs (用逗号分隔) + +`--interval value`
+显示间隔;在回放模式中将其设置为 0 可以立即得到整体的统计结果;单位为秒 (默认: 2) + +### juicefs stats + +#### 描述 + +展示实时的性能统计信息。 + +#### 使用 + +``` +juicefs stats [command options] MOUNTPOINT +``` + +#### 选项 + +`--schema value`
+ +控制输出内容的标题字符串 (u: usage, f: fuse, m: meta, c: blockcache, o: object, g: go) (默认: "ufmco") + +`--interval value`
+ +更新间隔;单位为秒 (默认: 1) + +`--verbosity value`
+ +详细级别;通常 0 或 1 已足够 (默认: 0) + +`--nocolor`
+ +禁用颜色显示 (默认: false) + +### juicefs status + +#### 描述 + +显示 JuiceFS 的状态。 + +#### 使用 + +``` +juicefs status [command options] META-URL +``` + +#### 选项 + +`--session value, -s value`
+展示指定会话 (sid) 的具体信息 (默认: 0) + +### juicefs warmup + +#### 描述 + +主动为指定目录/文件建立缓存。 + +#### 使用 + +``` +juicefs warmup [command options] [PATH ...] +``` + +#### 选项 + +`--file value, -f value`
+指定一个包含一组路径的文件 + +`--threads value, -p value`
+并发的工作线程数 (默认: 50) + +`--background, -b`
+后台运行 (默认: false) + +### juicefs dump + +#### 描述 + +将元数据导出到一个 JSON 文件中。 + +#### 使用 + +``` +juicefs dump [command options] META-URL [FILE] +``` + +如果没有指定导出文件路径,会导出到标准输出。 + +#### 选项 + +`--subdir value`
+只导出一个子目录。 + +### juicefs load + +#### 描述 + +从之前导出的 JSON 文件中加载元数据。 + +#### 使用 + +``` +juicefs load [command options] META-URL [FILE] +``` + +如果没有指定导入文件路径,会从标准输入导入。 + +### juicefs config + +#### 描述 + +修改指定文件系统的配置项。 + +#### 使用 + +``` +juicefs config [command options] META-URL +``` + +#### 选项 + +`--capacity value`
+容量配额;单位为 GiB + +`--inodes value`
+文件数配额 + +`--bucket value`
+存储数据的桶路径 + +`--access-key value`
+对象存储的 Access key + +`--secret-key value`
+对象存储的 Secret key + +`--trash-days value`
+文件被自动清理前在回收站内保留的天数 + +`--force`
+跳过合理性检查并强制更新指定配置项 (默认: false) + +### juicefs destroy + +#### 描述 + +销毁一个已经存在的文件系统 + +#### 使用 + +``` +juicefs destroy [command options] META-URL UUID +``` + +#### 选项 + +`--force`
+跳过合理性检查并强制销毁文件系统 (默认: false) diff --git a/docs/zh_cn/reference/fuse_mount_options.md b/docs/zh_cn/reference/fuse_mount_options.md new file mode 100644 index 0000000..ee38f68 --- /dev/null +++ b/docs/zh_cn/reference/fuse_mount_options.md @@ -0,0 +1,28 @@ +--- +sidebar_label: FUSE 挂载选项 +sidebar_position: 6 +slug: /fuse_mount_options +--- +# FUSE 挂载选项 + +本指南列出了重要的 FUSE 挂载选项。当执行 [`juicefs mount`](../reference/command_reference.md#juicefs-mount) 命令时,这些安装选项由 `-o` 选项指定,多个选项使用半角逗号分隔。 例如: + +```bash +$ juicefs mount -d -o allow_other,writeback_cache localhost ~/jfs +``` + +## debug + +启用调试日志 + +## allow_other + +默认只有挂载文件系统的用户才能访问文件系统中的文件,此选项可解锁该限制。设置此选项以后,所有用户,包括 root 用户都可以访问该文件系统中的文件。 + +默认情况下,这个选项只允许 root 用户使用,但是可以通过修改 `/etc/fuse.conf`,在该配置文件中开启 `user_allow_other` 配置选项解除限制。 + +## writeback_cache + +> **注意**:该挂载选项仅在 Linux 3.15 及以上版本内核上支持。 + +FUSE 支持[「writeback-cache 模式」](https://www.kernel.org/doc/Documentation/filesystems/fuse-io.txt),这意味着 `write()` 系统调用通常可以非常快速地完成。当频繁写入非常小的数据(如 100 字节左右)时,建议启用此挂载选项。 diff --git a/docs/zh_cn/reference/glossary.md b/docs/zh_cn/reference/glossary.md new file mode 100644 index 0000000..95e6963 --- /dev/null +++ b/docs/zh_cn/reference/glossary.md @@ -0,0 +1,4 @@ +# 术语表 + +:::note 注意 +文档正在编写 \ No newline at end of file diff --git a/docs/zh_cn/reference/how_juicefs_store_files.md b/docs/zh_cn/reference/how_juicefs_store_files.md new file mode 100644 index 0000000..090fc9d --- /dev/null +++ b/docs/zh_cn/reference/how_juicefs_store_files.md @@ -0,0 +1,20 @@ +--- +sidebar_label: JuiceFS 如何存储文件 +sidebar_position: 5 +slug: /how_juicefs_store_files +--- +# JuiceFS 如何存储文件 + +文件系统作为用户和硬盘之间交互的媒介,它让文件可以妥善的被存储在硬盘上。如你所知,Windows 常用的文件系统有 FAT32、NTFS,Linux 常用的文件系统有 Ext4、XFS、Btrfs 等,每一种文件系统都有其独特的组织和管理文件的方式,它决定了文件系统的存储能力和性能等特征。 + +JuiceFS 作为一个文件系统也不例外,它的强一致性、高性能等特征离不开它独特的文件管理模式。 + +与传统文件系统只能使用本地磁盘存储数据和对应的元数据的模式不同,JuiceFS 会将数据格式化以后存储在对象存储(云存储),同时会将数据对应的元数据存储在 Redis 等数据库中。 + +任何存入 JuiceFS 的文件都会被拆分成固定大小的 **"Chunk"**,默认的容量上限是 64 MiB。每个 Chunk 由一个或多个 **"Slice"** 组成,Slice 的长度不固定,取决于文件写入的方式。每个 Slice 又会被进一步拆分成固定大小的 **"Block"**,默认为 4 MiB。最后,这些 Block 会被存储到对象存储。与此同时,JuiceFS 会将每个文件以及它的 Chunks、Slices、Blocks 等元数据信息存储在元数据引擎中。 + +![JuiceFS storage format](../images/juicefs-storage-format-new.png) + +使用 JuiceFS,文件最终会被拆分成 Chunks、Slices 和 Blocks 存储在对象存储。因此,你会发现在对象存储平台的文件浏览器中找不到存入 JuiceFS 的源文件,存储桶中只有一个 chunks 目录和一堆数字编号的目录和文件。不要惊慌,这正是 JuiceFS 文件系统高性能运作的秘诀! 
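+
+作为一个补充示例(挂载点与文件路径均为假设),可以在挂载好的文件系统内使用 `juicefs info` 命令查看某个文件在 JuiceFS 内部对应的信息,从侧面印证上面描述的拆分存储方式:
+
+```shell
+# 在 JuiceFS 挂载点内查看某个文件的内部信息(路径仅为示例)
+juicefs info /mnt/jfs/example.txt
+```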
+ +![How JuiceFS stores your files](../images/how-juicefs-stores-files-new.png) diff --git a/docs/zh_cn/reference/how_to_setup_metadata_engine.md b/docs/zh_cn/reference/how_to_setup_metadata_engine.md new file mode 100644 index 0000000..ccad5c4 --- /dev/null +++ b/docs/zh_cn/reference/how_to_setup_metadata_engine.md @@ -0,0 +1,242 @@ +--- +sidebar_label: 如何设置元数据引擎 +sidebar_position: 3 +slug: /databases_for_metadata +--- +# JuiceFS 如何设置元数据引擎 + +通过阅读 [JuiceFS 的技术架构](../introduction/architecture.md) 和 [JuiceFS 如何存储文件](../reference/how_juicefs_store_files.md),你会了解到 JuiceFS 被设计成了一种将数据和元数据独立存储的架构,通常来说,数据被存储在以对象存储为主的云存储中,而数据所对应的元数据则被存储在独立的数据库中。 + +## 元数据存储引擎 + +元数据和数据同样至关重要,元数据中记录着每一个文件的详细信息,名称、大小、权限、位置等等。特别是这种数据与元数据分离存储的文件系统,元数据的读写性能直接决定了文件系统实际的性能表现。 + +JuiceFS 的元数据存储采用了多引擎设计。为了打造一个超高性能的云原生文件系统,JuiceFS 最先支持的是运行在内存上的键值数据库—— [Redis](https://redis.io),这使得 JuiceFS 拥有十倍于 Amazon [EFS](https://aws.amazon.com/efs) 和 [S3FS](https://github.com/s3fs-fuse/s3fs-fuse) 的性能表现,[查看测试结果](../benchmark/benchmark.md)。 + +通过与社区用户积极互动,我们发现很多应用场景并不绝对依赖高性能,有时用户只是想临时找到一个方便的工具在云上可靠的迁移数据,或者只是想更简单的把对象存储挂载到本地小规模地使用。因此,JuiceFS 陆续开放了对 MySQL/MariaDB、TiKV 等更多数据库的支持(性能对比数据可参考[这里](../benchmark/metadata_engines_benchmark.md))。 + +:::caution 特别提示 +不论采用哪种数据库存储元数据,**务必确保元数据的安全**。元数据一旦损坏或丢失,将导致对应数据彻底损坏或丢失,甚至损毁整个文件系统。对于生产环境,应该始终选择具有高可用能力的数据库,与此同时,建议定期「[备份元数据](../administration/metadata_dump_load.md)」。 +::: + +## Redis + +[Redis](https://redis.io/) 是基于内存的键值存储系统,在 BSD 协议下开源,可用于数据库、缓存和消息代理。 + +### 创建文件系统 + +使用 Redis 作为元数据存储引擎时,通常使用以下格式访问数据库: + +```shell +redis://username:password@host:6379/1 +``` + +`username` 是 Redis 6.0 之后引入的。如果没有用户名可以忽略,如 `redis://:password@host:6379/1`(密码前面的`:`冒号需要保留)。 + +例如,以下命令创建名为 `pics` 的 JuiceFS 文件系统,使用 Redis 中的 `1` 号数据库存储元数据: + +```shell +$ juicefs format --storage s3 \ + ... + "redis://:mypassword@192.168.1.6:6379/1" \ + pics +``` + +安全起见,建议使用环境变量 `REDIS_PASSWORD` 传递密码,例如: + +```shell +export REDIS_PASSWORD=mypassword +``` + +然后就无需在元数据 URL 中设置密码了: + +```shell +$ juicefs format --storage s3 \ + ... + "redis://192.168.1.6:6379/1" \ + pics +``` + +:::caution 特别提示 +JuiceFS 对 redis 的最低版本要求为 4.0 +::: + +### 挂载文件系统 + +```shell +sudo juicefs mount -d "redis://192.168.1.6:6379/1" /mnt/jfs +``` + +:::tip 提示 +如果需要在多台服务器上共享同一个文件系统,必须确保每台服务器都能访问到存储元数据的数据库。 +::: + +如果你自己维护 Redis 数据库,建议阅读 [Redis 最佳实践](../administration/metadata/redis_best_practices.md)。 + +## PostgreSQL + +[PostgreSQL](https://www.postgresql.org/) 是功能强大的开源关系型数据库,有完善的生态和丰富的应用场景,也可以用来作为 JuiceFS 的元数据引擎。 + +许多云计算平台都提供托管的 PostgreSQL 数据库服务,也可以按照[使用向导](https://www.postgresqltutorial.com/postgresql-getting-started/)自己部署一个。 + +其他跟 PostgreSQL 协议兼容的数据库(比如 CockroachDB 等) 也可以这样使用。 + +### 创建文件系统 + +使用 PostgreSQL 作为元数据引擎时,需要使用如下的格式来指定参数: + +```shell +postgres://[:@][:5432]/[?parameters] +``` + +例如: + +```shell +$ juicefs format --storage s3 \ + ... + "postgres://user:password@192.168.1.6:5432/juicefs" \ + pics +``` + +安全起见,建议使用环境变量传递数据库密码,例如: + +```shell +export $PG_PASSWD=mypassword +``` + +然后将元数据 URL 改为 `"postgres://user:$PG_PASSWD@192.168.1.6:5432/juicefs"` + +### 挂载文件系统 + +```shell +sudo juicefs mount -d "postgres://user:$PG_PASSWD@192.168.1.6:5432/juicefs" /mnt/jfs +``` + +### 故障排除 + +JuiceFS 客户端默认采用 SSL 加密连接 PostgreSQL,如果连接时报错 `pq: SSL is not enabled on the server` 说明数据库没有启用 SSL。可以根据业务场景为 PostgreSQL 启用 SSL 加密,也可以在元数据 URL 中添加参数禁用加密验证: + +```shell +$ juicefs format --storage s3 \ + ... 
+ "postgres://user:$PG_PASSWD@192.168.1.6:5432/juicefs?sslmode=disable" \ + pics +``` + +元数据 URL 中还可以附加更多参数,[查看详情](https://pkg.go.dev/github.com/lib/pq#hdr-Connection_String_Parameters)。 + +## MySQL + +[MySQL](https://www.mysql.com/) 是受欢迎的开源关系型数据库之一,常被作为 Web 应用程序的首选数据库。 + +### 创建文件系统 + +使用 MySQL 作为元数据存储引擎时,通常使用以下格式访问数据库: + +```shell +mysql://:@(:3306)/ +``` + +例如: + +```shell +$ juicefs format --storage s3 \ + ... + "mysql://user:password@(192.168.1.6:3306)/juicefs" \ + pics +``` + +安全起见,建议使用环境变量传递数据库密码,例如: + +```shell +export $MYSQL_PASSWD=mypassword +``` + +然后将元数据 URL 改为 `"mysql://user:$MYSQL_PASSWD@(192.168.1.6:3306)/juicefs"` + +### 挂载文件系统 + +```shell +sudo juicefs mount -d "mysql://user:$MYSQL_PASSWD@(192.168.1.6:3306)/juicefs" /mnt/jfs +``` + +更多 MySQL 数据库的地址格式示例,[点此查看](https://github.com/Go-SQL-Driver/MySQL/#examples)。 + +## MariaDB + +[MariaDB](https://mariadb.org) 是 MySQL 的一个开源分支,由 MySQL 原始开发者维护并保持开源。 + +MariaDB 与 MySQL 高度兼容,在使用上也没有任何差别,创建和挂载文件系统时,保持与 MySQL 相同的语法。 + +例如: + +```shell +$ juicefs format --storage s3 \ + ... + "mysql://user:$MYSQL_PASSWD@(192.168.1.6:3306)/juicefs" \ + pics +``` + +## SQLite + +[SQLite](https://sqlite.org) 是全球广泛使用的小巧、快速、单文件、可靠、全功能的单文件 SQL 数据库引擎。 + +SQLite 数据库只有一个文件,创建和使用都非常灵活,用它作为 JuiceFS 元数据存储引擎时无需提前创建数据库文件,可以直接创建文件系统: + +```shell +$ juicefs format --storage s3 \ + ... + "sqlite3://my-jfs.db" \ + pics +``` + +以上命令会在当前目录创建名为 `my-jfs.db` 的数据库文件,请 **务必妥善保管** 这个数据库文件! + +挂载文件系统: + +```shell +sudo juicefs mount -d "sqlite3://my-jfs.db" +``` + +请注意数据库文件的位置,如果不在当前目录,则需要指定数据库文件的绝对路径,比如: + +```shell +sudo juicefs mount -d "sqlite3:///home/herald/my-jfs.db" /mnt/jfs/ +``` + +:::note 注意 +由于 SQLite 是一款单文件数据库,在不做特殊共享设置的情况下,只有数据库所在的主机可以访问它。对于多台服务器共享同一文件系统的情况,需要使用 Redis 或 MySQL 等数据库。 +::: + +## TiKV + +[TiKV](https://github.com/tikv/tikv) 是一个分布式事务型的键值数据库,最初作为 [PingCAP](https://pingcap.com) 旗舰产品 [TiDB](https://github.com/pingcap/tidb) 的存储层而研发,现已独立开源并从 [CNCF](https://www.cncf.io/projects) 毕业。 + +TiKV 的测试环境搭建非常简单,使用官方提供的 TiUP 工具即可实现一键部署,具体可参见[这里](https://tikv.org/docs/5.1/concepts/tikv-in-5-minutes/)。生产环境一般需要至少三个节点来存储三份数据副本,部署步骤可以参考[官方文档](https://tikv.org/docs/5.1/deploy/install/install/)。 + +### 创建文件系统 + +使用 TiKV 作为元数据引擎时,需要使用如下格式来指定参数: + +```shell +tikv://[,...]/ +``` + +其中 `prefix` 是一个用户自定义的字符串,当多个文件系统或者应用共用一个 TiKV 集群时,设置前缀可以避免混淆和冲突。示例如下: + +```shell +$ juicefs format --storage s3 \ + ... + "tikv://192.168.1.6:2379,192.168.1.7:2379,192.168.1.8:2379/jfs" \ + pics +``` + +### 挂载文件系统 + +```shell +sudo juicefs mount -d "tikv://192.168.1.6:6379,192.168.1.7:6379,192.168.1.8:6379/jfs" /mnt/jfs +``` + +## FoundationDB + +即将推出...... 
diff --git a/docs/zh_cn/reference/how_to_setup_object_storage.md b/docs/zh_cn/reference/how_to_setup_object_storage.md new file mode 100644 index 0000000..b04db47 --- /dev/null +++ b/docs/zh_cn/reference/how_to_setup_object_storage.md @@ -0,0 +1,779 @@ +--- +sidebar_label: 如何设置对象存储 +sidebar_position: 4 +slug: /how_to_setup_object_storage +--- + +# JuiceFS 如何设置对象存储 + +通过阅读 [JuiceFS 的技术架构](../introduction/architecture.md)可以了解到,JuiceFS 是一个数据与元数据分离的分布式文件系统,以对象存储作为主要的数据存储,以 Redis、PostgreSQL、MySQL 等数据库作为元数据存储。 + +## 存储参数 + +在创建 JuiceFS 文件系统时,设置数据存储一般涉及以下几个选项: + +- `--storage` 指定文件系统要使用的存储类型,例如:`--storage s3`。 +- `--bucket` 指定存储访问地址,例如:`--bucket https://myjuicefs.s3.us-east-2.amazonaws.com`。 +- `--access-key` 和 `--secret-key` 指定访问存储时的身份认证信息。 + +例如,以下命令使用 Amazon S3 对象存储创建文件系统: + +```shell +$ juicefs format --storage s3 \ + --bucket https://myjuicefs.s3.us-east-2.amazonaws.com \ + --access-key abcdefghijklmn \ + --secret-key nmlkjihgfedAcBdEfg \ + redis://192.168.1.6/1 \ + myjfs +``` + +## Access Key 和 Secret Key + +一般而言,对象存储通过 `Access Key ID` 和 `Access Key Secret` 验证用户身份,对应到 JuiceFS 文件系统就是 `--access-key` 和 `--secret-key` 这两个选项(或者简称为 AK、SK)。 + +创建文件系统时除了使用 `--access-key` 和 `--secret-key` 两个选项显式指定,更安全的做法是通过 `ACCESS_KEY` 和 `SECRET_KEY` 环境变量传递密钥信息,例如: + +```shell +$ export ACCESS_KEY=abcdefghijklmn +$ export SECRET_KEY=nmlkjihgfedAcBdEfg +$ juicefs format --storage s3 \ + --bucket https://myjuicefs.s3.us-east-2.amazonaws.com \ + redis://192.168.1.6/1 \ + myjfs +``` + +公有云通常允许用户创建 IAM(Identity and Access Management)角色,例如:[AWS IAM 角色](https://docs.aws.amazon.com/zh_cn/IAM/latest/UserGuide/id_roles.html) 或 [阿里云 RAM 角色](https://help.aliyun.com/document_detail/93689.html),可将角色分配给 VM 实例。如果云服务器实例已经拥有读写对象存储的权限,则无需再指定 `--access-key` 和 `--secret-key`。 + +## 使用代理 + +如果客户端所在的网络环境受防火墙策略或其他因素影响需要通过代理访问外部的对象存储服务,使用的操作系统不同,相应的代理设置方法也不同,请参考相应的用户手册进行设置。 + +以 Linux 为例,可以通过创建 `http_proxy` 和 `https_proxy` 环境变量设置代理: + +```shell +$ export http_proxy=http://localhost:8035/ +$ export https_proxy=http://localhost:8035/ +$ juicefs format \ + --storage s3 \ + ... 
\ + myjfs +``` + +## 支持的存储服务 + +如果你希望使用的存储类型不在列表中,欢迎提交需求 [issue](https://github.com/juicedata/juicefs/issues)。 + +| Name | Value | +| -------------------------------------- | ---------- | +| [Amazon S3](#amazon-s3) | `s3` | +| [Google 云存储](#google-云存储) | `gs` | +| [Azure Blob 存储](#azure-blob-存储) | `wasb` | +| [Backblaze B2](#backblaze-b2) | `b2` | +| [IBM 云对象存储](#ibm-云对象存储) | `ibmcos` | +| [Scaleway](#scaleway) | `scw` | +| [DigitalOcean Spaces](#digitalocean-spaces) | `space` | +| [Wasabi](#wasabi) | `wasabi` | +| [Storj DCS](#storj-dcs) | `s3` | +| [Vultr 对象存储](#vultr-对象存储) | `s3` | +| [阿里云 OSS](#阿里云-oss) | `oss` | +| [腾讯云 COS](#腾讯云-cos) | `cos` | +| [华为云 OBS](#华为云-obs) | `obs` | +| [百度云 BOS](#百度云-bos) | `bos` | +| [金山云 KS3](#金山云-ks3) | `ks3` | +| [美团云 MMS](#美团云-mms) | `mss` | +| [网易云 NOS](#网易云-nos) | `nos` | +| [青云 QingStor](#青云-qingstor) | `qingstor` | +| [七牛云 Kodo](#七牛云-kodo) | `qiniu` | +| [新浪云 SCS](#新浪云-scs) | `scs` | +| [天翼云 OOS](#天翼云-oos) | `oos` | +| [移动云 EOS](#移动云-eos) | `eos` | +| [优刻得 US3](#优刻得-us3) | `ufile` | +| [Ceph RADOS](#ceph-rados) | `ceph` | +| [Ceph RGW](#ceph-rgw) | `s3` | +| [Swift](#swift) | `swift` | +| [MinIO](#minio) | `minio` | +| [WebDAV](#webdav) | `webdav` | +| [HDFS](#hdfs) | `hdfs` | +| [Redis](#redis) | `redis` | +| [TiKV](#tikv) | `tikv` | +| [本地磁盘](#本地磁盘) | `file` | + +## Amazon S3 + +S3 支持[两种风格的 endpoint URI](https://docs.aws.amazon.com/zh_cn/AmazonS3/latest/userguide/VirtualHosting.html):`虚拟托管类型` 和 `路径类型`。 + +- 虚拟托管类型:`https://.s3..amazonaws.com` +- 路径类型:`https://s3..amazonaws.com/` + +其中 `` 要替换成实际的区域代码,比如:美国西部(俄勒冈)的区域代码为 `us-west-2`。[点此查看](https://docs.aws.amazon.com/zh_cn/AWSEC2/latest/UserGuide/using-regions-availability-zones.html#concepts-available-regions)所有的区域代码。 + +:::note 注意 +AWS 中国的用户,应使用 `amazonaws.com.cn` 域名。相应的区域代码信息[点此查看](https://docs.amazonaws.cn/aws/latest/userguide/endpoints-arns.html)。 +::: + +:::note 注意 +如果 S3 的桶具有公共访问权限(支持匿名访问),请将 `--access-key` 设置为 `anonymous`。 +::: + +JuiceFS v0.12 之前的版本仅支持虚拟托管类型,v0.12 以及之后的版本两种风格都支持。例如: + +```bash +# 虚拟托管类型 +$ juicefs format \ + --storage s3 \ + --bucket https://.s3..amazonaws.com \ + ... \ + myjfs +``` + +```bash +# 路径类型 +$ juicefs format \ + --storage s3 \ + --bucket https://s3..amazonaws.com/ \ + ... \ + myjfs +``` + +你也可以将 `--storage` 设置为 `s3` 用来连接 S3 兼容的对象存储,比如: + +```bash +# 虚拟托管类型 +$ ./juicefs format \ + --storage s3 \ + --bucket https://. \ + ... \ + myjfs +``` + +```bash +# 路径类型 +$ ./juicefs format \ + --storage s3 \ + --bucket https:/// \ + ... 
\ + myjfs +``` + +:::tip 提示 +所有 S3 兼容的对象存储服务其 `--bucket` 选项的格式为 `https://.` 或者 `https:///`,默认的 `region` 为 `us-east-1`,当需要不同的 `region` 的时候,可以通过环境变量 `AWS_REGION` 或者 `AWS_DEFAULT_REGION` 手动设置。 +::: + +## Google 云存储 + +Google 云采用 [IAM](https://cloud.google.com/iam/docs/overview) 管理资源的访问权限,通过对[服务账号](https://cloud.google.com/iam/docs/creating-managing-service-accounts#iam-service-accounts-create-gcloud)授权,可以对云服务器、对象存储的访问权限进行精细化的控制。 + +对于归属于同一服务账号的云服务器和对象存储,只要该账号赋予了相关资源的访问权限,创建 JuiceFS 文件系统时无需提供身份验证信息,云平台会自行完成鉴权。 + +对于要从谷歌云平台外部访问对象存储的情况,比如要在本地计算机上使用 Google 云存储创建 JuiceFS 文件系统,则需要配置认证信息。由于 Google 云存储并不使用 `Access Key ID` 和 `Access Key Secret`,而是通过服务账号的 `JSON 密钥文件`验证身份。 + +请参考《[以服务帐号身份进行身份验证](https://cloud.google.com/docs/authentication/production)》为服务账号创建 `JSON 密钥文件`并下载到本地计算机,通过 `GOOGLE_APPLICATION_CREDENTIALS` 环境变量定义密钥文件的路径,例如: + +```shell +export GOOGLE_APPLICATION_CREDENTIALS="$HOME/service-account-file.json" +``` + +可以把创建环境变量的命令写入 `~/.bashrc` 或 `~/.profile` 让 Shell 在每次启动时自动设置。 + +配置了传递密钥信息的环境变量以后,在本地和在 Google 云服务器上创建文件系统的命令是完全相同的。例如: + +```bash +$ juicefs format \ + --storage gs \ + --bucket \ + ... \ + myjfs +``` + +可以看到,命令中无需包含身份验证信息,客户端会通过前面环境变量设置的 JSON 密钥文件完成对象存储的访问鉴权。同时,由于 bucket 名称是 [全局唯一](https://cloud.google.com/storage/docs/naming-buckets#considerations) 的,创建文件系统时,`--bucket` 选项中只需指定 bucket 名称即可。 + +## Azure Blob 存储 + +使用 Azure Blob 作为 JuiceFS 的数据存储,除了使用 `--access-key` 和 `--secret-key` 选项之外,你也可以使用 [连接字符串](https://docs.microsoft.com/zh-cn/azure/storage/common/storage-configure-connection-string) 并通过 `AZURE_STORAGE_CONNECTION_STRING` 环境变量进行设定。例如: + +```bash +# Use connection string +$ export AZURE_STORAGE_CONNECTION_STRING="DefaultEndpointsProtocol=https;AccountName=XXX;AccountKey=XXX;EndpointSuffix=core.windows.net" +$ juicefs format \ + --storage wasb \ + --bucket https:// \ + ... \ + myjfs +``` + +:::note 注意 +Azure China 用户,`EndpointSuffix` 值为 `core.chinacloudapi.cn`。 +::: + +## Backblaze B2 + +使用 Backblaze B2 作为 JuiceFS 的数据存储,需要先创建 [application key](https://www.backblaze.com/b2/docs/application_keys.html),**Application Key ID** 和 **Application Key** 分别对应 `Access key` 和 `Secret key`。 + +Backblaze B2 支持两种访问接口:B2 原生 API 和 S3 兼容 API。 + +### B2 原生 API + +存储类型应设置为 `b2`,`--bucket` 只需设置 bucket 名称。例如: + +```bash +$ juicefs format \ + --storage b2 \ + --bucket \ + --access-key \ + --secret-key \ + ... \ + myjfs +``` + +### S3 兼容 API + +存储类型应设置为 `s3`,`--bucket` 应指定完整的 bucket 地址。例如: + +```bash +$ juicefs format \ + --storage s3 \ + --bucket https://s3.eu-central-003.backblazeb2.com/ \ + --access-key \ + --secret-key \ + ... \ + myjfs +``` + +## IBM 云对象存储 + +使用 IBM 云对象存储创建 JuiceFS 文件系统,你首先需要创建 [API key](https://cloud.ibm.com/docs/account?topic=account-manapikey) 和 [instance ID](https://cloud.ibm.com/docs/key-protect?topic=key-protect-retrieve-instance-ID)。**API key** 和 **instance ID** 分别对应 `Access key` 和 `Secret key`。 + +IBM 云对象存储为每一个区域提供了 `公网` 和 `内网` 两种 [endpoint 地址](https://cloud.ibm.com/docs/cloud-object-storage?topic=cloud-object-storage-endpoints),你可以根据实际需要选用。例如: + +```bash +$ juicefs format \ + --storage ibmcos \ + --bucket https://. \ + --access-key \ + --secret-key \ + ... 
\ + myjfs +``` + +## Scaleway + +使用 Scaleway 对象存储作为 JuiceFS 数据存储,请先 [查看文档](https://www.scaleway.com/en/docs/generate-api-keys) 了解如何创建 `Access key` 和 `Secret key`。 + +`--bucket` 选项的设置格式为 `https://.s3..scw.cloud`,请将其中的 `` 替换成实际的区域代码,例如:荷兰阿姆斯特丹的区域代码是 `nl-ams`。[点此查看](https://www.scaleway.com/en/docs/object-storage-feature/#-Core-Concepts) 所有可用的区域代码。 + +```bash +$ juicefs format \ + --storage scw \ + --bucket https://.s3..scw.cloud \ + ... \ + myjfs +``` + +## DigitalOcean Spaces + +使用 DigitalOcean Spaces 作为 JuiceFS 数据存储,请先 [查看文档](https://www.digitalocean.com/community/tutorials/how-to-create-a-digitalocean-space-and-api-key) 了解如何创建 `Access key` 和 `Secret key`。 + +`--bucket` 选项的设置格式为 `https://..digitaloceanspaces.com`,请将其中的 `` 替换成实际的区域代码,例如:`nyc3`。[点此查看](https://www.digitalocean.com/docs/spaces/#regional-availability) 所有可用的区域代码。 + +```bash +$ juicefs format \ + --storage space \ + --bucket https://..digitaloceanspaces.com \ + ... \ + myjfs +``` + +## Wasabi + +使用 Wasabi 作为 JuiceFS 数据存储,请先 [查看文档](https://wasabi-support.zendesk.com/hc/en-us/articles/360019677192-Creating-a-Root-Access-Key-and-Secret-Key) 了解如何创建 `Access key` 和 `Secret key`。 + +`--bucket` 选项的设置格式为 `https://.s3..wasabisys.com`,请将其中的 `` 替换成实际的区域代码,例如:US East 1 (N. Virginia) 的区域代码为 `us-east-1`。[点此查看](https://wasabi-support.zendesk.com/hc/en-us/articles/360.15.26031-What-are-the-service-URLs-for-Wasabi-s-different-regions-) 所有可用的区域代码。 + +```bash +$ juicefs format \ + --storage wasabi \ + --bucket https://.s3..wasabisys.com \ + ... \ + myjfs +``` + +:::note 注意 +Tokyo (ap-northeast-1) 区域的用户,查看 [这篇文档](https://wasabi-support.zendesk.com/hc/en-us/articles/360039372392-How-do-I-access-the-Wasabi-Tokyo-ap-northeast-1-storage-region-) 了解 endpoint URI 的设置方法。 +::: + +## Storj DCS + +使用 Storj DCS 作为 JuiceFS 数据存储,请先参照 [这篇文档](https://docs.storj.io/api-reference/s3-compatible-gateway) 了解如何创建 `Access key` 和 `Secret key`。 + +Storj DCS 兼容 AWS S3,存储类型使用 `s3` ,`--bucket` 格式为 `https://gateway..storjshare.io/`。`` 为存储区域,目前 DCS 有三个可用存储区域:us1、ap1 和 eu1。 + +```shell +$ juicefs format \ + --storage s3 \ + --bucket https://gateway..storjshare.io/ \ + --access-key \ + --secret-key \ + ... \ + myjfs +``` + +## Vultr 对象存储 + +Vultr 的对象存储兼容 S3 API,存储类型使用 `s3`,`--bucket` 格式为 `https://..vultrobjects.com/`。例如: + +```shell +$ juicefs format \ + --storage s3 \ + --bucket https://.ewr1.vultrobjects.com/ \ + --access-key \ + --secret-key \ + ... \ + myjfs +``` + +访问对象存储的 API 密钥可以在 [管理控制台](https://my.vultr.com/objectstorage/) 中找到。 + + +## 阿里云 OSS + +使用阿里云 OSS 作为 JuiceFS 数据存储,请先参照 [这篇文档](https://help.aliyun.com/document_detail/38738.html) 了解如何创建 `Access key` 和 `Secret key`。如果你已经创建了 [RAM 角色](https://help.aliyun.com/document_detail/93689.html) 并指派给了云服务器实例,则在创建文件系统时可以忽略 `--access-key` 和 `--secret-key` 选项。 + +阿里云也支持使用 [Security Token Service (STS)](https://help.aliyun.com/document_detail/100624.html) 作为 OSS 的临时访问身份验证。如果你要使用 STS,请设置 `ALICLOUD_ACCESS_KEY_ID`、`ALICLOUD_ACCESS_KEY_SECRET` 和 `SECURITY_TOKEN ` 环境变量,不要设置 `--access-key` and `--secret-key` 选项。例如: + +```bash +# Use Security Token Service (STS) +$ export ALICLOUD_ACCESS_KEY_ID=XXX +$ export ALICLOUD_ACCESS_KEY_SECRET=XXX +$ export SECURITY_TOKEN=XXX +$ juicefs format \ + --storage oss \ + --bucket https://. \ + ... \ + myjfs +``` + +阿里云 OSS 为每个区域都提供了 `公网` 和 `内网` [endpoint 链接](https://help.aliyun.com/document_detail/31834.html),你可以根据实际的场景选用。 + +如果你是在阿里云的服务器上创建文件系统,可以在 `--bucket` 选项中直接指定 bucket 名称。例如: + +```bash +$ juicefs format \ + --storage oss \ + --bucket \ + ... 
\ + myjfs +``` + +## 腾讯云 COS + +使用腾讯云 COS 作为 JuiceFS 数据存储,Bucket 名称格式为 `-`,即需要在 bucket 名称后面指定 `APPID`,[点此查看](https://cloud.tencent.com/document/product/436/13312) 如何获取 `APPID` 。 + +`--bucket` 选项的完整格式为 `https://-.cos..myqcloud.com`,请将 `` 替换成你实际使用的存储区域,例如:上海的区域代码为 `ap-shanghai`。[点此查看](https://cloud.tencent.com/document/product/436/6224) 所有可用的区域代码。例如: + +```bash +$ juicefs format \ + --storage cos \ + --bucket https://-.cos..myqcloud.com \ + ... \ + myjfs +``` + +如果你是在腾讯云的服务器上创建文件系统,可以在 `--bucket` 选项中直接指定 bucket 名称。 例如: + +```bash +# Running within Tencent Cloud +$ juicefs format \ + --storage cos \ + --bucket - \ + ... \ + myjfs +``` + +## 华为云 OBS + +使用华为云 OBS 作为 JuiceFS 数据存储,请先参照 [这篇文档](https://support.huaweicloud.com/usermanual-ca/zh-cn_topic_0046606340.html) 了解如何创建 `Access key` 和 `Secret key`。 + +`--bucket` 选项的格式为 `https://.obs..myhuaweicloud.com`,请将 `` 替换成你实际使用的存储区域,例如:北京一的区域代码为 `cn-north-1`。[点此查看](https://developer.huaweicloud.com/endpoint?OBS) 所有可用的区域代码。例如: + +```bash +$ juicefs format \ + --storage obs \ + --bucket https://.obs..myhuaweicloud.com \ + ... \ + myjfs +``` + +如果是你在华为云的服务器上创建文件系统,可以在 `--bucket` 直接指定 bucket 名称。例如: + +```bash +# Running within Huawei Cloud +$ juicefs format \ + --storage obs \ + --bucket \ + ... \ + myjfs +``` + +## 百度 BOS + +使用百度云 BOS 作为 JuiceFS 数据存储,请先参照 [这篇文档](https://cloud.baidu.com/doc/Reference/s/9jwvz2egb) 了解如何创建 `Access key` 和 `Secret key`。 + +`--bucket` 选项的格式为 `https://..bcebos.com`,请将 `` 替换成你实际使用的存储区域,例如:北京的区域代码为 `bj`。[点此查看](https://cloud.baidu.com/doc/BOS/s/Ck1rk80hn#%E8%AE%BF%E9%97%AE%E5%9F%9F%E5%90%8D%EF%BC%88endpoint%EF%BC%89) 所有可用的区域代码。例如: + +```bash +$ juicefs format \ + --storage bos \ + --bucket https://..bcebos.com \ + ... \ + myjfs +``` + +如果你是在百度云的服务器上创建文件系统,可以在 `--bucket` 直接指定 bucket 名称。例如: + +```bash +# Running within Baidu Cloud +$ juicefs format \ + --storage bos \ + --bucket \ + ... \ + myjfs +``` + +## 金山云 KS3 + +使用金山云 KS3 作为 JuiceFS 数据存储,请先参照 [这篇文档](https://docs.ksyun.com/documents/1386) 了解如何创建 `Access key` 和 `Secret key`。 + +金山云 KS3 为每个区域都提供了 `公网` 和 `内网` [endpoint 链接](https://docs.ksyun.com/documents/6761),你可以根据实际的场景选用。 + +```bash +$ juicefs format \ + --storage ks3 \ + --bucket https://. \ + ... \ + myjfs +``` + +## 美团云 MMS + +使用美团云 MMS 作为 JuiceFS 数据存储,请先参照 [这篇文档](https://www.mtyun.com/doc/api/mss/mss/fang-wen-kong-zhi) 了解如何创建 `Access key` 和 `Secret key`。 + +`--bucket` 选项的格式为 `https://.`,请将 `` 替换成你实际地址,例如:`mtmss.com`。[点此查看](https://www.mtyun.com/doc/products/storage/mss/index#%E5%8F%AF%E7%94%A8%E5%8C%BA%E5%9F%9F) 所有可用的 endpoint 地址。例如: + +```bash +$ juicefs format \ + --storage mss \ + --bucket https://. \ + ... \ + myjfs +``` + +## 网易云 NOS + +使用网易云 NOS 作为 JuiceFS 数据存储,请先参照 [这篇文档](https://www.163yun.com/help/documents/55485278220111872) 了解如何创建 `Access key` 和 `Secret key`。 + +网易云 NOS 为每个区域都提供了 `公网` 和 `内网` [endpoint 链接](https://www.163yun.com/help/documents/67078583131230208),你可以根据实际的场景选用。例如: + +```bash +$ juicefs format \ + --storage nos \ + --bucket https://. \ + ... \ + myjfs +``` + +## 青云 QingStor + +使用青云 QingStor 作为 JuiceFS 数据存储,请先参照 [这篇文档](https://docsv3.qingcloud.com/storage/object-storage/api/practices/signature/#%E8%8E%B7%E5%8F%96-access-key) 了解如何创建 `Access key` 和 `Secret key`。 + +`--bucket` 选项的格式为 `https://..qingstor.com`,请将 `` 替换成你实际使用的存储区域,例如:北京 3-A 的区域代码为 `pek3a`。[点此查看](https://docs.qingcloud.com/qingstor/#%E5%8C%BA%E5%9F%9F%E5%8F%8A%E8%AE%BF%E9%97%AE%E5%9F%9F%E5%90%8D) 所有可用的区域代码。例如: + +```bash +$ juicefs format \ + --storage qingstor \ + --bucket https://..qingstor.com \ + ... 
    myjfs
```

:::note 注意
所有兼容 QingStor 的对象存储服务,其 `--bucket` 选项的格式均为 `http://<bucket>.<endpoint>`。
:::

## 七牛云 Kodo

使用七牛云 Kodo 作为 JuiceFS 数据存储,请先参照 [这篇文档](https://developer.qiniu.com/af/kb/1479/how-to-access-or-locate-the-access-key-and-secret-key) 了解如何创建 `Access key` 和 `Secret key`。

`--bucket` 选项的格式为 `https://<bucket>.s3-<region>.qiniucs.com`,请将 `<region>` 替换成你实际使用的存储区域,例如:中国东部的区域代码为 `cn-east-1`。[点此查看](https://developer.qiniu.com/kodo/4088/s3-access-domainname) 所有可用的区域代码。例如:

```bash
$ juicefs format \
    --storage qiniu \
    --bucket https://<bucket>.s3-<region>.qiniucs.com \
    ... \
    myjfs
```

## 新浪云 SCS

使用新浪云 SCS 作为 JuiceFS 数据存储,请先参照 [这篇文档](https://scs.sinacloud.com/doc/scs/guide/quick_start#accesskey) 了解如何创建 `Access key` 和 `Secret key`。

`--bucket` 选项格式为 `https://<bucket>.stor.sinaapp.com`。例如:

```bash
$ juicefs format \
    --storage scs \
    --bucket https://<bucket>.stor.sinaapp.com \
    ... \
    myjfs
```

## 天翼云 OOS

使用天翼云 OOS 作为 JuiceFS 数据存储,请先参照 [这篇文档](https://www.ctyun.cn/help2/10000101/10473683) 了解如何创建 `Access key` 和 `Secret key`。

`--bucket` 选项的格式为 `https://<bucket>.oss-<region>.ctyunapi.cn`,请将 `<region>` 替换成你实际使用的存储区域,例如:成都的区域代码为 `sccd`。[点此查看](https://www.ctyun.cn/help2/10000101/10474062) 所有可用的区域代码。例如:

```bash
$ juicefs format \
    --storage oos \
    --bucket https://<bucket>.oss-<region>.ctyunapi.cn \
    ... \
    myjfs
```

## 移动云 EOS

使用移动云 EOS 作为 JuiceFS 数据存储,请先参照 [这篇文档](https://ecloud.10086.cn/op-help-center/doc/article/24501) 了解如何创建 `Access key` 和 `Secret key`。

移动云 EOS 为每个区域都提供了 `公网` 和 `内网` [endpoint 链接](https://ecloud.10086.cn/op-help-center/doc/article/40956),你可以根据实际的场景选用。例如:

```bash
$ juicefs format \
    --storage eos \
    --bucket https://<bucket>.<endpoint> \
    ... \
    myjfs
```

## 优刻得 US3

使用优刻得 US3 作为 JuiceFS 数据存储,请先参照 [这篇文档](https://docs.ucloud.cn/uai-censor/access/key) 了解如何创建 `Access key` 和 `Secret key`。

优刻得 US3(原名 UFile)为每个区域都提供了 `公网` 和 `内网` [endpoint 链接](https://docs.ucloud.cn/ufile/introduction/region),你可以根据实际的场景选用。例如:

```bash
$ juicefs format \
    --storage ufile \
    --bucket https://<bucket>.<endpoint> \
    ... \
    myjfs
```

## Ceph RADOS

:::note 注意
JuiceFS 支持的 Ceph 最低版本是 Luminous(v12.2.*),请确认你的 Ceph 版本是否符合要求。
:::

[Ceph 存储集群](https://docs.ceph.com/en/latest/rados) 具有消息传递层协议,该协议使客户端能够与 Ceph Monitor 和 Ceph OSD 守护程序进行交互。[`librados`](https://docs.ceph.com/en/latest/rados/api/librados-intro) API 使您可以与这两种类型的守护程序进行交互:

- [Ceph Monitor](https://docs.ceph.com/en/latest/rados/configuration/common/#monitors) 维护群集映射的主副本
- [Ceph OSD Daemon (OSD)](https://docs.ceph.com/en/latest/rados/configuration/common/#osds) 将数据作为对象存储在存储节点上

JuiceFS 支持使用基于 `librados` 的本地 Ceph API。您需要先安装 `librados` 库,并重新编译 `juicefs` 二进制文件。

首先安装 `librados`:

:::note 注意
建议使用匹配你的 Ceph 版本的 `librados`,例如 Ceph 版本是 Octopus(v15.2.\*),那么 `librados` 也建议使用 v15.2.\* 版本。某些 Linux 发行版(如 CentOS 7)自带的 `librados` 版本可能较低,如果编译 JuiceFS 失败可以尝试下载更高版本的安装包。
:::

```bash
# Debian based system
$ sudo apt-get install librados-dev

# RPM based system
$ sudo yum install librados2-devel
```

然后为 Ceph 编译 JuiceFS(要求 Go 1.16+ 和 GCC 5.4+):

```bash
$ make juicefs.ceph
```

[存储池](https://docs.ceph.com/zh_CN/latest/rados/operations/pools) 是用于存储对象的逻辑分区,您可能需要首先创建一个存储池。`--access-key` 选项的值是 Ceph 集群名称,默认集群名称是 `ceph`。`--secret-key` 选项的值是 [Ceph 客户端用户名](https://docs.ceph.com/en/latest/rados/operations/user-management),默认用户名是 `client.admin`。

为了连接到 Ceph Monitor,`librados` 将通过搜索默认位置读取 Ceph 的配置文件,并使用找到的第一个。这些位置是:

- `CEPH_CONF` 环境变量
- `/etc/ceph/ceph.conf`
- `~/.ceph/config`
- 在当前工作目录中的 `ceph.conf`

例如:

```bash
$ juicefs.ceph format \
    --storage ceph \
    --bucket ceph://<pool-name> \
    --access-key <cluster-name> \
    --secret-key <user-name> \
    ... \
    myjfs
```

## Ceph RGW

[Ceph Object Gateway](https://ceph.io/ceph-storage/object-storage) 是在 `librados` 之上构建的对象存储接口,旨在为应用程序提供访问 Ceph 存储集群的 RESTful 网关。Ceph 对象网关支持 S3 兼容的接口,因此我们可以将 `--storage` 设置为 `s3`。

`--bucket` 选项的格式为 `http://<bucket>.<endpoint>`(虚拟托管类型),例如:

```bash
$ juicefs format \
    --storage s3 \
    --bucket http://<bucket>.<endpoint> \
    ... \
    myjfs
```

## Swift

[OpenStack Swift](https://github.com/openstack/swift) 是一种分布式对象存储系统,旨在从一台计算机扩展到数千台服务器。Swift 已针对多租户和高并发进行了优化,是存储备份、Web 和移动内容的理想选择,可以无限量存储任何非结构化数据。

`--bucket` 选项格式为 `http://<container>.<endpoint>`,其中 `container` 用来设定对象的命名空间。

**当前,JuiceFS 仅支持 [Swift V1 authentication](https://www.swiftstack.com/docs/cookbooks/swift_usage/auth.html)。**

`--access-key` 选项的值是用户名,`--secret-key` 选项的值是密码。例如:

```bash
$ juicefs format \
    --storage swift \
    --bucket http://<container>.<endpoint> \
    --access-key <username> \
    --secret-key <password> \
    ... \
    myjfs
```

## MinIO

[MinIO](https://min.io) 是开源的轻量级对象存储,兼容 Amazon S3 API。

使用 Docker 可以很容易地在本地运行一个 MinIO 对象存储实例。例如,以下命令通过 `--console-address ":9900"` 为控制台设置并映射了 `9900` 端口,还将 MinIO 对象存储的数据路径映射到了当前目录下的 `minio-data` 文件夹中,你可以按需修改这些参数:

```shell
$ sudo docker run -d --name minio \
    -p 9000:9000 \
    -p 9900:9900 \
    -e "MINIO_ROOT_USER=minioadmin" \
    -e "MINIO_ROOT_PASSWORD=minioadmin" \
    -v $PWD/minio-data:/data \
    --restart unless-stopped \
    minio/minio server /data --console-address ":9900"
```

容器创建成功以后使用以下地址访问:

- **MinIO 管理界面**:[http://127.0.0.1:9900](http://127.0.0.1:9900/)
- **MinIO API**:[http://127.0.0.1:9000](http://127.0.0.1:9000/)

对象存储初始的 Access Key 和 Secret Key 均为 `minioadmin`。

使用 MinIO 作为 JuiceFS 的数据存储,`--storage` 选项设置为 `minio`。

```bash
$ juicefs format \
    --storage minio \
    --bucket http://127.0.0.1:9000/<bucket> \
    --access-key minioadmin \
    --secret-key minioadmin \
    ...
\ + myjfs +``` + +:::note 注意 +当前,JuiceFS 仅支持路径风格的 MinIO URI 地址,例如:`http://127.0.0.1:9000/myjfs`。 +::: + +## WebDAV + +[WebDAV](https://en.wikipedia.org/wiki/WebDAV) 是 HTTP 的扩展协议,有利于用户间协同编辑和管理存储在万维网服务器的文档。JuiceFS 0.15+ 支持使用 WebDAV 协议的存储系统作为后端数据存储。 + +你需要将 `--storage` 设置为 `webdav`,并通过 `--bucket` 来指定访问 WebDAV 的地址。如果存储系统启用了用户验证,用户名和密码可以通过 `--access-key` 和 `--secret-key` 来指定,例如: + +```bash +$ juicefs format \ + --storage webdav \ + --bucket http:/// \ + --access-key \ + --secret-key \ + ... \ + myjfs +``` + +## HDFS + +Hadoop 的文件系统 [HDFS](https://hadoop.apache.org) 也可以作为对象存储供 JuiceFS 使用。 + +当使用 HDFS 作为 JuiceFS 数据存储,`--access-key` 的值设置为用户名,默认的超级用户通常是 `hdfs`。例如: + +```bash +$ juicefs format \ + --storage hdfs \ + --bucket namenode1:8020 \ + --access-key hdfs \ + ... \ + myjfs +``` + +如果在创建文件系统时不指定 `--access-key`,JuiceFS 会使用执行 `juicefs mount` 命令的用户身份或通过 Hadoop SDK 访问 HDFS 的用户身份。如果该用户没有 HDFS 的读写权限,则程序会失败挂起,发生 IO 错误。 + +JuiceFS 会尝试基于 `$HADOOP_CONF_DIR` 或 `$HADOOP_HOME` 为 HDFS 客户端加载配置。如果 `--bucket` 选项留空,将使用在 Hadoop 配置中找到的默认 HDFS。 + +对于 HA 群集,可以像下面这样一起指定 NameNodes 的地址:`--bucket=namenode1:port,namenode2:port`。 + +## Redis + +Redis 既可以作为 JuiceFS 的元数据存储,也可以作为数据存储,但当使用 Redis 作为数据存储时,建议不要存储大规模数据。 + +`--bucket` 选项格式为 `redis://:/`。`--access-key` 选项的值是用户名,`--secret-key` 选项的值是密码。例如: + +```bash +$ juicefs format \ + --storage redis \ + --bucket redis://:/ \ + --access-key \ + --secret-key \ + ... \ + myjfs +``` + +## TiKV + +[TiKV](https://tikv.org) 是一个高度可扩展、低延迟且易于使用的键值数据库。它提供原始和符合 ACID 的事务键值 API。 + +TiKV 既可以用作 JuiceFS 的元数据存储,也可以用于 JuiceFS 的数据存储。 + +`--bucket` 选项格式类似 `:,:,:`,其中 `` 是 Placement Driver(PD)的地址。`--access-key` 和 `--secret-key` 选项没有作用,可以省略。例如: + +```bash +$ juicefs format \ + --storage tikv \ + --bucket ":,:,:" \ + ... \ + myjfs +``` + +## 本地磁盘 + +在创建 JuiceFS 文件系统时,如果没有指定任何存储类型,会默认使用本地磁盘作为数据存储,root 用户默认存储路径为 `/var/jfs`,普通用户默认存储路径为 `~/.juicefs/local`。 + +例如,以下命令使用本地的 Redis 数据库和本地磁盘创建了一个名为 `myfs` 的文件系统: + +```shell +$ juicefs format redis://localhost:6379/1 myjfs +``` + +本地存储通常仅用于了解和体验 JuiceFS 的基本功能,创建的 JuiceFS 存储无法被网络内的其他客户端挂载,只能单机使用。 diff --git a/docs/zh_cn/reference/p8s_metrics.md b/docs/zh_cn/reference/p8s_metrics.md new file mode 100644 index 0000000..71b0e81 --- /dev/null +++ b/docs/zh_cn/reference/p8s_metrics.md @@ -0,0 +1,118 @@ +--- +sidebar_label: JuiceFS 监控指标 +sidebar_position: 2 +slug: /p8s_metrics +--- + +# JuiceFS 监控指标 + +:::tip 提示 +请查看[「监控」](../administration/monitoring.md)文档了解如何收集及展示 JuiceFS 监控指标 +::: + +## 全局标签 + +| 名称 | 描述 | +| ---- | ----------- | +| `vol_name` | Volume 名称 | +| `mp` | 挂载点路径 | + +:::info 说明 +Prometheus 在抓取监控指标时会自动附加 `instance` 标签以帮助识别不同的抓取目标,格式为 `:`。详见[官方文档](https://prometheus.io/docs/concepts/jobs_instances)。 +::: + +:::info 说明 +如果是通过 [Prometheus Pushgateway](https://github.com/prometheus/pushgateway) 的方式上报监控指标(例如 [JuiceFS Hadoop Java SDK](../administration/monitoring.md#hadoop)),`mp` 标签的值为 `sdk-`,`instance` 标签的值为主机名。 +::: + +## 文件系统 + +### 指标 + +| 名称 | 描述 | 单位 | +| ---- | ----------- | ---- | +| `juicefs_used_space` | 总使用空间 | 字节 | +| `juicefs_used_inodes` | 总 inodes 数量 | | + +## 操作系统 + +### 指标 + +| 名称 | 描述 | 单位 | +| ---- | ----------- | ---- | +| `juicefs_uptime` | 总运行时间 | 秒 | +| `juicefs_cpu_usage` | CPU 使用量 | 秒 | +| `juicefs_memory` | 内存使用量 | 字节 | + +## 元数据引擎 + +### 指标 + +| 名称 | 描述 | 单位 | +| ---- | ----------- | ---- | +| `juicefs_transaction_durations_histogram_seconds` | 事务的延时分布 | 秒 | +| `juicefs_transaction_restart` | 事务重启的次数 | | + +## FUSE + +### 指标 + +| 名称 | 描述 | 单位 | +| ---- | ----------- | ---- | +| `juicefs_fuse_read_size_bytes` | 读请求的大小分布 | 字节 | +| 
`juicefs_fuse_written_size_bytes` | 写请求的大小分布 | 字节 | +| `juicefs_fuse_ops_durations_histogram_seconds` | 所有请求的延时分布 | 秒 | +| `juicefs_fuse_open_handlers` | 打开的文件和目录数量 | | + +## SDK + +### 指标 + +| 名称 | 描述 | 单位 | +| ---- | ----------- | ---- | +| `juicefs_sdk_read_size_bytes` | 读请求的大小分布 | 字节 | +| `juicefs_sdk_written_size_bytes` | 写请求的大小分布 | 字节 | +| `juicefs_sdk_ops_durations_histogram_seconds` | 所有请求的延时分布 | 秒 | + +## 缓存 + +### 指标 + +| 名称 | 描述 | 单位 | +| ---- | ----------- | ---- | +| `juicefs_blockcache_blocks` | 缓存块的总个数 | | +| `juicefs_blockcache_bytes` | 缓存块的总大小 | 字节 | +| `juicefs_blockcache_hits` | 命中缓存块的总次数 | | +| `juicefs_blockcache_miss` | 没有命中缓存块的总次数 | | +| `juicefs_blockcache_writes` | 写入缓存块的总次数 | | +| `juicefs_blockcache_drops` | 丢弃缓存块的总次数 | | +| `juicefs_blockcache_evicts` | 淘汰缓存块的总次数 | | +| `juicefs_blockcache_hit_bytes` | 命中缓存块的总大小 | 字节 | +| `juicefs_blockcache_miss_bytes` | 没有命中缓存块的总大小 | 字节 | +| `juicefs_blockcache_write_bytes` | 写入缓存块的总大小 | 字节 | +| `juicefs_blockcache_read_hist_seconds` | 读缓存块的延时分布 | 秒 | +| `juicefs_blockcache_write_hist_seconds` | 写缓存块的延时分布 | 秒 | + +## 对象存储 + +### 标签 + +| 名称 | 描述 | +| ---- | ----------- | +| `method` | 请求对象存储的方法(例如 GET、PUT、HEAD、DELETE) | + +### 指标 + +| 名称 | 描述 | 单位 | +| ---- | ----------- | ---- | +| `juicefs_object_request_durations_histogram_seconds` | 请求对象存储的延时分布 | 秒 | +| `juicefs_object_request_errors` | 请求失败的总次数 | | +| `juicefs_object_request_data_bytes` | 请求对象存储的总数据大小 | 字节 | + +## 内部特性 + +### 指标 + +| 名称 | 描述 | 单位 | +| ---- | ----------- | ---- | +| `juicefs_compact_size_histogram_bytes` | 合并数据的大小分布 | 字节 | diff --git a/docs/zh_cn/reference/posix_compatibility.md b/docs/zh_cn/reference/posix_compatibility.md new file mode 100644 index 0000000..f3eb5f1 --- /dev/null +++ b/docs/zh_cn/reference/posix_compatibility.md @@ -0,0 +1,287 @@ +--- +sidebar_label: POSIX 兼容性 +sidebar_position: 6 +slug: /posix_compatibility +--- +# POSIX 兼容性 + +JuiceFS 借助于 pjdfstest 和 LTP 来验证其对 POSIX 的兼容性。 + +## Pjdfstest + +[Pjdfstest](https://github.com/pjd/pjdfstest) 是一个用来帮助验证 POSIX 系统调用的测试集,JuiceFS 通过了其最新的 8813 项测试: + +``` +All tests successful. + +Test Summary Report +------------------- +/root/soft/pjdfstest/tests/chown/00.t (Wstat: 0 Tests: 1323 Failed: 0) + TODO passed: 693, 697, 708-709, 714-715, 729, 733 +Files=235, Tests=8813, 233 wallclock secs ( 2.77 usr 0.38 sys + 2.57 cusr 3.93 csys = 9.65 CPU) +Result: PASS +``` + +此外,JuiceFS 还提供: + +- 关闭再打开(close-to-open)一致性。一旦一个文件写入完成并关闭,之后的打开和读操作保证可以访问之前写入的数据。如果是在同一个挂载点,所有写入的数据都可以立即读。 +- 重命名以及所有其他元数据操作都是原子的,由元数据引擎的事务机制保证。 +- 当文件被删除后,同一个挂载点上如果已经打开了,文件还可以继续访问。 +- 支持 mmap +- 支持 fallocate 以及空洞 +- 支持扩展属性 +- 支持 BSD 锁(flock) +- 支持 POSIX 记录锁(fcntl) + +## LTP + +[LTP](https://github.com/linux-test-project/ltp)(Linux Test Project)是一个由 IBM,Cisco 等多家公司联合开发维护的项目,旨在为开源社区提供一个验证 Linux 可靠性和稳定性的测试集。LTP 中包含了各种工具来检验 Linux 内核和相关特性;JuiceFS 通过了其中与文件系统相关的大部分测试例。 + +### 测试环境 + +- 测试主机: Amazon EC2: c5d.xlarge (4C 8G) +- 操作系统: Ubuntu 20.04.1 LTS (Kernel 5.4.0-1029-aws) +- 对象存储: Amazon S3 +- JuiceFS 版本: 0.17-dev (2021-09-16 292f2b65) + +### 测试步骤 + +1. 在 GitHub 下载 LTP [源码包](https://github.com/linux-test-project/ltp/releases/download/20210524/ltp-full-20210524.tar.bz2) +2. 解压后编译安装: + +```bash +$ tar -jvxf ltp-full-20210524.tar.bz2 +$ cd ltp-full-20210524 +$ ./configure +$ make all +$ make install +``` + +3. 
测试工具安装在 `/opt/ltp`,需先切换到此目录: + +```bash +$ cd /opt/ltp +``` + +测试配置文件在 `runtest` 目录下;为方便测试,删去了 `fs` 和 `syscalls` 中部分压力测试和与文件系统不想关的条目(参见[附录](#附录),修改后保存到文件 `fs-jfs` 和 `syscalls-jfs`),然后执行命令: + +```bash +$ ./runltp -d /mnt/jfs -f fs_bind,fs_perms_simple,fsx,io,smoketest,fs-jfs,syscalls-jfs +``` + +### 测试结果 + +```bash +Testcase Result Exit Value +-------- ------ ---------- +fcntl17 FAIL 7 +fcntl17_64 FAIL 7 +getxattr05 CONF 32 +ioctl_loop05 FAIL 4 +ioctl_ns07 FAIL 1 +lseek11 CONF 32 +open14 CONF 32 +openat03 CONF 32 +setxattr03 FAIL 6 + +----------------------------------------------- +Total Tests: 1270 +Total Skipped Tests: 4 +Total Failures: 5 +Kernel Version: 5.4.0-1029-aws +Machine Architecture: x86_64 +``` + +其中跳过和失败的测试例原因如下: + +- fcntl17,fcntl17_64:在 POSIX locks 加锁时需要文件系统自动检测死锁,目前 JuiceFS 尚不支持 +- getxattr05:需要设置 ACL,目前 JuiceFS 尚不支持 +- ioctl_loop05,ioctl_ns07,setxattr03:需要调用 `ioctl`,目前 JuiceFS 尚不支持 +- lseek11:需要 `lseek` 处理 SEEK_DATA 和 SEEK_HOLE 标记位,目前 JuiceFS 用的是内核通用实现,尚不支持这两个 flags +- open14,openat03:需要 `open` 处理 O_TMPFILE 标记位,由于 FUSE 不支持,JuiceFS 也无法实现 + +### 附录 + +在 `fs` 和 `syscalls` 文件中删去的测试例: + +```bash +# fs --> fs-jfs +gf01 growfiles -W gf01 -b -e 1 -u -i 0 -L 20 -w -C 1 -l -I r -T 10 -f glseek20 -S 2 -d $TMPDIR +gf02 growfiles -W gf02 -b -e 1 -L 10 -i 100 -I p -S 2 -u -f gf03_ -d $TMPDIR +gf03 growfiles -W gf03 -b -e 1 -g 1 -i 1 -S 150 -u -f gf05_ -d $TMPDIR +gf04 growfiles -W gf04 -b -e 1 -g 4090 -i 500 -t 39000 -u -f gf06_ -d $TMPDIR +gf05 growfiles -W gf05 -b -e 1 -g 5000 -i 500 -t 49900 -T10 -c9 -I p -u -f gf07_ -d $TMPDIR +gf06 growfiles -W gf06 -b -e 1 -u -r 1-5000 -R 0--1 -i 0 -L 30 -C 1 -f g_rand10 -S 2 -d $TMPDIR +gf07 growfiles -W gf07 -b -e 1 -u -r 1-5000 -R 0--2 -i 0 -L 30 -C 1 -I p -f g_rand13 -S 2 -d $TMPDIR +gf08 growfiles -W gf08 -b -e 1 -u -r 1-5000 -R 0--2 -i 0 -L 30 -C 1 -f g_rand11 -S 2 -d $TMPDIR +gf09 growfiles -W gf09 -b -e 1 -u -r 1-5000 -R 0--1 -i 0 -L 30 -C 1 -I p -f g_rand12 -S 2 -d $TMPDIR +gf10 growfiles -W gf10 -b -e 1 -u -r 1-5000 -i 0 -L 30 -C 1 -I l -f g_lio14 -S 2 -d $TMPDIR +gf11 growfiles -W gf11 -b -e 1 -u -r 1-5000 -i 0 -L 30 -C 1 -I L -f g_lio15 -S 2 -d $TMPDIR +gf12 mkfifo $TMPDIR/gffifo17; growfiles -b -W gf12 -e 1 -u -i 0 -L 30 $TMPDIR/gffifo17 +gf13 mkfifo $TMPDIR/gffifo18; growfiles -b -W gf13 -e 1 -u -i 0 -L 30 -I r -r 1-4096 $TMPDIR/gffifo18 +gf14 growfiles -W gf14 -b -e 1 -u -i 0 -L 20 -w -l -C 1 -T 10 -f glseek19 -S 2 -d $TMPDIR +gf15 growfiles -W gf15 -b -e 1 -u -r 1-49600 -I r -u -i 0 -L 120 -f Lgfile1 -d $TMPDIR +gf16 growfiles -W gf16 -b -e 1 -i 0 -L 120 -u -g 4090 -T 101 -t 408990 -l -C 10 -c 1000 -S 10 -f Lgf02_ -d $TMPDIR +gf17 growfiles -W gf17 -b -e 1 -i 0 -L 120 -u -g 5000 -T 101 -t 499990 -l -C 10 -c 1000 -S 10 -f Lgf03_ -d $TMPDIR +gf18 growfiles -W gf18 -b -e 1 -i 0 -L 120 -w -u -r 10-5000 -I r -l -S 2 -f Lgf04_ -d $TMPDIR +gf19 growfiles -W gf19 -b -e 1 -g 5000 -i 500 -t 49900 -T10 -c9 -I p -o O_RDWR,O_CREAT,O_TRUNC -u -f gf08i_ -d $TMPDIR +gf20 growfiles -W gf20 -D 0 -b -i 0 -L 60 -u -B 1000b -e 1 -r 1-256000:512 -R 512-256000 -T 4 -f gfbigio-$$ -d $TMPDIR +gf21 growfiles -W gf21 -D 0 -b -i 0 -L 60 -u -B 1000b -e 1 -g 20480 -T 10 -t 20480 -f gf-bld-$$ -d $TMPDIR +gf22 growfiles -W gf22 -D 0 -b -i 0 -L 60 -u -B 1000b -e 1 -g 20480 -T 10 -t 20480 -f gf-bldf-$$ -d $TMPDIR +gf23 growfiles -W gf23 -D 0 -b -i 0 -L 60 -u -B 1000b -e 1 -r 512-64000:1024 -R 1-384000 -T 4 -f gf-inf-$$ -d $TMPDIR +gf24 growfiles -W gf24 -D 0 -b -i 0 -L 60 -u -B 1000b -e 1 -g 20480 -f gf-jbld-$$ -d $TMPDIR +gf25 growfiles -W gf25 -D 0 -b -i 0 -L 60 
-u -B 1000b -e 1 -r 1024000-2048000:2048 -R 4095-2048000 -T 1 -f gf-large-gs-$$ -d $TMPDIR +gf26 growfiles -W gf26 -D 0 -b -i 0 -L 60 -u -B 1000b -e 1 -r 128-32768:128 -R 512-64000 -T 4 -f gfsmallio-$$ -d $TMPDIR +gf27 growfiles -W gf27 -b -D 0 -w -g 8b -C 1 -b -i 1000 -u -f gfsparse-1-$$ -d $TMPDIR +gf28 growfiles -W gf28 -b -D 0 -w -g 16b -C 1 -b -i 1000 -u -f gfsparse-2-$$ -d $TMPDIR +gf29 growfiles -W gf29 -b -D 0 -r 1-4096 -R 0-33554432 -i 0 -L 60 -C 1 -u -f gfsparse-3-$$ -d $TMPDIR +gf30 growfiles -W gf30 -D 0 -b -i 0 -L 60 -u -B 1000b -e 1 -o O_RDWR,O_CREAT,O_SYNC -g 20480 -T 10 -t 20480 -f gf-sync-$$ -d $TMPDIR +rwtest01 export LTPROOT; rwtest -N rwtest01 -c -q -i 60s -f sync 10%25000:$TMPDIR/rw-sync-$$ +rwtest02 export LTPROOT; rwtest -N rwtest02 -c -q -i 60s -f buffered 10%25000:$TMPDIR/rw-buffered-$$ +rwtest03 export LTPROOT; rwtest -N rwtest03 -c -q -i 60s -n 2 -f buffered -s mmread,mmwrite -m random -Dv 10%25000:$TMPDIR/mm-buff-$$ +rwtest04 export LTPROOT; rwtest -N rwtest04 -c -q -i 60s -n 2 -f sync -s mmread,mmwrite -m random -Dv 10%25000:$TMPDIR/mm-sync-$$ +rwtest05 export LTPROOT; rwtest -N rwtest05 -c -q -i 50 -T 64b 500b:$TMPDIR/rwtest01%f +iogen01 export LTPROOT; rwtest -N iogen01 -i 120s -s read,write -Da -Dv -n 2 500b:$TMPDIR/doio.f1.$$ 1000b:$TMPDIR/doio.f2.$$ +quota_remount_test01 quota_remount_test01.sh +isofs isofs.sh + +# syscalls --> syscalls-jfs +bpf_prog05 bpf_prog05 +cacheflush01 cacheflush01 +chown01_16 chown01_16 +chown02_16 chown02_16 +chown03_16 chown03_16 +chown04_16 chown04_16 +chown05_16 chown05_16 +clock_nanosleep03 clock_nanosleep03 +clock_gettime03 clock_gettime03 +leapsec01 leapsec01 +close_range01 close_range01 +close_range02 close_range02 +fallocate06 fallocate06 +fchown01_16 fchown01_16 +fchown02_16 fchown02_16 +fchown03_16 fchown03_16 +fchown04_16 fchown04_16 +fchown05_16 fchown05_16 +fcntl06 fcntl06 +fcntl06_64 fcntl06_64 +getegid01_16 getegid01_16 +getegid02_16 getegid02_16 +geteuid01_16 geteuid01_16 +geteuid02_16 geteuid02_16 +getgid01_16 getgid01_16 +getgid03_16 getgid03_16 +getgroups01_16 getgroups01_16 +getgroups03_16 getgroups03_16 +getresgid01_16 getresgid01_16 +getresgid02_16 getresgid02_16 +getresgid03_16 getresgid03_16 +getresuid01_16 getresuid01_16 +getresuid02_16 getresuid02_16 +getresuid03_16 getresuid03_16 +getrusage04 getrusage04 +getuid01_16 getuid01_16 +getuid03_16 getuid03_16 +ioctl_sg01 ioctl_sg01 +fanotify16 fanotify16 +fanotify18 fanotify18 +fanotify19 fanotify19 +lchown01_16 lchown01_16 +lchown02_16 lchown02_16 +lchown03_16 lchown03_16 +mbind02 mbind02 +mbind03 mbind03 +mbind04 mbind04 +migrate_pages02 migrate_pages02 +migrate_pages03 migrate_pages03 +modify_ldt01 modify_ldt01 +modify_ldt02 modify_ldt02 +modify_ldt03 modify_ldt03 +move_pages01 move_pages01 +move_pages02 move_pages02 +move_pages03 move_pages03 +move_pages04 move_pages04 +move_pages05 move_pages05 +move_pages06 move_pages06 +move_pages07 move_pages07 +move_pages09 move_pages09 +move_pages10 move_pages10 +move_pages11 move_pages11 +move_pages12 move_pages12 +msgctl05 msgctl05 +msgstress04 msgstress04 +openat201 openat201 +openat202 openat202 +openat203 openat203 +madvise06 madvise06 +madvise09 madvise09 +ptrace04 ptrace04 +quotactl01 quotactl01 +quotactl04 quotactl04 +quotactl06 quotactl06 +readdir21 readdir21 +recvmsg03 recvmsg03 +sbrk03 sbrk03 +semctl08 semctl08 +semctl09 semctl09 +set_mempolicy01 set_mempolicy01 +set_mempolicy02 set_mempolicy02 +set_mempolicy03 set_mempolicy03 +set_mempolicy04 set_mempolicy04 +set_thread_area01 set_thread_area01 
+setfsgid01_16 setfsgid01_16 +setfsgid02_16 setfsgid02_16 +setfsgid03_16 setfsgid03_16 +setfsuid01_16 setfsuid01_16 +setfsuid02_16 setfsuid02_16 +setfsuid03_16 setfsuid03_16 +setfsuid04_16 setfsuid04_16 +setgid01_16 setgid01_16 +setgid02_16 setgid02_16 +setgid03_16 setgid03_16 +sgetmask01 sgetmask01 +setgroups01_16 setgroups01_16 +setgroups02_16 setgroups02_16 +setgroups03_16 setgroups03_16 +setgroups04_16 setgroups04_16 +setregid01_16 setregid01_16 +setregid02_16 setregid02_16 +setregid03_16 setregid03_16 +setregid04_16 setregid04_16 +setresgid01_16 setresgid01_16 +setresgid02_16 setresgid02_16 +setresgid03_16 setresgid03_16 +setresgid04_16 setresgid04_16 +setresuid01_16 setresuid01_16 +setresuid02_16 setresuid02_16 +setresuid03_16 setresuid03_16 +setresuid04_16 setresuid04_16 +setresuid05_16 setresuid05_16 +setreuid01_16 setreuid01_16 +setreuid02_16 setreuid02_16 +setreuid03_16 setreuid03_16 +setreuid04_16 setreuid04_16 +setreuid05_16 setreuid05_16 +setreuid06_16 setreuid06_16 +setreuid07_16 setreuid07_16 +setuid01_16 setuid01_16 +setuid03_16 setuid03_16 +setuid04_16 setuid04_16 +shmctl06 shmctl06 +socketcall01 socketcall01 +socketcall02 socketcall02 +socketcall03 socketcall03 +ssetmask01 ssetmask01 +swapoff01 swapoff01 +swapoff02 swapoff02 +swapon01 swapon01 +swapon02 swapon02 +swapon03 swapon03 +switch01 endian_switch01 +sysinfo03 sysinfo03 +timerfd04 timerfd04 +perf_event_open02 perf_event_open02 +statx07 statx07 +io_uring02 io_uring02 +``` diff --git a/docs/zh_cn/release_notes.md b/docs/zh_cn/release_notes.md new file mode 100644 index 0000000..8e8851e --- /dev/null +++ b/docs/zh_cn/release_notes.md @@ -0,0 +1,4 @@ +# 发行注记 + +[点此查看](https://github.com/juicedata/juicefs/releases) JuiceFS 版本变更信息。 + diff --git a/docs/zh_cn/security/_data_protection.md b/docs/zh_cn/security/_data_protection.md new file mode 100644 index 0000000..4df9b18 --- /dev/null +++ b/docs/zh_cn/security/_data_protection.md @@ -0,0 +1,5 @@ +--- +sidebar_label: 数据保护 +--- + +# 数据保护 diff --git a/docs/zh_cn/security/encrypt.md b/docs/zh_cn/security/encrypt.md new file mode 100644 index 0000000..05cb7e0 --- /dev/null +++ b/docs/zh_cn/security/encrypt.md @@ -0,0 +1,69 @@ +# 数据加密 + +## 传输中数据加密 + +JuiceFS 在网络上传输时会对数据进行加密,以防止未经授权的用户窃听网络通信。 + +JuiceFS 客户端始终使用 HTTPS 把数据上传到对象存储服务,以下情况除外: + +- 使用内部端点上传至阿里云 OSS +- 使用内部端点上传至 UCloud US3 + + +## 静态数据加密 + +JuiceFS 支持静态数据加密,即在上传到对象存储之前对数据进行加密。在这种情况下,存储在对象存储中的数据将会被加密,这可以在对象存储本身被破坏时有效地防止数据泄露。 + +JuiceFS 在客户端加密中采用了行业标准的加密方式(AES-GCM 和 RSA)。加密和解密是在 JuiceFS 客户端进行的。用户唯一需要做的是在 JuiceFS 挂载时提供一个 RSA 私钥或密码。之后便可以并像普通文件系统一样使用它,它对应用程序是完全透明的。 + +> **注意**:在客户端缓存的数据是**不**加密的。不过,只有 root 用户或所有者可以访问这些数据。如果要把缓存的数据也加密,你可以把缓存目录放在一个加密的文件系统或块存储中。 + + +### 加密和解密方法 + +必须为每个加密的文件系统创建一个全局 RSA 私钥 `M`。在对象存储中保存的每个对象都将有自己的随机对称密钥 `S`。数据用对称密钥 `S` 进行 AES-GCM 加密,`S` 用全局 RSA 私钥 `M` 进行加密,RSA 私钥使用用户指定的口令进行加密。 + +![Encryption At-rest](../images/encryption.png) + +数据加密的详细过程如下: + +- 在写入对象存储之前,数据块会使用 LZ4 或 ZStandard 进行压缩。 +- 为每个数据块生成一个随机的 256 位对称密钥 `S` 和一个随机种子 `N`。 +- 基于 AES-GCM 使用 `S` 和 `N` 对每个数据块进行加密得到 `encrypted_data`。 +- 为了避免对称密钥 `S` 在网络上明文传输,使用 RSA 公钥 `M` 对对称密钥 `S` 进行加密得到密文 `K` 。 +- 将加密后的数据 `encrypted_data`、密文 `K` 和随机种子 `N` 组合成对象,然后写入对象存储。 + +数据解密的步骤如下: + +- 读取整个加密对象(它可能比 4MB 大一点)。 +- 解析对象数据得到密文 `K`、随机种子 `N` 和被加密的数据 `encrypted_data`。 +- 用 RSA 私钥解密 `K`,得到对称密钥 `S`。 +- 基于 AES-GCM 使用 `S` 和 `N` 解密数据 `encrypted_data` 得到数据块明文。 +- 对数据块解压缩。 + + +### 密钥管理 + +在启用静态加密功能时,RSA 私钥的安全是极其重要的。如果该私钥被泄露,可能会导致数据泄露。如果该私钥丢失,那么**所有**的加密数据都将丢失,而且无法恢复。 + +当使用 `juicefs format` 创建一个新卷时,可以通过 `--encrypt-rsa-key` 参数指定 RSA 私钥来启用静态加密,该私钥将会被保存到 
Redis。当私钥被密码保护时,可以使用环境变量 `JFS_RSA_PASSPHRASE` 来指定密码。 + +使用方法: + +1. 生成 RSA 密钥 + +```shell +$ openssl genrsa -out my-priv-key.pem -aes256 2048 +``` + +2. 在格式化时提供该密钥 + +```shell +$ juicefs format --encrypt-rsa-key my-priv-key.pem META-URL NAME +``` + +> **注意**:如果私钥受密码保护,在执行 `juicefs mount` 时应使用名为`JFS_RSA_PASSPHRASE`的环境变量来指定该密码。 + + +### 性能 +TLS、HTTPS 和 AES-256 在现代 CPU 中的实现非常高效。因此,启用加密功能对文件系统的性能影响并不大。RSA 算法相对较慢,特别是解密过程。建议在存储加密中使用 2048 位 RSA 密钥。使用 4096 位密钥可能会对读取性能产生重大影响。 diff --git a/docs/zh_cn/security/trash.md b/docs/zh_cn/security/trash.md new file mode 100644 index 0000000..ef3c3d7 --- /dev/null +++ b/docs/zh_cn/security/trash.md @@ -0,0 +1,62 @@ +# 回收站 + +:::note 注意 +此特性需要使用 1.0.0 及以上版本的 JuiceFS +::: + +对存储系统来说,数据的安全性永远是其需要考虑的关键要素之一。因此,JuiceFS 设计并默认开启了**回收站**功能,会自动将用户删除的文件移动到隐藏的回收站目录内,保留一段时间后才将数据真正清理。 + +## 配置 + +用户在初始化(即执行 `format` 命令)文件系统时,可以通过 `--trash-days` 参数来设置文件在回收站内保留的时间。在此时间段内,应用删除的文件数据不会被真正清理,因此通过 `df` 命令看到的文件系统使用量并不会减少,对象存储中的对象也会依然存在。 + +- 此参数默认值为 1,意味着回收站内文件会在一天后被自动清理。 +- 将此参数值设为 0 即可禁用回收站功能,系统会在短时间内清空回收站,并使得后续应用删除的文件能被立即清理。 +- 旧版本 JuiceFS 欲使用回收站,需要在升级所有挂载点后通过 `config` 命令手动将 `--trash-days` 改为需要的正整数值。 + +如果是已完成初始化的文件系统,可以继续通过 `config` 命令更新回收站保留时间,例如: + +```bash +$ juicefs config META-URL --trash-days 7 +``` + +然后通过 `status` 命令验证配置更新成功: + +```bash +$ juicefs status META-URL + +{ + "Setting": { + ... + "TrashDays": 7 + } +} +``` + +## 使用 + +回收站本身是一个名为 `.trash` 的目录,会被自动创建在根目录 `/` 下。 + +### 组织形式 + +回收站内固定只有两级深度。第一级为根据时间以 `年-月-日-小时` 格式命名的目录(如 `2021-11-30-10`),系统会自动创建它,并将在这个小时内删除的所有文件都放在此目录下。第二级即为平铺的用户文件和空目录(通常的 `rm -r ` 命令实际会先逐个删除目标目录下文件,再删除空目录)。回收站内不再保留原来的目录树结构,为了能在不影响正常操作性能的前提下,尽可能提供恢复原树型结构的信息,回收站内的文件被自动重命名成 `{父目录 inode}-{文件 inode}-{原始文件名}` 格式。其中 `inode` 是文件系统内部的管理信息,如果用户并不需求文件原始路径,则直接关注最后的原始文件名即可。 + +:::note 注意 +第一级目录的命名取自 UTC 时间,与中国北京时间相差 8 个小时。 +::: + +:::tip 提示 +你可以通过 `juicefs info` 命令查看目录或文件的 inode +::: + +### 访问权限 + +所有用户均有权限浏览回收站,并且可以看到其他用户删除的文件列表。然而,由于回收站内的文件依然保留了其原来的权限属性,因此用户仅能读取其原本就有权限读取的文件。当文件系统使用子目录挂载模式(挂载时指定了 `--subdir` 选项,通常在 Kubernetes 上作为 CSI 驱动使用)时,回收站将会被彻底隐藏。 + +回收站内不允许用户自行创建新的文件,非 root 用户也无法删除或移动其中的文件,即使他/她是这个文件的所有者。 + +### 恢复/清理 + +文件的恢复通常建议由 root 用户来执行,其被允许直接使用类似 `mv` 的命令将文件移出回收站,而不需要任何的数据拷贝。对于普通用户而言,其仅能通过读取拥有访问权限的文件再写入到新文件的方式来达到类似恢复的效果。 + +回收站的清理由 JuiceFS 客户端自动执行,因此需要至少有 1 个在线的挂载点,默认清理周期是每小时清理 1 次。如果需要手动清理部分条目,同样需要由 root 用户来执行。 diff --git a/docs/zh_cn/tutorials/aliyun.md b/docs/zh_cn/tutorials/aliyun.md new file mode 100644 index 0000000..b8f0810 --- /dev/null +++ b/docs/zh_cn/tutorials/aliyun.md @@ -0,0 +1,303 @@ +--- +sidebar_label: 在阿里云使用 JuiceFS +sidebar_position: 4 +slug: /clouds/aliyun +--- + +# 在阿里云安装和使用 JuiceFS 存储 + +如下图所示,JuiceFS 存储由数据库和对象存储共同驱动。存入 JuiceFS 的文件会按照一定的规则被拆分成固定大小的数据块存储在对象存储中,数据对应的元数据则会存储在数据库中。 + +元数据完全独立存储,对文件的检索和处理并不会直接操作对象存储中的数据,而是先在数据库中操作元数据,只有当数据发生变化的时候,才会与对象存储交互。 + +这样的设计可以有效缩减对象存储在请求数量上的费用,同时也能让我们显著感受到 JuiceFS 带来的性能提升。 + +![](../images/juicefs-aliyun.png) + +## 准备 + +通过前面的架构描述,可以知道 JuiceFS 需要搭配数据库和对象存储一起使用。这里我们直接使用阿里云的 ECS 云服务器,结合云数据库和 OSS 对象存储。 + +在创建云计算资源时,尽量选择在相同的区域,这样可以让资源之间通过内网线路相互访问,避免使用公网线路产生额外的流量费用。 + +### 一、云服务器 ECS + +JuiceFS 对服务器硬件没有特殊要求,一般来说,云平台上最低配的云服务器也能稳定使用 JuiceFS,通常你只需要选择能够满足自身业务的配置即可。 + +需要特别说明的是,你不需要为使用 JuiceFS 重新购买服务器或是重装系统,JuiceFS 没有业务入侵性,不会对你现有的系统和程序造成任何的干扰,你完全可以在正在运行的服务器上安装和使用 JuiceFS。 + +JuiceFS 默认会占用不超过 1GB 的硬盘空间作为缓存,可以根据需要调整缓存空间的大小。该缓存是客户端与对象存储之间的一个数据缓冲层,选择性能更好的云盘,可以获得更好的性能表现。 + +在操作系统方面,阿里云 ECS 提供的所有操作系统都可以安装 JuiceFS。 + +**本文使用的 ECS 配置如下:** + +| **实例规格** | ecs.t5-lc1m1.small | +| ---------------- | ------------------------ | +| **CPU** | 1 核 | +| **内存** | 1 GB | +| **存储** | 40 GB | +| **操作系统** | Ubuntu 
Server 20.04 64位 | +| **地域及可用区** | 华东 2(上海) | + +### 二、云数据库 + +JuiceFS 会将数据对应的元数据全部存储在独立的数据库中,目前已开放支持的数据库有 Redis、MySQL、PostgreSQL 和 SQLite。 + +根据数据库类型的不同,带来的元数据性能和可靠性表现也各不相同。比如 Redis 是完全运行在内存上的,它能提供极致的性能,但运维难度较高,可靠性相对低。而 MySQL、PostgreSQL 是关系型数据库,性能不如 Redis,但运维难度不高,可靠性也有一定的保障。SQLite 是单机单文件关系型数据库,性能较低,也不适合用于大规模数据存储,但它免配置,适合单机少量数据存储的场景。 + +如果只是为了评估 JuiceFS 的功能,你可以在 ECS 云服务器手动搭建数据库使用。当你要在生产环境使用 JucieFS 时,如果没有专业的数据库运维团队,阿里云的云数据库服务通常是更好的选择。 + +当然,如果你愿意,也可以使用其他云平台上提供的云数据库服务。但在这种情况下,你只能通过公网访问云数据库,也就是说,你必须向公网暴露数据库的端口,这存在极大的安全风险,最好不要这样使用。 + +如果必须通过公网访问数据库,可以通过云数据库控台提供的白名单功能,严格限制允许访问数据库的 IP 地址,从而提升数据的安全性。从另一个角度说,如果你通过公网无法成功连接云数据库,那么可以检查数据库的白名单,检查是不是该设置限制了你的访问。 + +| 数据库 | Redis | MySQL、PostgreSQL | SQLite | +| :----------: | :----------------------: | :------------------------: | :--------------------: | +| **性能** | 强 | 适中 | 弱 | +| **运维门槛** | 高 | 适中 | 低 | +| **可靠性** | 低 | 适中 | 低 | +| **应用场景** | 海量数据、分布式高频读写 | 海量数据、分布式中低频读写 | 少量数据单机中低频读写 | + +> **注意**:如果使用 JuiceFS 的[托管服务](https://juicefs.com/docs/zh/hosted_service.html),则无需单独准备数据库。 + +**本文使用了[云数据 Redis 版](https://www.aliyun.com/product/kvstore),以下连接地址只是为了演示目的编制的伪地址:** + +| Redis 版本 | 5.0 社区版 | +| ------------ | ------------------------------------ | +| **实例规格** | 256M 标准版-单副本 | +| **连接地址** | herald-sh-abc.redis.rds.aliyuncs.com | +| **可用区** | 上海 | + +### 三、对象存储 OSS + +JuiceFS 会将所有的数据都存储到对象存储中,它支持几乎所有的对象存储服务。但为了获得最佳的性能,当使用阿里云 ECS 时,搭配阿里云 OSS 对象存储通常是最优选择。不过请注意,将 ECS 和 OSS Bucket 选择在相同的地区,这样才能通过阿里云的内网线路进行访问,不但延时低,而且不需要额外的流量费用。 + +当然,如果你愿意,也可以使用其他云平台提供的对象存储服务,但不推荐这样做。首先,通过阿里云 ECS 访问其他云平台的对象存储要走公网线路,对象存储会产生流量费用,而且这样的访问延时相比也会更高,可能会影响 JuiceFS 的性能发挥。 + +阿里云 OSS 有不同的存储级别,由于 JuiceFS 需要与对象存储频繁交互,建议使用标准存储。你可以搭配 OSS 资源包使用,降低对象存储的使用成本。 + +### API 访问秘钥 + +阿里云 OSS 需要通过 API 进行访问,你需要准备访问秘钥,包括 `Access Key ID` 和 `Access Key Secret` ,[点此查看](https://help.aliyun.com/document_detail/38738.html)获取方式。 + +> **安全建议**:显式使用 API 访问秘钥可能导致密钥泄露,推荐为云服务器分配 [RAM 服务角色](https://help.aliyun.com/document_detail/93689.htm)。当一台 ECS 被授予 OSS 操作权限以后,无需使用 API 访问秘钥即可访问 OSS。 + +## 安装 + +我们当前使用的是 Ubuntu Server 20.04 64 位系统,依次执行以下命令可以下载最新版本客户端。你也可以访问 [JuiceFS GitHub Releases](https://github.com/juicedata/juicefs/releases) 页面选择其他版本。 + +```shell +$ JFS_LATEST_TAG=$(curl -s https://api.github.com/repos/juicedata/juicefs/releases/latest | grep 'tag_name' | cut -d '"' -f 4 | tr -d 'v') +``` + +```shell +$ wget "https://github.com/juicedata/juicefs/releases/download/v${JFS_LATEST_TAG}/juicefs-${JFS_LATEST_TAG}-linux-amd64.tar.gz" +``` + +下载完成以后,解压程序到 `juice` 文件夹: + +```shell +$ mkdir juice && tar -zxvf "juicefs-${JFS_LATEST_TAG}-linux-amd64.tar.gz" -C juice +``` + +将 JuiceFS 客户端安装到 `/usr/local/bin` : + +```shell +$ sudo install juice/juicefs /usr/local/bin +``` + +执行命令,看到返回 `juicefs` 的命令帮助信息,代表客户端安装成功。 + +```shell +$ juicefs +NAME: + juicefs - A POSIX file system built on Redis and object storage. + +USAGE: + juicefs [global options] command [command options] [arguments...] 
+ +VERSION: + 0.15.2 (2021-07-07T05:51:36Z 4c16847) + +COMMANDS: + format format a volume + mount mount a volume + umount unmount a volume + gateway S3-compatible gateway + sync sync between two storage + rmr remove directories recursively + info show internal information for paths or inodes + bench run benchmark to read/write/stat big/small files + gc collect any leaked objects + fsck Check consistency of file system + profile analyze access log + status show status of JuiceFS + warmup build cache for target directories/files + dump dump metadata into a JSON file + load load metadata from a previously dumped JSON file + help, h Shows a list of commands or help for one command + +GLOBAL OPTIONS: + --verbose, --debug, -v enable debug log (default: false) + --quiet, -q only warning and errors (default: false) + --trace enable trace log (default: false) + --no-agent Disable pprof (:6060) and gops (:6070) agent (default: false) + --help, -h show help (default: false) + --version, -V print only the version (default: false) + +COPYRIGHT: + Apache License 2.0 +``` + +JuiceFS 具有良好的跨平台兼容性,同时支持在 Linux、Windows 和 macOS 上使用。本文着重介绍 JuiceFS 在 Linux 系统上的安装和使用,如果你需要了解其他系统上的安装方法,请[查阅文档](../getting-started/installation.md)。 + +## 创建 JuiceFS 存储 + +JuiceFS 客户端安装好以后,现在就可以使用前面准备好的 Redis 数据库和 OSS 对象存储来创建 JuiceFS 存储了。 + +严格意义上说,这一步操作应该叫做 “Format a volume”,即格式化一个卷。但考虑到有很多用户可能不了解或者不关心文件系统的标准术语,所以简单起见,我们就直白的把这个过程叫做“创建 JuiceFS 存储”。 + +以下命令使用 JuiceFS 客户端提供的 `format` 子命令创建了一个名为 `mystor` 的存储,即文件系统: + +```shell +$ juicefs format \ + --storage oss \ + --bucket https:// \ + --access-key \ + --secret-key \ + redis://:@herald-sh-abc.redis.rds.aliyuncs.com:6379/1 \ + mystor +``` + +**选项说明:** + +- `--storage`:指定对象存储类型,[点此查看](../reference/how_to_setup_object_storage.md#%E6%94%AF%E6%8C%81%E7%9A%84%E5%AD%98%E5%82%A8%E6%9C%8D%E5%8A%A1) JuiceFS 支持的对象存储。 +- `--bucket`:对象存储的 Bucket 域名。当使用阿里云 OSS 时,只需填写 bucket 名称即可,无需填写完整的域名,JuiceFS 会自动识别并补全地址。 +- `--access-key` 和 `--secret-key`:访问对象存储 API 的秘钥对,[点此查看](https://help.aliyun.com/document_detail/38738.html)获取方式。 + +> Redis 6.0 身份认证需要用户名和密码两个参数,地址格式为 `redis://username:password@redis-server-url:6379/1`。目前阿里云数据库 Redis 版只提供 Reids 4.0 和 5.0 两个版本,认证身份只需要密码,在设置 Redis 服务器地址时只需留空用户名即可,例如:`redis://:password@redis-server-url:6379/1` + +使用 RAM 角色绑定 ECS 时,创建 JucieFS 存储只需指定 `--storage` 和 `--bucket` 两个选项,无需提供 API 访问秘钥。命令可以改写成: + +```shell +$ juicefs format \ + --storage oss \ + --bucket https://mytest.oss-cn-shanghai.aliyuncs.com \ + redis://:@herald-sh-abc.redis.rds.aliyuncs.com:6379/1 \ + mystor +``` + +看到类似下面的输出,代表文件系统创建成功了。 + +```shell +2021/07/13 16:37:14.264445 juicefs[22290] : Meta address: redis://@herald-sh-abc.redis.rds.aliyuncs.com:6379/1 +2021/07/13 16:37:14.277632 juicefs[22290] : maxmemory_policy is "volatile-lru", please set it to 'noeviction'. 
+2021/07/13 16:37:14.281432 juicefs[22290] : Ping redis: 3.609453ms +2021/07/13 16:37:14.527879 juicefs[22290] : Data uses oss://mytest/mystor/ +2021/07/13 16:37:14.593450 juicefs[22290] : Volume is formatted as {Name:mystor UUID:4ad0bb86-6ef5-4861-9ce2-a16ac5dea81b Storage:oss Bucket:https://mytest340 AccessKey:LTAI4G4v6ioGzQXy56m3XDkG SecretKey:removed BlockSize:4096 Compression:none Shards:0 Partitions:0 Capacity:0 Inodes:0 EncryptKey:} +``` + +## 挂载 JuiceFS 存储 + +文件系统创建完成,对象存储相关的信息会被存入数据库,挂载时无需再输入对象存储的 Bucket 和秘钥等信息。 + +使用 `mount` 子命令,将文件系统挂载到 `/mnt/jfs` 目录: + +```shell +$ sudo juicefs mount -d redis://:@herald-sh-abc.redis.rds.aliyuncs.com:6379/1 /mnt/jfs +``` + +> **注意**:挂载文件系统时,只需填写 Redis 数据库地址,不需要文件系统名称。默认的缓存路径为 `/var/jfsCache`,请确保当前用户有足够的读写权限。 + +看到类似下面的输出,代表文件系统挂载成功。 + +```shell +2021/07/13 16:40:37.088847 juicefs[22307] : Meta address: redis://@herald-sh-abc.redis.rds.aliyuncs.com/1 +2021/07/13 16:40:37.101279 juicefs[22307] : maxmemory_policy is "volatile-lru", please set it to 'noeviction'. +2021/07/13 16:40:37.104870 juicefs[22307] : Ping redis: 3.408807ms +2021/07/13 16:40:37.384977 juicefs[22307] : Data use oss://mytest/mystor/ +2021/07/13 16:40:37.387412 juicefs[22307] : Disk cache (/var/jfsCache/4ad0bb86-6ef5-4861-9ce2-a16ac5dea81b/): capacity (1024 MB), free ratio (10%), max pending pages (15) +.2021/07/13 16:40:38.410742 juicefs[22307] : OK, mystor is ready at /mnt/jfs +``` + +使用 `df` 命令,可以看到文件系统的挂载情况: + +```shell +$ df -Th +文件系统 类型 容量 已用 可用 已用% 挂载点 +JuiceFS:mystor fuse.juicefs 1.0P 64K 1.0P 1% /mnt/jfs +``` + +文件系统挂载成功以后,现在就可以像使用本地硬盘那样,在 `/mnt/jfs` 目录中存储数据了。 + +> **多主机共享**:JuiceFS 存储支持被多台云服务器同时挂载使用,你可以在其他 ECS 上安装 JuiceFS 客户端,然后使用 `redis://:@herald-sh-abc.redis.rds.aliyuncs.com:6379/1` 数据库地址挂载文件系统到每一台主机上。 + +## 查看文件系统状态 + +使用 JuiceFS 客户端的 `status` 子命令可以查看一个文件系统的基本信息和连接状态。 + +```shell +$ juicefs status redis://:@herald-sh-abc.redis.rds.aliyuncs.com:6379/1 + +2021/07/13 16:56:17.143503 juicefs[22415] : Meta address: redis://@herald-sh-abc.redis.rds.aliyuncs.com:6379/1 +2021/07/13 16:56:17.157972 juicefs[22415] : maxmemory_policy is "volatile-lru", please set it to 'noeviction'. 
+2021/07/13 16:56:17.161533 juicefs[22415] : Ping redis: 3.392906ms +{ + "Setting": { + "Name": "mystor", + "UUID": "4ad0bb86-6ef5-4861-9ce2-a16ac5dea81b", + "Storage": "oss", + "Bucket": "https://mytest", + "AccessKey": "", + "BlockSize": 4096, + "Compression": "none", + "Shards": 0, + "Partitions": 0, + "Capacity": 0, + "Inodes": 0 + }, + "Sessions": [ + { + "Sid": 3, + "Heartbeat": "2021-07-13T16:55:38+08:00", + "Version": "0.15.2 (2021-07-07T05:51:36Z 4c16847)", + "Hostname": "demo-test-sh", + "MountPoint": "/mnt/jfs", + "ProcessID": 22330 + } + ] +} +``` + +## 卸载 JuiceFS 存储 + +使用 JuiceFS 客户端提供的 `umount` 命令即可卸载文件系统,比如: + +```shell +$ sudo juicefs umount /mnt/jfs +``` + +> **注意**:强制卸载使用中的文件系统可能导致数据损坏或丢失,请务必谨慎操作。 + +## 开机自动挂载 + +如果你不想每次重启系统都要重新手动挂载 JuiceFS 存储,可以设置自动挂载文件系统。 + +首先,需要将 `juicefs` 客户端重命名为 `mount.juicefs` 并复制到 `/sbin/` 目录: + +```shell +$ sudo cp juice/juicefs /sbin/mount.juicefs +``` + +编辑 `/etc/fstab` 配置文件,新增一条记录: + +```shell +redis://:@herald-sh-abc.redis.rds.aliyuncs.com:6379/1 /mnt/jfs juicefs _netdev,cache-size=20480 0 0 +``` + +挂载选项中 `cache-size=20480` 代表分配 20GB 本地磁盘空间作为 JuiceFS 的缓存使用,请根据你实际的 ECS 硬盘容量去决定分配的缓存大小。一般来说,为 JuiceFS 分配更大的缓存空间,可以获得更好的性能表现。 + +你可以根据需要调整上述配置中的 FUSE 挂载选项,更多内容请[查阅文档](../reference/fuse_mount_options.md)。 + +> **注意**:请将上述配置文件中的 Redis 地址、挂载点以及挂载选项,替换成你实际的信息。 diff --git a/docs/zh_cn/tutorials/aws.md b/docs/zh_cn/tutorials/aws.md new file mode 100644 index 0000000..849fa41 --- /dev/null +++ b/docs/zh_cn/tutorials/aws.md @@ -0,0 +1,244 @@ +--- +sidebar_label: 在 AWS 上使用 JuiceFS +sidebar_position: 6 +slug: /clouds/aws +--- + +# 在亚马逊 AWS 安装和使用 JuiceFS 存储 + +亚马逊 AWS 是全球领先的云计算平台,提供几乎所有类型的云计算服务。得益于 AWS 丰富的产品线,用户可以非常灵活的搭配选择 JuiceFS 组成部分。 + +## 准备 + +通过阅读文档可以了解到 JuiceFS 由以下三个部分组成: + +1. 运行在服务器上 **JuiceFS 客户端** +2. 用来存储数据的**对象存储** +3. 用来存储元数据的**数据库** + +### 1. 服务器 + +Amazon EC2 云服务器是 AWS 平台上最基础,也是应用最广泛的云服务之一。它提供 400 多种实例规格,在全球有 25 个数据中心 81 个可用区,用户可以根据实际需求灵活的选择和调整 EC2 实例的配置。 + +对于新用户来说,你并不需要过多考虑 JuiceFS 的配置要求,因为即便是配置最低的 EC2 实例,也能轻松创建和挂载使用 JuiceFS 存储。通常,你只需要考虑业务系统的硬件需求即可。 + +JuiceFS 客户端默认会占用 1GB 的磁盘作为缓存,在处理大量文件时,客户端会将数据先缓存在磁盘上,然后再异步上传到对象存储,选择 IO 更高的磁盘,预留并设置更大的缓存,可以让 JuiceFS 拥有更好的性能表现。 + +### 2. 对象存储 + +Amazon S3 是公有云对象存储服务的事实标准,其他主流云平台所提供的对象存储服务通常都兼容 S3 API,这使得面向 S3 开发的程序可以自由切换其他平台的对象存储服务。 + +JuiceFS 完全支持 Amazon S3 以及所有兼容 S3 API 对象存储服务,你可以查看文档了解 [JuiceFS 支持的所有存储类型](../reference/how_to_setup_object_storage.md)。 + +Amazon S3 提供一系列适合不同使用案例的存储类,主要有: + +- **Amazon S3 STANDARD**:适用于频繁访问数据的通用型存储 +- **Amazon S3 STANDARD_IA**:适用于长期需要但访问频率不太高的数据 +- **S3 Glacier**:适用于长期存档的数据 + +通常应该使用标准类型的 S3 用于 JuiceFS,因为除标准类型即 Amazon S3 STANDARD 之外,其他的类型虽然价格更低,但在检索(取回)数据时都会产生额外的费用。 + +另外,访问对象存储服务需要通过 `access key` 和 `secret key` 验证用户身份,你可以参照文档[《使用用户策略控制对存储桶的访问》](https://docs.aws.amazon.com/zh_cn/AmazonS3/latest/userguide/walkthrough1.html)进行创建。当通过 EC2 云服务器访问 S3 时,还可以为 EC2 分配 [IAM 角色](https://docs.aws.amazon.com/zh_cn/IAM/latest/UserGuide/id_roles.html),实现在 EC2 上免密钥调用 S3 API。 + +### 3. 数据库 + +数据和元数据能够被多主机访问是分布式文件系统的关键,为了让 JuiceFS 产生的元数据信息能够像 S3 那样通过互联网请求访问,存储元数据的数据库也应该选择面向网络的数据库。 + +Amazon RDS 和 ElastiCache 是 AWS 提供的两种云数据库服务,都能直接用于 JuiceFS 的元数据存储。Amazon RDS 是关系型数据库,支持 MySQL、MariaDB、PostgreSQL 等多种引擎。ElastiCache 是基于内存的缓存集群服务,用于 JuiceFS 时应选择 Redis 引擎。 + +此外,你还可以在 EC2 云服务器上自行搭建数据库供 JuiceFS 存储元数据使用。 + +### 4. 
注意事项 + +- 你无需为使用 JuiceFS 重新创建各种云服务资源,可以直接在现有的 EC2 云服务器上安装 JuiceFS 客户端立即开始使用。JuiceFS 没有业务入侵性,不会影响现有系统的正常运行。 +- 在选择云服务时,建议将所有的云服务选择在相同的**区域**,这样就相当于所有服务都在同一个内网,互访的时延最低,速度最快。并且,根据 AWS 的计费规则,相同区域的基础云服务之间互传数据是免费的。换言之,当你选择了不同区域的云服务,例如,EC2 选择在 `ap-east-1`、ElastiCache 选择在 `ap-southeast-1`、S3 选择在 `us-east-2`,这种情况下每个云服务之间的互访都将产生流量费用。 +- JuiceFS 不要求使用相同云平台的对象存储和数据库,你可以根据需要灵活搭配不同平台的云服务。比如,你可以使用 EC2 运行 JuiceFS 客户端,搭配阿里云的 Redis 数据库和 Backbalze B2 对象存储。当然,同一平台、相同区域的云服务组成的 JuiceFS 存储的性能会更出色。 + +## 部署和使用 + +接下来,我们以相同区域的 EC2 云服务器、S3 对象存储和 Redis 引擎的 ElastiCache 集群为例,简要的介绍如何安装和使用 JuiceFS。 + +### 1. 安装客户端 + +这里我们使用的是 x64 位架构的 Linux 系统,依次执行以下命令,会下载最新版 JuiceFS 客户端。 + +```shell +$ JFS_LATEST_TAG=$(curl -s https://api.github.com/repos/juicedata/juicefs/releases/latest | grep 'tag_name' | cut -d '"' -f 4 | tr -d 'v') +``` + +```shell +$ wget "https://github.com/juicedata/juicefs/releases/download/v${JFS_LATEST_TAG}/juicefs-${JFS_LATEST_TAG}-linux-amd64.tar.gz" +``` + +下载完成以后,解压程序到 `juice` 文件夹: + +```shell +$ mkdir juice && tar -zxvf "juicefs-${JFS_LATEST_TAG}-linux-amd64.tar.gz" -C juice +``` + +将 JuiceFS 客户端安装系统的 $PATH 路径,例如:`/usr/local/bin` : + +```shell +$ sudo install juice/juicefs /usr/local/bin +``` + +执行命令,看到返回 `juicefs` 的命令帮助信息,代表客户端安装成功。 + +```shell +$ juicefs +NAME: + juicefs - A POSIX file system built on Redis and object storage. + +USAGE: + juicefs [global options] command [command options] [arguments...] + +VERSION: + 0.17.0 (2021-09-24T04:17:26Z e115dc4) + +COMMANDS: + format format a volume + mount mount a volume + umount unmount a volume + gateway S3-compatible gateway + sync sync between two storage + rmr remove directories recursively + info show internal information for paths or inodes + bench run benchmark to read/write/stat big/small files + gc collect any leaked objects + fsck Check consistency of file system + profile analyze access log + stats show runtime statistics + status show status of JuiceFS + warmup build cache for target directories/files + dump dump metadata into a JSON file + load load metadata from a previously dumped JSON file + help, h Shows a list of commands or help for one command + +GLOBAL OPTIONS: + --verbose, --debug, -v enable debug log (default: false) + --quiet, -q only warning and errors (default: false) + --trace enable trace log (default: false) + --no-agent Disable pprof (:6060) and gops (:6070) agent (default: false) + --help, -h show help (default: false) + --version, -V print only the version (default: false) + +COPYRIGHT: + Apache License 2.0 +``` + +> **提示**:如果执行 `juicefs` 命令,终端返回 `command not found`,可能是因为 `/usr/local/bin` 目录不在系统的 `PATH` 可执行路径中。你可以通过 `echo $PATH` 命令查看系统已设置的可执行路径,并将客户端重新安装到正确的位置。也可以将 `/usr/local/bin` 添加到 `PATH` 中。 + +JuiceFS 具有良好的跨平台兼容性,同时支持在 Linux、Windows 和 macOS 上使用,如果你需要了解其他系统上的安装方法,请查阅[官方文档](../getting-started/installation.md)。 + +### 3. 
创建文件系统 + +JuiceFS 客户端的 `format` 子命令用来创建(格式化)文件系统,这里我们使用 S3 作为数据存储,使用 ElastiCache 作为元数据存储,在 EC2 上安装客户端并创建 JuiceFS 文件系统,命令格式如下: + +```shell +$ juicefs format \ + --storage s3 \ + --bucket https://.s3..amazonaws.com \ + --access-key \ + --secret-key \ + redis://[]:@:6379/1 \ + mystor +``` + +**选项说明:** + +- `--storage`:指定对象存储类型,这里我们使用 S3。如需使用其他对象存储,请参考[《JuiceFS 支持的对象存储和设置指南》](../reference/how_to_setup_object_storage.md)。 +- `--bucket`:对象存储的 Bucket 域名。 +- `--access-key` 和 `--secret-key`:访问 S3 API 的秘钥对。 + +> Redis 6.0 及以上版本,身份认证需要用户名和密码两个参数,地址格式为 `redis://username:password@redis-server-url:6379/1`。Reids 4.0 和 5.0,认证身份只需要密码,在设置 Redis 服务器地址时只需留空用户名,例如:`redis://:password@redis-server-url:6379/1` + +使用 IAM 角色绑定 EC2 时,只需指定 `--storage` 和 `--bucket` 两个选项,无需提供 API 访问秘钥。同时,也可以给 IAM 角色分配 ElastiCache 访问权限,然后就可以不用提供 Redis 的身份认证信息,只需输入 Redis 的 URL 即可,命令可以改写成: + +```shell +$ juicefs format \ + --storage s3 \ + --bucket https://herald-demo.s3..amazonaws.com \ + redis://herald-demo.abcdefg.0001.apse1.cache.amazonaws.com:6379/1 \ + mystor +``` + +看到类似下面的输出,代表文件系统创建成功了。 + +```shell +2021/10/14 08:38:32.211044 juicefs[10391] : Meta address: redis://herald-demo.abcdefg.0001.apse1.cache.amazonaws.com:6379/1 +2021/10/14 08:38:32.216566 juicefs[10391] : Ping redis: 383.789µs +2021/10/14 08:38:32.216915 juicefs[10391] : Data use s3://herald-demo/mystor/ +2021/10/14 08:38:32.412112 juicefs[10391] : Volume is formatted as {Name:mystor UUID:21a2cafd-f5d8-4a76-ae4d-482c8e2d408d Storage:s3 Bucket:https://herald-demo.s3.ap-southeast-1.amazonaws.com AccessKey: SecretKey: BlockSize:4096 Compression:none Shards:0 Partitions:0 Capacity:0 Inodes:0 EncryptKey:} +``` + +### 4. 挂载文件系统 + +创建文件系统的过程会将对象存储包括 API 密钥等信息存入数据库,挂载时无需再输入对象存储的 Bucket 和秘钥等信息。 + +使用 JuiceFS 客户端的 `mount` 子命令,将文件系统挂载到 `/mnt/jfs` 目录: + +```shell +$ sudo juicefs mount -d redis://[]:@:6379/1 /mnt/jfs +``` + +> **注意**:挂载文件系统时,只需填写数据库地址,不需要文件系统名称。默认的缓存路径为 `/var/jfsCache`,请确保当前用户有足够的读写权限。 + +你可以通过调整[挂载参数](../reference/command_reference.md#juicefs-mount),对 JuiceFS 进行优化,比如可以通过 `--cache-size` 将缓存修改为 20GB: + +```shell +$ sudo juicefs mount --cache-size 20480 -d redis://herald-demo.abcdefg.0001.apse1.cache.amazonaws.com:6379/1 /mnt/jfs +``` + +看到类似下面的输出,代表文件系统挂载成功。 + +```shell +2021/10/14 08:47:49.623814 juicefs[10601] : Meta address: redis://herald-demo.abcdefg.0001.apse1.cache.amazonaws.com:6379/1 +2021/10/14 08:47:49.628157 juicefs[10601] : Ping redis: 426.127µs +2021/10/14 08:47:49.628941 juicefs[10601] : Data use s3://herald-demo/mystor/ +2021/10/14 08:47:49.629198 juicefs[10601] : Disk cache (/var/jfsCache/21a2cafd-f5d8-4a76-ae4d-482c8e2d408d/): capacity (20480 MB), free ratio (10%), max pending pages (15) +2021/10/14 08:47:50.132003 juicefs[10601] : OK, mystor is ready at /mnt/jfs +``` + +使用 `df` 命令,可以看到文件系统的挂载情况: + +```shell +$ df -Th +文件系统 类型 容量 已用 可用 已用% 挂载点 +JuiceFS:mystor fuse.juicefs 1.0P 64K 1.0P 1% /mnt/jfs +``` + +挂载之后就可以像本地硬盘那样使用了,存入 `/mnt/jfs` 目录的数据会由 JuiceFS 客户端协调管理,最终存储在 S3 对象存储。 + +> **多主机共享**:JuiceFS 支持被多主机同时挂载使用,你可以在任何平台的任何云服务器上安装 JuiceFS 客户端,使用 `redis://:@herald-sh-abc.redis.rds.aliyuncs.com:6379/1` 数据库地址挂载文件系统即可共享读写,但需要确保挂载文件系统的主机能够正常访问到该数据库和搭配使用的 S3。 + +### 5. 卸载 JuiceFS 存储 + +使用 JuiceFS 客户端提供的 `umount` 命令可卸载文件系统,比如: + +```shell +$ sudo juicefs umount /mnt/jfs +``` + +> **注意**:强制卸载使用中的文件系统可能导致数据损坏或丢失,请务必谨慎操作。 + +### 6. 
开机自动挂载 + +如果你不想每次重启系统都要重新手动挂载 JuiceFS 存储,可以设置自动挂载。 + +首先,需要将 `juicefs` 客户端重命名为 `mount.juicefs` 并复制到 `/sbin/` 目录: + +```shell +$ sudo cp juice/juicefs /sbin/mount.juicefs +``` + +编辑 `/etc/fstab` 配置文件,新增一条记录: + +```shell +redis://[]:@:6379/1 /mnt/jfs juicefs _netdev,cache-size=20480 0 0 +``` + +挂载选项中 `cache-size=20480` 代表分配 20GB 本地磁盘空间作为 JuiceFS 的缓存使用,请根据你实际的 EBS 磁盘容量去决定分配的缓存大小。 + +你可以根据需要调整上述配置中的 FUSE 挂载选项,更多内容请[查阅文档](../reference/fuse_mount_options.md)。 + +> **注意**:请将上述配置文件中的 Redis 地址、挂载点以及挂载选项,替换成你实际的信息。 diff --git a/docs/zh_cn/tutorials/digitalocean.md b/docs/zh_cn/tutorials/digitalocean.md new file mode 100644 index 0000000..fce56ce --- /dev/null +++ b/docs/zh_cn/tutorials/digitalocean.md @@ -0,0 +1,294 @@ +--- +sidebar_label: 在 DigitalOcean 使用 JuiceFS +sidebar_position: 7 +slug: /clouds/digitalocean +--- +# 在 DigitalOcean 安装和使用 JuiceFS 存储 + +JuiceFS 是面向云设计的,使用云平台开箱即用的存储和数据库服务,最快几分钟就能完成配置投入使用,本文以 DigitalOcean 平台为例,介绍如何在云计算平台上快速简单的安装和使用 JuiceFS。 + +## 准备工作 + +JuiceFS 由存储和数据库组合驱动,因此你需要准备的东西应该包括: + +### 1. 云服务器 + +DigitalOcean 上的云服务器被称为 Droplet。你不需要为使用 JuiceFS 而单独购买新的 Droplet,哪个云服务器上需要使用 JuiceFS 存储,就在它上面安装 JuiceFS 客户端即可。 + +#### 硬件配置 + +JuiceFS 对硬件配置没有特殊的要求,任何规格的 Droplet 都能稳定的使用。但建议选择性能更好的 SSD 并预留至少 1GB 的容量提供给 JuiceFS 作为本地缓存使用。 + +#### 操作系统 + +JuiceFS 支持 Linux、BSD、macOS 和 Windows,在本文中,我们会以 Ubuntu Server 20.04 为例进行介绍。 + +### 2. 对象存储 + +JuiceFS 使用对象存储来存储所有的数据,在 DigitalOcean 上使用 Spaces 是最简便的方案。Spaces 是一个 S3 兼容的对象存储服务,开箱即用。在创建时建议选择与 Droplet 相同的区域,这样可以获得最佳的访问速度,同时也能避免额外的流量开销。 + +当然,你也可以使用其他平台的对象存储服务,或是在 Droplet 上使用 Ceph 或 MinIO 手动搭建。总之,你可以自由选择要使用的对象存储,只要确保 JuiceFS 客户端能够访问到对象存储的 API 就可以。 + +这里,我们创建了一个名为 `juicefs` 的 Spaces 存储桶,区域为新加坡 `sgp1`,它的访问地址为: + +- https://juicefs.sgp1.digitaloceanspaces.com + +另外,还需要在 API 菜单创建 `Spaces access keys`,JuiceFS 需要用它访问 Spaces 的 API。 + +### 3. 数据库 + +与一般的文件系统不同,JuiceFS 将数据所对应的所有元数据都存储在独立的数据库,存储的数据规模越大性能越出色。目前,JuiceFS 支持 Redis、TiKV、MySQL/MariaDB、PostgreSQL、SQLite 等常见数据库,同时也在持续开发对其他数据库的支持。如果你需要的数据库暂未支持,请提交 [Issuse](https://github.com/juicedata/juicefs/issues) 反馈。 + +在性能、规模和可靠性等方面,每种数据库都有各自的优缺点,你应该根据实际的场景需要进行选择。 + +在数据库的选择方面请不要有顾虑,JuiceFS 客户端提供了元数据迁移功能,你可以将元数据从一种数据库中轻松的导出并迁移到其他的数据库中。 + +本文我们使用 DigitalOcean 的 Redis 6 数据库托管服务,区域选择 `新加坡`,选择与已存在的 Droplet 相同的 VPC 私有网络。创建 Redis 大概需要 5 分钟左右的时间, 我们跟随设置向导对数据库进行初始化设置。 + +![](../images/digitalocean-redis-guide.png) + +默认情况下 Redis 允许所有入站连接,出于安全考虑,应该在设置向导的安全设置环节,在 `Add trusted sources` 中选中有权访问 Redis 的 Droplet,即仅允许选中的主机访问 Redis。 + +在数据回收策略的设置环节,建议选择 `noeviction`,即当内存耗尽时,仅报告错误,不回收任何数据。 + +> **注意**:为了确保元数据的安全和完整,回收策略请不要选择 `allkeys-lru` 和 `allkey-random`。 + +Redis 的访问地址可以从控制台的 `Connection Details` 中找到,如果所有计算资源都在 DigitalOcea,则建议优先使用 VPC 私有网络进行连接,这样能最大程度的提升安全性。 + +![](../images/digitalocean-redis-url.png) + +## 安装和使用 + +### 1. 安装 JuiceFS 客户端 + +我们当前使用的是 Ubuntu Server 20.04,依次执行以下命令即可安装最新版本客户端。 + +检测当前系统信息并设置临时的环境变量: + +```shell +$ JFS_LATEST_TAG=$(curl -s https://api.github.com/repos/juicedata/juicefs/releases/latest | grep 'tag_name' | cut -d '"' -f 4 | tr -d 'v') +``` + +下载适配当前系统的最新版客户端软件包: + +```shell +$ wget "https://github.com/juicedata/juicefs/releases/download/v${JFS_LATEST_TAG}/juicefs-${JFS_LATEST_TAG}-linux-amd64.tar.gz" +``` + +解压安装包: + +```shell +$ mkdir juice && tar -zxvf "juicefs-${JFS_LATEST_TAG}-linux-amd64.tar.gz" -C juice +``` + +将客户端安装到 `/usr/local/bin` : + +```shell +$ sudo install juice/juicefs /usr/local/bin +``` + +执行命令,看到返回 `juicefs` 的命令帮助信息,代表客户端安装成功。 + +```shell +$ juicefs + +NAME: + juicefs - A POSIX file system built on Redis and object storage. 
+ +USAGE: + juicefs [global options] command [command options] [arguments...] + +VERSION: + 0.16.2 (2021-08-25T04:01:15Z 29d6fee) + +COMMANDS: + format format a volume + mount mount a volume + umount unmount a volume + gateway S3-compatible gateway + sync sync between two storage + rmr remove directories recursively + info show internal information for paths or inodes + bench run benchmark to read/write/stat big/small files + gc collect any leaked objects + fsck Check consistency of file system + profile analyze access log + stats show runtime stats + status show status of JuiceFS + warmup build cache for target directories/files + dump dump metadata into a JSON file + load load metadata from a previously dumped JSON file + help, h Shows a list of commands or help for one command + +GLOBAL OPTIONS: + --verbose, --debug, -v enable debug log (default: false) + --quiet, -q only warning and errors (default: false) + --trace enable trace log (default: false) + --no-agent Disable pprof (:6060) and gops (:6070) agent (default: false) + --help, -h show help (default: false) + --version, -V print only the version (default: false) + +COPYRIGHT: + Apache License 2.0 +``` + +另外,你也可以访问 [JuiceFS GitHub Releases](https://github.com/juicedata/juicefs/releases) 页面选择其他版本进行手动安装。 + +### 2. 创建文件系统 + +创建文件系统使用 `format` 子命令,格式为: + +```shell +$ juicefs format [command options] META-URL NAME +``` + +以下命令创建了一个名为 `mystor` 的文件系统: + +```shell +$ juicefs format \ + --storage space \ + --bucket https://juicefs.sgp1.digitaloceanspaces.com \ + --access-key \ + --secret-key \ + rediss://default:your-password@private-db-redis-sgp1-03138-do-user-2500071-0.b.db.ondigitalocean.com:25061/1 \ + mystor +``` + +**参数说明:** + +- `--storage`:指定数据存储引擎,这里使用的是 `space`,点此查看所有[支持的存储](../reference/how_to_setup_object_storage.md)。 +- `--bucket`:指定存储桶访问地址。 +- `--access-key` 和 `--secret-key`:指定访问对象存储 API 的秘钥。 +- DigitalOcean 托管的 Redis 需要使用 TLS/SSL 加密访问,因此需要使用 `rediss://` 协议头,链接最后添加的 `/1` 代表使用 Redis 的 1 号数据库。 + +看到类似下面的输出,代表文件系统创建成功。 + +```shell +2021/08/23 16:36:28.450686 juicefs[2869028] : Meta address: rediss://default@private-db-redis-sgp1-03138-do-user-2500071-0.b.db.ondigitalocean.com:25061/1 +2021/08/23 16:36:28.481251 juicefs[2869028] : AOF is not enabled, you may lose data if Redis is not shutdown properly. +2021/08/23 16:36:28.481763 juicefs[2869028] : Ping redis: 331.706µs +2021/08/23 16:36:28.482266 juicefs[2869028] : Data uses space://juicefs/mystor/ +2021/08/23 16:36:28.534677 juicefs[2869028] : Volume is formatted as {Name:mystor UUID:6b0452fc-0502-404c-b163-c9ab577ec766 Storage:space Bucket:https://juicefs.sgp1.digitaloceanspaces.com AccessKey:7G7WQBY2QUCBQC5H2DGK SecretKey:removed BlockSize:4096 Compression:none Shards:0 Partitions:0 Capacity:0 Inodes:0 EncryptKey:} +``` + +### 3. 挂载文件系统 + +挂载文件系统使用 `mount` 子命令,使用 `-d` 参数以守护进程的形式挂载。以下命令将刚刚创建的文件系统挂载到当前目录下的 `mnt` 目录: + +```shell +$ sudo juicefs mount -d \ + rediss://default:your-password@private-db-redis-sgp1-03138-do-user-2500071-0.b.db.ondigitalocean.com:25061/1 mnt +``` + +使用 sudo 执行挂载操作的目的是为了让 JuiceFS 能够有权限在 `/var/` 下创建缓存目录。值得注意的是,在挂载文件系统时,只需要指定`数据库地址`和`挂载点`,并不需要指定文件系统的名称。 + +看到类似下面的输出,代表文件系统挂载成功。 + +```shell +2021/08/23 16:39:14.202151 juicefs[2869081] : Meta address: rediss://default@private-db-redis-sgp1-03138-do-user-2500071-0.b.db.ondigitalocean.com:25061/1 +2021/08/23 16:39:14.234925 juicefs[2869081] : AOF is not enabled, you may lose data if Redis is not shutdown properly. 
+2021/08/23 16:39:14.235536 juicefs[2869081] : Ping redis: 446.247µs +2021/08/23 16:39:14.236231 juicefs[2869081] : Data use space://juicefs/mystor/ +2021/08/23 16:39:14.236540 juicefs[2869081] : Disk cache (/var/jfsCache/6b0452fc-0502-404c-b163-c9ab577ec766/): capacity (1024 MB), free ratio (10%), max pending pages (15) +2021/08/23 16:39:14.738416 juicefs[2869081] : OK, mystor is ready at mnt +``` + +使用 `df` 命令,可以看到文件系统的挂载情况: + +```shell +$ df -Th +文件系统 类型 容量 已用 可用 已用% 挂载点 +JuiceFS:mystor fuse.juicefs 1.0P 64K 1.0P 1% /home/herald/mnt +``` + +从挂载命令的输出信息中可以看到,JuiceFS 默认设置了 1024 MB 的作为本地缓存。设置更大的缓存,可以让 JuiceFS 有更好的性能表现,可以在挂载文件系统时通过 `--cache-size` 选项设置缓存(单位 MiB),例如,设置 20GB 的本地缓存: + +```shell +$ sudo juicefs mount -d --cache-size 20000 \ + rediss://default:your-password@private-db-redis-sgp1-03138-do-user-2500071-0.b.db.ondigitalocean.com:25061/1 mnt +``` + +文件系统挂载成功以后,就可以像使用本地硬盘那样,在 `~/mnt` 目录中存储数据了。 + +### 4. 查看文件系统 + +使用 `status` 子命令可以查看一个文件系统的基本信息和连接状态,只需指定数据库访问地址即可。 + +```shell +$ juicefs status rediss://default:bn8l7ui2cun4iaji@private-db-redis-sgp1-03138-do-user-2500071-0.b.db.ondigitalocean.com:25061/1 +2021/08/23 16:48:48.567046 juicefs[2869156] : Meta address: rediss://default@private-db-redis-sgp1-03138-do-user-2500071-0.b.db.ondigitalocean.com:25061/1 +2021/08/23 16:48:48.597513 juicefs[2869156] : AOF is not enabled, you may lose data if Redis is not shutdown properly. +2021/08/23 16:48:48.598193 juicefs[2869156] : Ping redis: 491.003µs +{ + "Setting": { + "Name": "mystor", + "UUID": "6b0452fc-0502-404c-b163-c9ab577ec766", + "Storage": "space", + "Bucket": "https://juicefs.sgp1.digitaloceanspaces.com", + "AccessKey": "7G7WQBY2QUCBQC5H2DGK", + "SecretKey": "removed", + "BlockSize": 4096, + "Compression": "none", + "Shards": 0, + "Partitions": 0, + "Capacity": 0, + "Inodes": 0 + }, + "Sessions": [ + { + "Sid": 1, + "Heartbeat": "2021-08-23T16:46:14+08:00", + "Version": "0.16.2 (2021-08-25T04:01:15Z 29d6fee)", + "Hostname": "ubuntu-s-1vcpu-1gb-sgp1-01", + "MountPoint": "/home/herald/mnt", + "ProcessID": 2869091 + }, + { + "Sid": 2, + "Heartbeat": "2021-08-23T16:47:59+08:00", + "Version": "0.16.2 (2021-08-25T04:01:15Z 29d6fee)", + "Hostname": "ubuntu-s-1vcpu-1gb-sgp1-01", + "MountPoint": "/home/herald/mnt", + "ProcessID": 2869146 + } + ] +} +``` + +### 5. 卸载文件系统 + +使用 `umount` 子命令卸载文件系统,比如: + +```shell +$ sudo juicefs umount ~/mnt +``` + +> **注意**:强制卸载使用中的文件系统可能导致数据损坏或丢失,请务必谨慎操作。 + +### 6. 开机自动挂载 + +如果你不想每次重启系统都要重新手动挂载 JuiceFS,可以设置自动挂载。 + +首先,需要将 `juicefs` 客户端重命名为 `mount.juicefs` 并复制到 `/sbin/` 目录: + +```shell +$ sudo cp /usr/local/bin/juicefs /sbin/mount.juicefs +``` + +编辑 `/etc/fstab` 配置文件,新增一条记录: + +```shell +rediss://default:bn8l7ui2cun4iaji@private-db-redis-sgp1-03138-do-user-2500071-0.b.db.ondigitalocean.com:25061/1 /home/herald/mnt juicefs _netdev,cache-size=20480 0 0 +``` + +挂载选项中 `cache-size=20480` 代表分配 20GiB 本地磁盘空间作为 JuiceFS 的本地缓存,请根据实际的硬件配置决定分配的缓存大小。你可以根据需要调整上述配置中的 [FUSE 挂载选项](../reference/fuse_mount_options.md)。 + +### 7. 
多主机共享挂载 + +JuiceFS 文件系统支持被多台云服务器同时挂载,而且对云服务器的地理位置没有要求,可以很容的实现同平台之间、跨云平台之间、公有云和私有云之间服务器的数据实时共享。 + +不单如此,JuiceFS 的共享挂载功能还能提供数据的强一致性保证,在多台服务器挂载了同一个文件系统时,文件系统上确认的写入会在所有主机上实时可见。 + +使用共享挂载功能,务必要确保组成文件系统的数据库和对象存储服务,能够被每一台要挂载它的主机正常访问。在本文的演示环境中,Spaces 对象存储是对整个互联网开放访问的,只要使用正确的秘钥就能够通过 API 进行读写。但对于平台托管的 Redis 数据库,你需要合理的配置访问策略,确保平台外的主机有访问权限。 + +在使用多主机共享挂载功能时,首先在任何一台主机上创建文件系统,然后在其他主机上安装 JuiceFS 客户端,使用同一个数据库地址通过 `mount` 命令挂载即可。特别注意,文件系统只需创建一次,不应该也不需要在其他主机上重复执行文件系统创建操作。 diff --git a/docs/zh_cn/tutorials/juicefs_on_k3s.md b/docs/zh_cn/tutorials/juicefs_on_k3s.md new file mode 100644 index 0000000..353b1f2 --- /dev/null +++ b/docs/zh_cn/tutorials/juicefs_on_k3s.md @@ -0,0 +1,273 @@ +--- +sidebar_label: 在 K3s 上使用 JuiceFS +sidebar_position: 1 +slug: /juicefs_on_k3s +--- +# 在 K3s 上使用 JuiceFS + +[K3s](https://k3s.io/) 是一个经过功能优化的 Kubernetes 发行版,它与 Kubernetes 完全兼容,即几乎所有在 Kubernetes 的操作都可以在 K3s 上执行。K3s 将整个容器编排系统打包进了一个容量不足 100MB 的二进制程序,减少了部署 Kubernetes 生产集群的环境依赖,大大降低了安装难度。相比之下,K3s 对操作系统的性能要求更低,树莓派等 ARM 设备都可以用来组建集群。 + +在本文中,我们会建立一个包含两个节点的 K3s 集群,为集群安装并配置使用 [JuiceFS CSI Driver](https://github.com/juicedata/juicefs-csi-driver),最后会创建一个 Nginx 容器进行验证。 + +## 部署 K3s 集群 + +K3s 对硬件的**最低要求**很低: + +- **内存**:512MB+(建议 1GB+) +- **CPU**:1 核 + +在部署生产集群时,通常可以将树莓派 4B(4 核 CPU,8G 内存)作为一个节点的硬件配置起点,详情查看[硬件需求](https://rancher.com/docs/k3s/latest/en/installation/installation-requirements/#hardware)。 + +### K3s server 节点 + +运行 server 节点的服务器 IP 地址为:`192.168.1.35` + +使用 K3s 官方提供的脚本,即可将常规的 Linux 发行版自动部署成为 server 节点。 + +```shell +$ curl -sfL https://get.k3s.io | sh - +``` + +部署成功后,K3s 服务会自动启动,kubectl 等工具也会一并安装。 + +执行命令查看节点状态: + +```shell +$ sudo kubectl get nodes +NAME STATUS ROLES AGE VERSION +k3s-s1 Ready control-plane,master 28h v1.21.4+k3s1 +``` + +获取 `node-token`: + +```shell +$ sudo -u root cat /var/lib/rancher/k3s/server/node-token +K1041f7c4fabcdefghijklmnopqrste2ec338b7300674f::server:3d0ab12800000000000000006328bbd80 +``` + +### K3s worker 节点 + +运行 worker 节点的服务器 IP 地址为:`192.168.1.36` + +执行以下命令,将其中 `K3S_URL` 的值改成 server 节点的 IP 或域名,默认端口 `6443`。将 `K3S_TOKEN` 的值替换成从 server 节点获取的 `node-token`。 + +```shell +$ curl -sfL https://get.k3s.io | K3S_URL=http://192.168.1.35:6443 K3S_TOKEN=K1041f7c4fabcdefghijklmnopqrste2ec338b7300674f::server:3d0ab12800000000000000006328bbd80 sh - +``` + +部署成功以后,回到 server 节点查看节点状态: + +```shell +$ sudo kubectl get nodes +NAME STATUS ROLES AGE VERSION +k3s-s1 Ready control-plane,master 28h v1.21.4+k3s1 +k3s-n1 Ready 28h v1.21.4+k3s1 +``` + +## 安装 CSI Driver + +与在 [Kubernetes 上安装 JuiceFS CSI Driver](../deployment/how_to_use_on_kubernetes.md) 的方法一致,你可以通过 Helm 安装,也可以通过 kubectl 安装。 + +这里我们用 kubectl 安装,执行以下命令安装 JuiceFS CSI Driver: + +```shell +$ kubectl apply -f https://raw.githubusercontent.com/juicedata/juicefs-csi-driver/master/deploy/k8s.yaml +``` + +### 创建存储类 + +复制并修改以下代码创建一个配置文件,例如:`juicefs-sc.yaml` + +```yaml +apiVersion: v1 +kind: Secret +metadata: + name: juicefs-sc-secret + namespace: kube-system +type: Opaque +stringData: + name: "test" + metaurl: "redis://juicefs.afyq4z.0001.use1.cache.amazonaws.com/3" + storage: "s3" + bucket: "https://juicefs-test.s3.us-east-1.amazonaws.com" + access-key: "" + secret-key: "" +--- +apiVersion: storage.k8s.io/v1 +kind: StorageClass +metadata: + name: juicefs-sc +provisioner: csi.juicefs.com +reclaimPolicy: Retain +volumeBindingMode: Immediate +parameters: + csi.storage.k8s.io/node-publish-secret-name: juicefs-sc-secret + csi.storage.k8s.io/node-publish-secret-namespace: kube-system + csi.storage.k8s.io/provisioner-secret-name: juicefs-sc-secret + 
csi.storage.k8s.io/provisioner-secret-namespace: kube-system +``` + +配置文件中 `stringData` 部分用来设置 JuiceFS 文件系统相关的信息,系统会根据你指定的信息创建文件系统。当需要在存储类中使用已经预先创建好的文件系统时,则只需要填写 `name` 和 `metaurl` 两项即可,其他项可以删除或将值留空。 + +执行命令,部署存储类: + +```shell +$ kubectl apply -f juicefs-sc.yaml +``` + +查看存储类状态: + +```shell +$ sudo kubectl get sc +NAME PROVISIONER RECLAIMPOLICY VOLUMEBINDINGMODE ALLOWVOLUMEEXPANSION AGE +local-path (default) rancher.io/local-path Delete WaitForFirstConsumer false 28h +juicefs-sc csi.juicefs.com Retain Immediate false 28h +``` + +> **注意**:一个存储类与一个 JuiceFS 文件系统相关联,你可以根据需要创建任意数量的存储类。但需要注意修改配置文件中的存储类名称,避免同名冲突。 + +## 使用 JuiceFS 持久化 Nginx 数据 + +接下来部署一个 Nginx Pod,使用 JuiceFS 存储类声明的持久化存储。 + +### Depolyment + +创建一个配置文件,例如:`depolyment.yaml` + +```yaml +apiVersion: v1 +kind: PersistentVolumeClaim +metadata: + name: web-pvc +spec: + accessModes: + - ReadWriteMany + resources: + requests: + storage: 10Pi + storageClassName: juicefs-sc +--- +apiVersion: apps/v1 +kind: Deployment +metadata: + name: nginx-run + labels: + app: nginx +spec: + replicas: 2 + selector: + matchLabels: + app: nginx + template: + metadata: + labels: + app: nginx + spec: + containers: + - name: nginx + image: linuxserver/nginx + ports: + - containerPort: 80 + volumeMounts: + - mountPath: /config + name: web-data + volumes: + - name: web-data + persistentVolumeClaim: + claimName: web-pvc +``` + +执行部署: + +``` +$ sudo kubectl apply -f depolyment.yaml +``` + +### Service + +创建一个配置文件,例如:`service.yaml` + +```yaml +apiVersion: v1 +kind: Service +metadata: + name: nginx-run-service +spec: + selector: + app: nginx + ports: + - name: http + port: 80 +``` + +执行部署: + +```shell +$ sudo kubectl apply -f service.yaml +``` + +### Ingress + +K3s 默认预置了 traefik-ingress,通过以下配置为 Nginx 创建一个 ingress。例如:`ingress.yaml` + +```yaml +apiVersion: networking.k8s.io/v1 +kind: Ingress +metadata: + name: nginx-run-ingress + annotations: + traefik.ingress.kubernetes.io/router.entrypoints: web +spec: + rules: + - http: + paths: + - pathType: Prefix + path: "/web" + backend: + service: + name: nginx-run-service + port: + number: 80 +``` + +执行部署: + +```shell +$ sudo kubectl apply -f ingress.yaml +``` + +### 访问 + +部署完成以后,使用相同局域网的主机访问任何一个集群节点,即可看到 Nginx 的欢迎页面。 + +![](../images/k3s-nginx-welcome.png) + +接下来查看一下容器是否成功挂载了 JuiceFS,执行命令查看 pod 状态: + +```shell +$ sudo kubectl get pods +NAME READY STATUS RESTARTS AGE +nginx-run-7d6fb7d6df-qhr2m 1/1 Running 0 28h +nginx-run-7d6fb7d6df-5hpv7 1/1 Running 0 24h +``` + +执行命令,查看任何一个 pods 的文件系统挂载情况: + +```shell +$ sudo kubectl exec nginx-run-7d6fb7d6df-qhr2m -- df -Th +Filesystem Type Size Used Avail Use% Mounted on +overlay overlay 20G 3.2G 17G 17% / +tmpfs tmpfs 64M 0 64M 0% /dev +tmpfs tmpfs 2.0G 0 2.0G 0% /sys/fs/cgroup +JuiceFS:jfs fuse.juicefs 1.0P 174M 1.0P 1% /config +/dev/sda1 ext4 20G 3.2G 17G 17% /etc/hosts +shm tmpfs 64M 0 64M 0% /dev/shm +tmpfs tmpfs 2.0G 12K 2.0G 1% /run/secrets/kubernetes.io/serviceaccount +tmpfs tmpfs 2.0G 0 2.0G 0% /proc/acpi +tmpfs tmpfs 2.0G 0 2.0G 0% /proc/scsi +tmpfs tmpfs 2.0G 0 2.0G 0% /sys/firmware +``` + +可以看到,名为 `jfs` 的文件系统已经挂载到了容器的 `/config` 目录,已使用空间为 174M。 + +这就表明集群中的 Pod 已经成功配置并使用 JuiceFS 持久化数据了。 diff --git a/docs/zh_cn/tutorials/juicefs_on_kubesphere.md b/docs/zh_cn/tutorials/juicefs_on_kubesphere.md new file mode 100644 index 0000000..7aa6331 --- /dev/null +++ b/docs/zh_cn/tutorials/juicefs_on_kubesphere.md @@ -0,0 +1,137 @@ +--- +sidebar_label: 在 KubeSphere 上使用 JuiceFS +sidebar_position: 3 +slug: /juicefs_on_kubesphere +--- +# 在 KubeSphere 上使用 JuiceFS + 
+[KubeSphere](https://kubesphere.com.cn/) 是在 Kubernetes 之上构建的以应用为中心的多租户容器平台,提供全栈的 IT 自动化运维的能力,简化企业的 DevOps 工作流。
+
+KubeSphere 提供了运维友好的向导式操作界面,即便是 Kubernetes 经验并不丰富的用户,也能相对轻松的上手开始管理和使用。它提供了基于 Helm 的应用市场,可以在图形化界面下非常轻松地安装各种 Kubernetes 应用。
+
+本文将介绍如何在 KubeSphere 中一键部署 JuiceFS CSI Driver,为集群上的各种应用提供数据持久化。
+
+## 前提条件
+
+1. 安装 KubeSphere
+
+安装 KubeSphere 有两种方法。一是在 Linux 上直接安装,可以参考文档:[在 Linux 安装 KubeSphere](https://kubesphere.com.cn/docs/quick-start/all-in-one-on-linux/) ;
+二是在已有 Kubernetes 中安装,可以参考文档:[在 Kubernetes 安装 KubeSphere](https://kubesphere.com.cn/docs/quick-start/minimal-kubesphere-on-k8s/) 。
+
+2. 在 KubeSphere 中启用应用商店
+
+在 KubeSphere 中启用应用商店可以参考文档:[KubeSphere 应用商店](https://kubesphere.com.cn/docs/pluggable-components/app-store/) 。
+
+## 安装 JuiceFS CSI Driver
+
+如果 KubeSphere 的版本为 v3.2.0 及以上,可以直接在应用商店中安装 CSI Driver,跳过「配置应用模板/应用仓库」步骤,直接进入「安装」步骤;如果 KubeSphere 版本低于 v3.2.0,按照以下步骤配置应用模板/应用仓库。
+
+### 配置应用模板/应用仓库
+
+安装 JuiceFS CSI Driver 首先需要创建应用模板,这里有两种方法。
+
+#### 方法一:应用仓库
+
+在「企业空间」中点击进入「应用管理」,选择「应用仓库」,点击「创建」按钮添加 JuiceFS CSI 仓库,填写:
+
+- 仓库名称:juicefs-csi-driver
+- Index URL:https://juicedata.github.io/juicefs-csi-driver/
+
+![](../images/kubesphere_app_shop.png)
+
+#### 方法二:应用模板
+
+先在 JuiceFS CSI Driver 仓库下载 chart 压缩包:https://github.com/juicedata/juicefs-csi-driver/releases 。
+
+在「企业空间」中点击进入「应用管理」,选择「应用模板」,点击「创建」,上传 chart 压缩包:
+
+![](../images/kubesphere_app_template.png)
+
+### 安装
+
+在「企业空间」中选择您所需部署的「项目」(KubeSphere 中的项目即为 K8s 中的 namespace),选择「应用负载」,点击「部署新应用」按钮,选择「来自应用商店」,然后选择「juicefs」:
+
+![](../images/kubesphere_shop_juicefs.jpg)
+
+若 KubeSphere 版本低于 v3.2.0,根据上一步配置好的应用模板,选择部署应用「来自应用模板」:
+
+![](../images/kubesphere_install_csi.png)
+
+进入配置修改页面后,修改以下两个地方:
+- namespace:改成对应的项目名
+- storageClass.backend:
+ `backend` 部分用来定义文件系统后端的数据库和对象存储,可以查阅 [「JuiceFS 快速上手指南」](../getting-started/for_local.md) 了解相关内容。
+
+您也可以通过 KubeSphere 的应用商店快速创建数据库(如 Redis)和对象存储(如 MinIO)。
+比如在 KubeSphere 平台搭建 Redis:在当前所在项目中选择「应用负载」,点击「部署新应用」按钮,选择「来自应用商店」,选择「Redis」,然后快速部署即可。Redis 的访问 URL 可以使用部署好的应用对应的服务名,如下:
+
+![](../images/kubesphere_redis.png)
+
+在 KubeSphere 平台搭建 MinIO 也是类似的流程,不过在部署 MinIO 之前可以修改 MinIO 的 accessKey 和 secretKey,并且需要记住配置的值。如下图:
+
+![](../images/kubesphere_create_minio.png)
+
+> 注:如果部署 MinIO 出现权限问题,可以将配置中的 `securityContext.enables` 设置为 false。
+
+MinIO 的访问 URL 可以使用部署好的应用对应的服务名,如下:
+
+![](../images/kubesphere_minio.png)
+
+Redis 和 MinIO 都搭建好之后,就可以填写 JuiceFS CSI Driver 的 `backend` 值了。其中:
+
+1. `metaurl` 为刚才创建的 Redis 的数据库地址,Redis 的访问地址可用 Redis 应用对应的服务名,如 `redis://redis-rzxoz6:6379/1`
+2. `storage` 为对象存储的类型,如 `minio`
+3. `bucket` 为刚才创建的 MinIO 的可用 bucket(JuiceFS 会自动创建,不需要手动创建),MinIO 的访问地址可用 MinIO 应用对应的服务名,如 `http://minio-qkp9my:9000/minio/test`
+4. 
`accessKey` 和 `secretKey` 用刚才创建的 MinIO 的 accessKey 和 secretKey + +![](../images/kubesphere_update_csi.png) + +配置修改完毕后,点击安装即可。 + +## 使用 + +### 部署应用 + +按照上述方法安装好的 JuiceFS CSI Driver 已经创建好一个 `StorageClass`,名为上述 `storageClass` 的 `name`,比如上述创建的 `StorageClass` 为 `juicefs-sc`,可以直接使用。 + +然后需要创建一个 PVC,指定使用 `juicefs-sc` 这个 `StorageClass`。在「项目」中,选择「存储管理」,再选择「存储卷」,点击「创建」按钮创建 PVC,其中「存储类型」选择 `juicefs-sc`,如下: + +![](../images/kubesphere_pvc.png) + +PVC 创建好之后,再在「项目」的「应用负载」中,选择「工作负载」,点击「创建」按钮部署工作负载,其中「基本信息」页填写自己喜欢的名字;「容器镜像」页可以填写镜像 `centos` ; +启动命令 `sh,-c,while true; do echo $(date -u) >> /data/out.txt; sleep 5; done` ;「存储卷来源」选择「已有存储卷」,再选择上一步创建的 PVC,容器内路径填写 `/data` 如下: + +![](../images/kubesphere_deployment.png) + +![](../images/kubesphere_workload.png) + +部署完成后可以看到运行中的容器组: + +![](../images/kubesphere_pod.png) + +### 新建 StorageClass + +若安装 JuiceFS CSI Driver 的时候没有创建 `StorageClass`,或者需要另外新建,可以遵循以下步骤: + +准备好元数据服务和对象存储服务后,新建一个 `Secret`。在「平台管理」页面选择「配置中心」,选择「密钥」,点击「创建」按钮新建: + +![](../images/kubesphere_create_secret.png) + +「密钥设置」中填入准备好的元数据服务和对象存储信息,如下: + +![](../images/kubesphere_update_secret.png) + +`Secret` 新建好之后,创建 `StorageClass`,在「平台管理」页面选择「存储管理」,选择「存储类型」,点击「创建」按钮新建,其中「存储系统」选择「自定义」: + +![](../images/kubesphere_sc_create.png) + +设置页面信息如下,其中「存储系统」填写 `csi.juicefs.com`,另外再设置 4 个参数: + +- `csi.storage.k8s.io/provisioner-secret-name`: 刚刚创建好的 secret name +- `csi.storage.k8s.io/provisioner-secret-namespace`: secret 对应的项目名 +- `csi.storage.k8s.io/node-publish-secret-name`: 刚刚创建好的 secret name +- `csi.storage.k8s.io/node-publish-secret-namespace`: secret 对应的项目名 + +![](../images/kubesphere_sc_update.png) + +点击「创建」按钮之后,`StorageClass` 就创建好了。 diff --git a/docs/zh_cn/tutorials/juicefs_on_rancher.md b/docs/zh_cn/tutorials/juicefs_on_rancher.md new file mode 100644 index 0000000..3ea9a26 --- /dev/null +++ b/docs/zh_cn/tutorials/juicefs_on_rancher.md @@ -0,0 +1,107 @@ +--- +sidebar_label: 在 Rancher 上使用 JuiceFS +sidebar_position: 2 +slug: /juicefs_on_rancher +--- +# 在 Rancher 上使用 JuiceFS + +简单来说,[Rancher](https://rancher.com/) 是一个企业级的 Kubernetes 集群管理工具,使用它可以非常轻松的在各种云计算平台上快速的完成 Kubernetes 集群的部署。 + +Rancher 提供了基于浏览器的管理界面,即便是 Kubernetes 经验并不丰富的用户,也能相对轻松的上手开始管理和使用。它默认预置了基于 Helm 的应用市场,可以在图形化界面下非常轻松的安装各种 Kubernetes 应用。 + +本文将介绍如何在 Linux 系统上部署 Rancher,并在上面创建 Kubernetes 集群,然后通过其内置的应用市场,一键部署 JuiceFS CSI Driver,为集群上的各种应用提供数据持久化。 + +## 安装 Rancher + +几乎所有主流的现代 Linux 发行版都可以安装 Rancher,它既可以直接安装在操作系统上,也可以安装在 Docker、Kubernetes、K3s 或 RKE 上,不论在哪种环境上安装都是“Product-Ready”的。 + +这里我们选择将 Rancher 安装在 Docker 上,配置上需要满足以下要求: + +- **操作系统**:x86-64 架构的 Linux 系统 +- **内存**:4GB 以上 +- **Docker**:19.03+ + +执行以下命令安装 Rancher: + +```shell +$ sudo docker run --privileged -d --restart=unless-stopped -p 80:80 -p 443:443 rancher/rancher +``` + +容器创建完成以后,通过浏览器访问主机的 IP 地址就能打开 Rancher 的管理界面。 + +![](../images/rancher-welcome.jpeg) + +## 创建 Kubernetes 集群 + +Rancher 安装成功以后,可以看到它已经在当前容器中部署了一个 K3s 集群,Rancher 相关资源都运行在这个内部的 K3s 集群中,无需理会这个集群。 + +接下来开始创建 Kubernetes 集群,在欢迎页面的 Cluster 部分点击 `Create` 创建集群。Rancher 支持在各大主流云计算平台创建 Kubernetes 集群,这里我们要在 Rancher 的宿主机上直接选择集群,因此选择 `Custom`。然后根据向导填写集群名称,选择 Kubernetes 版本即可。 + +![](../images/rancher-cluster-create.jpg) + +在 `Cluster Options` 页面中,选择要创建的节点角色,然后复制生成命令,在目标主机上执行即可。 + +![](../images/rancher-cluster-options.jpg) + +集群创建完成后,Rancher 的集群列表中会有状态显示。 + +![](../images/rancher-clusters.jpg) + +## 一键安装 JuiceFS CSI Driver + +在集群列表中点击进入创建的 Kubernetes 集群,左侧导航菜单点击展开 `应用市场` → `Chart 仓库`,点击 `创建` 按钮添加 JuiceFS CSI 仓库,填写: + +- **仓库名称**:juicefs +- **Index URL**:https://juicedata.github.io/juicefs-csi-driver/ + +![](../images/rancher-new-repo.jpg) + 
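如果你更习惯使用命令行,也可以先在任意一台安装了 Helm 3 的主机上确认该仓库地址可以正常访问。下面只是一个示意,与 Rancher 界面中的操作等效,其中仓库别名 `juicefs` 为自行指定的名字:

```shell
# 添加 JuiceFS CSI Driver 的 Helm 仓库(别名 juicefs 仅为示例)
helm repo add juicefs https://juicedata.github.io/juicefs-csi-driver/
# 更新本地索引,并确认能够检索到 juicefs-csi-driver chart
helm repo update
helm search repo juicefs
```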
+创建以后,在仓库列表中可以看到刚刚添加的 JuiceFS CSI 仓库。 + +![](../images/rancher-repos.jpg) + +紧接着通过左侧菜单点击打开 `应用市场` → `Charts`,搜索栏中输入 `juicefs`,然后点击打开检索出的 `juicefs-csi-driver`。 + +![](../images/rancher-chart-search.jpg) + +在应用详情页面点击“安装”按钮,默认会安装最新版本,也可以点选切换到历史版本进行安装。 + +![](../images/rancher-chart-info.jpg) + +安装向导共有两步: + +### 第一步:设置应用的 `Namespace` + +JuiceFS CSI Driver 默认为 `kube-system`,这一步无需设置。 + +### 第二步:调整配置参数 + +这个页面提供了 YAML 编辑器,你可以根据需要调整 JuiceFS 相关的信息,通常只需要修改 `storageClasses` 部分,其中 `backend` 部分用来定义文件系统后端的数据库和对象存储,可以查阅[「JuiceFS 快速上手指南」](../getting-started/for_local.md)了解相关内容。如果你使用的是已经预先创建的文件系统,那么只需填写 `metaurl` 和 `name` 两项即可,例如: + +```yaml +... +storageClasses: + - backend: + accessKey: '' + bucket: '' + metaurl: 'redis://:mypasswd@efgh123.redis.rds.aliyuncs.com/1' + name: myjfs + secretKey: '' + storage: '' + enabled: true + name: juicefs-sc + reclaimPolicy: Retain +... +``` + +> **提示**:如果你有多个 JuiceFS 文件系统,分别需要关联到 Kubernetes 集群不同的 storageClass,可以在 `storageClasses` 数组后面再加 storageClass 配置项,注意修改存储类的名称,避免冲突。 + +点击「安装」,等待应用安装完成。 + +![](../images/rancher-chart-installed.jpg) + +## 使用 JuiceFS 持久化数据 + +部署应用时,在存储配置中指定 `juicefs-sc` 即可。 + +![](../images/rancher-pvc.jpg) diff --git a/docs/zh_cn/tutorials/juicefs_on_wsl.md b/docs/zh_cn/tutorials/juicefs_on_wsl.md new file mode 100644 index 0000000..77935a0 --- /dev/null +++ b/docs/zh_cn/tutorials/juicefs_on_wsl.md @@ -0,0 +1,167 @@ +--- +sidebar_label: 在 WSL 中使用 JuiceFS +--- + +# 在 WSL 中使用 JuiceFS + +WSL 全称 Windows Subsystem for Linux,即适用于 Linux 的 Windows 子系统。它可以让你在 Windows 系统环境下运行大多数 GNU/Linux 原生命令、工具和程序,且不必像用虚拟机或双系统那样产生额外的硬件开销。 + +## 安装 WSL + +使用 WSL 要求必须是 Windows 10 2004 以上或 Windows 11。 + +查看当前系统的版本,可以通过组合键 Win + R 唤出运行程序,输入并运行 `winver`。 + +![](../images/wsl/winver.png) + +确认 Windows 版本以后,以管理员身份打开 PowerShell 或 Windows 命令提示符,运行安装命令: + +```powershell +wsl --install +``` + +该命令会下载最新的 Linux 内核,安装并将 WSL 2 作为默认版本,并安装 Linux 发行版(默认为 Ubuntu)。 + +也可以直接指定要安装的发行版: + +```powershell +wsl --install -d ubuntu +``` + +:::tip 提示 +`wsl --list --online` 命令可以查看所有可选的发行版。 +::: + +## 设置 Linux 用户和密码 + +WSL 安装完成以后,即可在开始菜单找到新安装的 Linux 发行版。 + +![](../images/wsl/startmenu.png) + +点击 Ubuntu 子系统的快捷方式,WSL 会打开 Linux 子系统的终端。初次运行会要求设置管理 Linux 子系统的用户和密码,根据提示设置即可。 + +![](../images/wsl/init.png) + +这里设置的用户名和密码有以下几点需要注意: + +- 此用户专用于该 Linux 子系统的管理,与 Windows 系统中的用户无关; +- 此用户将作为 Linux 子系统的默认用户,并在启动时自动登录; +- 此用户将被视为 Linux 子系统的管理员,允许执行 `sudo` 命令; +- WSL 中允许同时运行多个 Linux 子系统,且每个子系统都需要设置一个管理用户。 + +## 在 WSL 中使用 JuiceFS + +在 WSL 中使用 JuiceFS,即是在 Linux 系统中使用 JuiceFS,这里以社区版为例进行介绍。 + +### 安装客户端 + +依次执行命令,在 Linux 子系统中安装 JuiceFS 客户端: + +1. 获取最新的版本号 + + ```shell + JFS_LATEST_TAG=$(curl -s https://api.github.com/repos/juicedata/juicefs/releases/latest | grep 'tag_name' | cut -d '"' -f 4 | tr -d 'v') + ``` + +2. 下载客户端到当前目录 + + ```shell + wget "https://github.com/juicedata/juicefs/releases/download/v${JFS_LATEST_TAG}/juicefs-${JFS_LATEST_TAG}-linux-amd64.tar.gz" + ``` + +3. 解压安装包 + + ```shell + tar -zxf "juicefs-${JFS_LATEST_TAG}-linux-amd64.tar.gz" + ``` + +4. 
安装客户端 + + ```shell + sudo install juicefs /usr/local/bin + ``` + +### 创建文件系统 + +JuiceFS 是数据与元数据分离的分布式文件系统,通常用对象存储作为数据存储,用 Redis、PostgreSQL 或 MySQL 作为元数据存储。这里假设已经准备了如下材料: + +#### 对象存储 + +查看「[JuiceFS 支持的数据存储](../reference/how_to_setup_object_storage.md)」 + +- **Bucket Endpoint**:`https://myjfs.oss-cn-shanghai.aliyuncs.com` +- **Access Key ID**:`ABCDEFGHIJKLMNopqXYZ` +- **Access Key Secret**:`ZYXwvutsrqpoNMLkJiHgfeDCBA` + +#### 数据库 + +查看「[JuiceFS 支持的元数据引擎](../reference/how_to_setup_metadata_engine.md)」 + +- **数据库地址**:`myjfs-sh-abc.redis.rds.aliyuncs.com:6379` +- **数据库密码**:`mypassword` + +将私密信息写入环境变量: + +```shell +$ export ACCESS_KEY=ABCDEFGHIJKLMNopqXYZ +$ export SECRET_KEY=ZYXwvutsrqpoNMLkJiHgfeDCBA +$ export REDIS_PASSWORD=mypassword +``` + +创建名为 `myjfs` 的文件系统: + +```shell +juicefs format \ + --storage oss \ + --bucket https://myjfs.oss-cn-shanghai.aliyuncs.com \ + redis://myjfs-sh-abc.redis.rds.aliyuncs.com:6379/1 \ + myjfs +``` + +### 挂载和使用 + +把数据库密码写入环境变量: + +```shell +export REDIS_PASSWORD=mypassword +``` + +:::note 注意 +对象存储的 API 密钥信息仅在创建文件系统时需要设置,一旦文件系统创建成功,相应的密钥信息会被写入数据库,JuiceFS 客户端会在挂载文件系统时自动从数据库中读取,无需重复设置。 +::: + +挂载文件系统到用户家目录下的 `mnt`: + +```shell +sudo juicefs mount -d redis://myjfs-sh-abc.redis.rds.aliyuncs.com:6379/1 $HOME/mnt +``` + +如果需要从 Windows 系统访问 Linux 子系统中挂载的 JuiceFS 文件系统,在资源管理器左侧列表中找到 Linux 子系统,然后找到并打开挂载点路径即可。 + +![](../images/wsl/access-jfs-from-win.png) + +有关 JuiceFS 使用方面的更多内容请查阅官方文档。 + +## WSL 文件存储性能问题 + +WSL 打通了 Windows 与 Linux 子系统,允许二者相互访问彼此系统中存储的文件。 + +![](../images/wsl/windows-to-linux.png) + +但需要注意,从 Windows 访问 Linux 子系统或从 Linux 子系统访问 Windows 势必会因系统之间的转换而产生一定的性能开销。因此,推荐的做法是根据程序所在的系统来决定文件存储的位置,对于 Linux 子系统中的程序,它要处理的文件也应该存储在 Linux 子系统中性能才更理想。 + +在 Linux 子系统中,WSL 将 Windows 的各个盘符挂载到了 `/mnt`,比如 C: 盘在 Linux 子系统中的挂载点是 `/mnt/c`。 + +![](../images/wsl/mount-point.png) + +为了保证性能最优,在 WSL 中使用 JuiceFS 时,不论存储还是缓存路径都应设置在 Linux 子系统中。换言之,应该避免把存储或缓存设置在 `/mnt/c` 类似的 Windows 分区挂载点上。 + +通过使用 JuiceFS 自带的 `bench` 基准测试工具,结果显示,将文件系统挂载到 Windows(如 `/mnt/c`)的性能要比挂载到 Linux 子系统内部(如 `$HOME/mnt`)低 30% 左右。 + +## 已知问题 + +当通过 Windows 资源管理器拷贝文件到 Linux 子系统时,WSL 会自动为每个文件附加一个带有 `Zone.Identifier` 标识的同名文件。这是 NTFS 文件系统的一种安全防护机制,意在对外部文件的来源进行跟踪,但对于 WSL 来说,这个功能应该属于 bug 且已经有人在 GitHub 上向微软开发团队反馈 [#7456](https://github.com/microsoft/WSL/issues/7456)。 + +受此问题影响,通过 Windows 资源管理器向 Linux 子系统中挂载的 JuiceFS 文件系统存入文件时也会出现同样的问题。但在 Linux 子系统内部读写 JuiceFS 文件系统不受该 bug 的干扰。 + +![](../images/wsl/zone-identifier.png) diff --git a/docs/zh_cn/tutorials/qcloud.md b/docs/zh_cn/tutorials/qcloud.md new file mode 100644 index 0000000..9f9cbaf --- /dev/null +++ b/docs/zh_cn/tutorials/qcloud.md @@ -0,0 +1,308 @@ +--- +sidebar_label: 在腾讯云使用 JuiceFS +sidebar_position: 5 +slug: /clouds/qcloud +--- + +# 在腾讯云安装和使用 JuiceFS 存储 + +如下图所示,JuiceFS 存储由数据库和对象存储共同驱动。存入 JuiceFS 的文件会按照一定的规则被拆分成固定大小的数据块存储在对象存储中,数据对应的元数据则会存储在数据库中。 + +元数据完全独立存储,对文件的检索和处理并不会直接操作对象存储中的数据,而是先在数据库中操作元数据,只有当数据发生变化的时候,才会与对象存储交互。 + +这样的设计可以有效缩减对象存储在请求数量上的费用,同时也能让我们显著感受到 JuiceFS 带来的性能提升。 + +![](../images/juicefs-qcloud.png) + +## 准备 + +通过前面的架构描述,可以知道 JuiceFS 需要搭配数据库和对象存储一起使用。这里我们直接使用腾讯云的 CVM 云服务器,结合云数据库和 COS 对象存储。 + +在创建云计算资源时,尽量选择在相同的区域,这样可以让资源之间通过内网线路相互访问,避免使用公网线路产生额外的流量费用。 + +### 一、云服务器 CVM + +JuiceFS 对服务器硬件没有特殊要求,一般来说,云平台上最低配的云服务器也能稳定使用 JuiceFS,通常你只需要选择能够满足自身业务的配置即可。 + +需要特别说明的是,你不需要为使用 JuiceFS 重新购买服务器或是重装系统,JuiceFS 没有业务入侵性,不会对你现有的系统和程序造成任何的干扰,你完全可以在正在运行的服务器上安装和使用 JuiceFS。 + +JuiceFS 默认会占用不超过 1GB 的硬盘空间作为缓存,可以根据需要调整缓存空间的大小。该缓存是客户端与对象存储之间的一个数据缓冲层,选择性能更好的云盘,可以获得更好的性能表现。 + +在操作系统方面,腾讯云 CVM 提供的所有操作系统都可以安装 JuiceFS。 + +**本文使用的 CVM 配置如下:** + +| 服务器配置 | | +| ------------ | 
------------------------ | +| **CPU** | 1 核 | +| **内存** | 2 GB | +| **存储** | 50 GB | +| **操作系统** | Ubuntu Server 20.04 64位 | +| **地域** | 上海五区 | + +### 二、云数据库 + +JuiceFS 会将数据对应的元数据全部存储在独立的数据库中,目前已开放支持的数据库有 Redis、MySQL、PostgreSQL、TiKV 和 SQLite。 + +根据数据库类型的不同,带来的元数据性能和可靠性表现也各不相同。比如 Redis 是完全运行在内存上的,它能提供极致的性能,但运维难度较高,可靠性相对低。而 MySQL、PostgreSQL 是关系型数据库,性能不如 Redis,但运维难度不高,可靠性也有一定的保障。SQLite 是单机单文件关系型数据库,性能较低,也不适合用于大规模数据存储,但它免配置,适合单机少量数据存储的场景。 + +如果只是为了评估 JuiceFS 的功能,你可以在 CVM 云服务器手动搭建数据库使用。当你要在生产环境使用 JucieFS 时,如果没有专业的数据库运维团队,腾讯云的云数据库服务通常是更好的选择。 + +当然,如果你愿意,也可以使用其他云平台上提供的云数据库服务。但在这种情况下,你只能通过公网访问云数据库,也就是说,你必须向公网暴露数据库的端口,这存在极大的安全风险,最好不要这样使用。 + +如果必须通过公网访问数据库,可以通过云数据库控台提供的白名单功能,严格限制允许访问数据库的 IP 地址,从而提升数据的安全性。从另一个角度说,如果你通过公网无法成功连接云数据库,那么可以检查数据库的白名单,检查是不是该设置限制了你的访问。 + +| 数据库 | Redis | MySQL、PostgreSQL | SQLite | +| :----------: | :----------------------: | :------------------------: | :--------------------: | +| **性能** | 强 | 适中 | 弱 | +| **运维门槛** | 高 | 适中 | 低 | +| **可靠性** | 低 | 适中 | 低 | +| **应用场景** | 海量数据、分布式高频读写 | 海量数据、分布式中低频读写 | 少量数据单机中低频读写 | + +> **注意**:如果使用 JuiceFS 的[托管服务](https://juicefs.com/docs/zh/hosted_service.html),则无需单独准备数据库。 + +**本文使用了云数据库 TencentDB Redis,通过 VPC 私有网络与 CVM 云服务器交互访问:** + +| Redis 版本 | 5.0 社区版 | +| ------------ | ---------------- | +| **实例规格** | 1GB 内存版(标准架构) | +| **连接地址** | 192.168.5.5:6379 | +| **可用区** | 上海五区 | + +注意,数据库的连接地址取决于你创建的 VPC 网络设置,创建 Redis 实例时会自动在你定义的网段中获取地址。 + +![](../images/qcloud-redis-network.png) + +### 三、对象存储 COS + +JuiceFS 会将所有的数据都存储到对象存储中,它支持几乎所有的对象存储服务。但为了获得最佳的性能,当使用腾讯云 CVM 时,搭配腾讯云 COS 对象存储通常是最优选择。不过请注意,将 CVM 和 COS Bucket 选择在相同的地区,这样才能通过腾讯云的内网线路进行访问,不但延时低,而且不需要额外的流量费用。 + +> **提示**:腾讯云对象存储 COS 提供的唯一访问地址同时支持内网和外网访问,当通过内网访问时,COS 会自动解析到内网 IP,此时产生的流量均为内网流量,不会产生流量费用。 + +当然,如果你愿意,也可以使用其他云平台提供的对象存储服务,但不推荐这样做。首先,通过腾讯云 CVM 访问其他云平台的对象存储要走公网线路,对象存储会产生流量费用,而且这样的访问延时相比也会更高,可能会影响 JuiceFS 的性能发挥。 + +腾讯云 COS 有不同的存储级别,由于 JuiceFS 需要与对象存储频繁交互,建议使用标准存储。你可以搭配 COS 资源包使用,降低对象存储的使用成本。 + +### API 访问秘钥 + +腾讯云 COS 需要通过 API 进行访问,你需要准备访问秘钥,包括 `Access Key ID` 和 `Access Key Secret` ,[点此查看](https://cloud.tencent.com/document/product/598/37140)获取方式。 + +> **安全建议**:显式使用 API 访问秘钥可能导致密钥泄露,推荐为云服务器分配 [CAM 服务角色](https://cloud.tencent.com/document/product/598/19420)。当一台 CVM 被授予 COS 操作权限以后,无需使用 API 访问秘钥即可访问 COS。 + +## 安装 + +我当前使用的是 Ubuntu Server 20.04 64 位系统,依次执行以下命令可以下载最新版本客户端。你也可以访问 [JuiceFS GitHub Releases](https://github.com/juicedata/juicefs/releases) 页面选择其他版本。 + +```shell +$ JFS_LATEST_TAG=$(curl -s https://api.github.com/repos/juicedata/juicefs/releases/latest | grep 'tag_name' | cut -d '"' -f 4 | tr -d 'v') +``` + +```shell +$ wget "https://github.com/juicedata/juicefs/releases/download/v${JFS_LATEST_TAG}/juicefs-${JFS_LATEST_TAG}-linux-amd64.tar.gz" +``` + +下载完成以后,解压程序到 `juice` 文件夹: + +```shell +$ mkdir juice && tar -zxvf "juicefs-${JFS_LATEST_TAG}-linux-amd64.tar.gz" -C juice +``` + +将 JuiceFS 客户端安装到 `/usr/local/bin` : + +```shell +$ sudo install juice/juicefs /usr/local/bin +``` + +执行命令,看到返回 `juicefs` 的命令帮助信息,代表客户端安装成功。 + +```shell +$ juicefs +NAME: + juicefs - A POSIX file system built on Redis and object storage. + +USAGE: + juicefs [global options] command [command options] [arguments...] 
+ +VERSION: + 0.15.2 (2021-07-07T05:51:36Z 4c16847) + +COMMANDS: + format format a volume + mount mount a volume + umount unmount a volume + gateway S3-compatible gateway + sync sync between two storage + rmr remove directories recursively + info show internal information for paths or inodes + bench run benchmark to read/write/stat big/small files + gc collect any leaked objects + fsck Check consistency of file system + profile analyze access log + status show status of JuiceFS + warmup build cache for target directories/files + dump dump metadata into a JSON file + load load metadata from a previously dumped JSON file + help, h Shows a list of commands or help for one command + +GLOBAL OPTIONS: + --verbose, --debug, -v enable debug log (default: false) + --quiet, -q only warning and errors (default: false) + --trace enable trace log (default: false) + --no-agent Disable pprof (:6060) and gops (:6070) agent (default: false) + --help, -h show help (default: false) + --version, -V print only the version (default: false) + +COPYRIGHT: + Apache License 2.0 +``` + +JuiceFS 具有良好的跨平台兼容性,同时支持在 Linux、Windows 和 macOS 上使用。本文着重介绍 JuiceFS 在 Linux 系统上的安装和使用,如果你需要了解其他系统上的安装方法,请[查阅文档](../getting-started/installation.md)。 + +## 创建 JuiceFS 存储 + +JuiceFS 客户端安装好以后,现在就可以使用前面准备好的 Redis 数据库和 COS 对象存储来创建 JuiceFS 存储了。 + +严格意义上说,这一步操作应该叫做 “Format a volume”,即格式化一个卷。但考虑到有很多用户可能不了解或者不关心文件系统的标准术语,所以简单起见,我们就直白的把这个过程叫做“创建 JuiceFS 存储”。 + +以下命令使用 JuiceFS 客户端提供的 `format` 子命令创建了一个名为 `mystor` 的存储,即文件系统: + +```shell +$ juicefs format \ + --storage cos \ + --bucket https:// \ + --access-key \ + --secret-key \ + redis://:@192.168.5.5:6379/1 \ + mystor +``` + +**选项说明:** + +- `--storage`:指定对象存储类型,[点此查看](../reference/how_to_setup_object_storage.md#%E6%94%AF%E6%8C%81%E7%9A%84%E5%AD%98%E5%82%A8%E6%9C%8D%E5%8A%A1) JuiceFS 支持的对象存储。 +- `--bucket`:对象存储的 Bucket 访问域名,可以在 COS 的管理控制台找到。 + ![cos-bucket-url](../images/cos-bucket-url.png) +- `--access-key` 和 `--secret-key`:访问对象存储 API 的秘钥对,[点此查看](https://cloud.tencent.com/document/product/598/37140)获取方式。 + +> Redis 6.0 身份认证需要用户名和密码两个参数,地址格式为 `redis://username:password@redis-server-url:6379/1`。目前腾讯云数据库 Redis 版只提供 Reids 4.0 和 5.0 两个版本,认证身份只需要密码,在设置 Redis 服务器地址时只需留空用户名即可,例如:`redis://:password@redis-server-url:6379/1` + +看到类似下面的输出,代表文件系统创建成功了。 + +```shell +2021/07/30 11:44:31.904157 juicefs[44060] : Meta address: redis://@192.168.5.5:6379/1 +2021/07/30 11:44:31.907083 juicefs[44060] : AOF is not enabled, you may lose data if Redis is not shutdown properly. +2021/07/30 11:44:31.907634 juicefs[44060] : Ping redis: 474.98µs +2021/07/30 11:44:31.907850 juicefs[44060] : Data uses cos://juice-0000000000/mystor/ +2021/07/30 11:44:32.149692 juicefs[44060] : Volume is formatted as {Name:mystor UUID:dbf05314-57af-4a2c-8ac1-19329d73170c Storage:cos Bucket:https://juice-0000000000.cos.ap-shanghai.myqcloud.com AccessKey:AKIDGLxxxxxxxxxxxxxxxxxxZ8QRBdpkOkp SecretKey:removed BlockSize:4096 Compression:none Shards:0 Partitions:0 Capacity:0 Inodes:0 EncryptKey:} +``` + +## 挂载 JuiceFS 存储 + +文件系统创建完成,对象存储相关的信息会被存入数据库,挂载时无需再输入对象存储的 Bucket 和秘钥等信息。 + +使用 `mount` 子命令,将文件系统挂载到 `/mnt/jfs` 目录: + +```shell +$ sudo juicefs mount -d redis://:@192.168.5.5:6379/1 /mnt/jfs +``` + +> **注意**:挂载文件系统时,只需填写 Redis 数据库地址,不需要文件系统名称。默认的缓存路径为 `/var/jfsCache`,请确保当前用户有足够的读写权限。 + +看到类似下面的输出,代表文件系统挂载成功。 + +```shell +2021/07/30 11:49:56.842211 juicefs[44175] : Meta address: redis://@192.168.5.5:6379/1 +2021/07/30 11:49:56.845100 juicefs[44175] : AOF is not enabled, you may lose data if Redis is not shutdown properly. 
+2021/07/30 11:49:56.845562 juicefs[44175] : Ping redis: 383.157µs +2021/07/30 11:49:56.846164 juicefs[44175] : Data use cos://juice-0000000000/mystor/ +2021/07/30 11:49:56.846731 juicefs[44175] : Disk cache (/var/jfsCache/dbf05314-57af-4a2c-8ac1-19329d73170c/): capacity (1024 MB), free ratio (10%), max pending pages (15) +2021/07/30 11:49:57.354763 juicefs[44175] : OK, mystor is ready at /mnt/jfs +``` + +使用 `df` 命令,可以看到文件系统的挂载情况: + +```shell +$ df -Th +文件系统 类型 容量 已用 可用 已用% 挂载点 +JuiceFS:mystor fuse.juicefs 1.0P 64K 1.0P 1% /mnt/jfs +``` + +文件系统挂载成功以后,现在就可以像使用本地硬盘那样,在 `/mnt/jfs` 目录中存储数据了。 + +> **多主机共享**:JuiceFS 存储支持被多台云服务器同时挂载使用,你可以在其他 CVM 上安装 JuiceFS 客户端,然后使用 `redis://:@192.168.5.5:6379/1` 数据库地址挂载文件系统到每一台主机上。 + +## 查看文件系统状态 + +使用 JuiceFS 客户端的 `status` 子命令可以查看一个文件系统的基本信息和连接状态。 + +```shell +$ juicefs status redis://:@192.168.5.5:6379/1 + +2021/07/30 11:51:17.864767 juicefs[44196] : Meta address: redis://@192.168.5.5:6379/1 +2021/07/30 11:51:17.866619 juicefs[44196] : AOF is not enabled, you may lose data if Redis is not shutdown properly. +2021/07/30 11:51:17.867092 juicefs[44196] : Ping redis: 379.391µs +{ + "Setting": { + "Name": "mystor", + "UUID": "dbf05314-57af-4a2c-8ac1-19329d73170c", + "Storage": "cos", + "Bucket": "https://juice-0000000000.cos.ap-shanghai.myqcloud.com", + "AccessKey": "AKIDGLxxxxxxxxxxxxxxxxx8QRBdpkOkp", + "BlockSize": 4096, + "Compression": "none", + "Shards": 0, + "Partitions": 0, + "Capacity": 0, + "Inodes": 0 + }, + "Sessions": [ + { + "Sid": 1, + "Heartbeat": "2021-07-30T11:49:56+08:00", + "Version": "0.15.2 (2021-07-07T05:51:36Z 4c16847)", + "Hostname": "VM-5-6-ubuntu", + "MountPoint": "/mnt/jfs", + "ProcessID": 44175 + }, + { + "Sid": 3, + "Heartbeat": "2021-07-30T11:50:56+08:00", + "Version": "0.15.2 (2021-07-07T05:51:36Z 4c16847)", + "Hostname": "VM-5-6-ubuntu", + "MountPoint": "/mnt/jfs", + "ProcessID": 44185 + } + ] +} +``` + +## 卸载 JuiceFS 存储 + +使用 JuiceFS 客户端提供的 `umount` 命令即可卸载文件系统,比如: + +```shell +$ sudo juicefs umount /mnt/jfs +``` + +> **注意**:强制卸载使用中的文件系统可能导致数据损坏或丢失,请务必谨慎操作。 + +## 开机自动挂载 + +如果你不想每次重启系统都要重新手动挂载 JuiceFS 存储,可以设置自动挂载文件系统。 + +首先,需要将 `juicefs` 客户端重命名为 `mount.juicefs` 并复制到 `/sbin/` 目录: + +```shell +$ sudo cp juice/juicefs /sbin/mount.juicefs +``` + +编辑 `/etc/fstab` 配置文件,新增一条记录: + +```shell +redis://:@192.168.5.5:6379/1 /mnt/jfs juicefs _netdev,cache-size=20480 0 0 +``` + +挂载选项中 `cache-size=20480` 代表分配 20GB 本地磁盘空间作为 JuiceFS 的缓存使用,请根据你实际的 CVM 硬盘容量去决定分配的缓存大小。一般来说,为 JuiceFS 分配更大的缓存空间,可以获得更好的性能表现。 + +你可以根据需要调整上述配置中的 FUSE 挂载选项,更多内容请[查阅文档](../reference/fuse_mount_options.md)。 + +> **注意**:请将上述配置文件中的 Redis 地址、挂载点以及挂载选项,替换成你实际的信息。 diff --git a/docs/zh_cn/windows.md b/docs/zh_cn/windows.md new file mode 100644 index 0000000..e672c27 --- /dev/null +++ b/docs/zh_cn/windows.md @@ -0,0 +1,64 @@ +# Using JuiceFS on Windows + + +## Install dependencies + +JuiceFS depends on [WinFsp](http://www.secfs.net/winfsp/rel), please install it first. + + +## Build JuiceFS from source + +We can cross compile JuiceFS for Windows platform on Linux or macOS. + +1. Install [mingw-w64](http://mingw-w64.org) on Linux or macOS. + + On Linux, it can be installed using the distro's package manager like `yum` or `apt`. + + On macOS, use [Homebrew](https://brew.sh) to install: `brew install mingw-w64` + +2. 
Build JuiceFS for Windows: + +```bash +$ git clone https://github.com/juicedata/juicefs.git +$ cd juicefs +$ make juicefs.exe +``` + + +## Use JuiceFS + +### Start Redis Server + +JuiceFS requires a Redis, there is a [Windows version of Redis](https://github.com/tporadowski/redis), +please download the latest release and launch the Redis server. + + +### Format JuiceFS + +For test purpose, we can use a local disk to simulate an object store: + +``` +PS C:\> .\juicefs.exe format localhost test +2021/03/22 15:16:18.003547 juicefs[7064] : Meta address: redis://localhost +2021/03/22 15:16:18.022972 juicefs[7064] : AOF is not enabled, you may lose data if Redis is not shutdown properly. +2021/03/22 15:16:18.024710 juicefs[7064] : Data use file:///C:/jfs/local/test/ +``` + +For other supported object storage, please check out ["How to Setup Object Storage"](reference/how_to_setup_object_storage.md). + +### Mount JuiceFS + +Select an unused drive letter, such as `Z:`, then execute: + +``` +PS C:\> .\juicefs.exe mount localhost Z: +2021/03/22 15:16:18.003547 juicefs[7064] : Meta address: redis://localhost +2021/03/22 15:16:18.022972 juicefs[7064] : AOF is not enabled, you may lose data if Redis is not shutdown properly. +2021/03/22 15:16:18.024710 juicefs[7064] : Data use file:///C:/jfs/local/test/ +2021/03/22 15:16:18.024710 juicefs[7064] : Cache: C:\Users\bob\.juicefs\cache\7088b6fa-ef2b-4792-b6c9-98fcdd6d45fb capacity: 1024 MB +The service juicefs has been started. +``` + +Then we can use JuiceFS as a shared disk drive `Z:`, it looks like: + +![JuiceFS on Windows](images/juicefs-on-windows.png) diff --git a/fstests/Makefile b/fstests/Makefile new file mode 100644 index 0000000..45f9543 --- /dev/null +++ b/fstests/Makefile @@ -0,0 +1,38 @@ +DURATION ?= 10 + +all: fsracer fsx xattrs + +xattrs: + touch /jfs/test_xattrs + setfattr -n user.k -v value /jfs/test_xattrs + getfattr -n user.k /jfs/test_xattrs | grep -q user.k= + +fsracer: healthcheck secfs.test/tools/bin/fsracer + secfs.test/tools/bin/fsracer $(DURATION) /jfs >fsracer.log + make healthcheck + +fsx: healthcheck secfs.test/tools/bin/fsx + secfs.test/tools/bin/fsx -d $(DURATION) -p 10000 -F 100000 /jfs/fsx.out + make healthcheck + +setup: + redis-server & + mkdir -p /jfs + ../juicefs format localhost unittest + ../juicefs mount -d --no-usage-report --enable-xattr localhost /jfs + +healthcheck: + pgrep juicefs + +secfs.test/tools/bin/fsx: secfs.test + +secfs.test/tools/bin/fsracer: secfs.test + +secfs.test: + git clone https://github.com/billziss-gh/secfs.test.git + make -C secfs.test >secfs.test-build.log 2>&1 + +flock: + git clone https://github.com/gofrs/flock.git + mkdir /jfs/tmp + cd flock && go mod init github.com/gofrs/flock.git && go mod tidy && TMPDIR=/jfs/tmp go test . 
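The Makefile above wires up a small local test harness: `setup` starts Redis, formats a test volume and mounts it at `/jfs`, while the default target runs the fsracer, fsx and xattrs checks against that mount. As a rough sketch only (assuming `redis-server`, the build dependencies of secfs.test and a freshly built `../juicefs` binary are available, as the `setup` target expects), the targets might be exercised like this:

```shell
# Minimal sketch of driving the fstests targets defined above; the paths and
# prerequisites (redis-server, ../juicefs, /jfs) come from the Makefile itself.
cd fstests
make setup            # start redis, format a test volume and mount it at /jfs
make DURATION=30      # default target: fsracer, fsx and xattrs, with DURATION overridden to 30
make flock            # optional: run the gofrs/flock test suite against the mount
```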
diff --git a/go.mod b/go.mod new file mode 100644 index 0000000..6a7210d --- /dev/null +++ b/go.mod @@ -0,0 +1,90 @@ +module github.com/juicedata/juicefs + +go 1.16 + +require ( + cloud.google.com/go v0.39.0 + github.com/Arvintian/scs-go-sdk v1.1.0 + github.com/Azure/azure-sdk-for-go v11.1.1-beta+incompatible + github.com/Azure/go-autorest/autorest v0.11.17 // indirect + github.com/DataDog/zstd v1.4.5 + github.com/IBM/ibm-cos-sdk-go v1.6.0 + github.com/NetEase-Object-Storage/nos-golang-sdk v0.0.0-20171031020902-cc8892cb2b05 + github.com/agiledragon/gomonkey/v2 v2.2.0 + github.com/aliyun/aliyun-oss-go-sdk v2.1.0+incompatible + github.com/aws/aws-sdk-go v1.35.20 + github.com/baidubce/bce-sdk-go v0.9.47 + github.com/baiyubin/aliyun-sts-go-sdk v0.0.0-20180326062324-cfa1a18b161f // indirect + github.com/billziss-gh/cgofuse v1.4.0 + github.com/ceph/go-ceph v0.4.0 + github.com/colinmarc/hdfs/v2 v2.2.0 + github.com/deckarep/golang-set v1.7.1 // indirect + github.com/dgraph-io/badger/v3 v3.2103.2 + github.com/dnaeon/go-vcr v1.2.0 // indirect + github.com/emersion/go-webdav v0.3.0 + github.com/erikdubbelboer/gspt v0.0.0-20210805194459-ce36a5128377 // indirect + github.com/go-redis/redis/v8 v8.4.0 + github.com/go-sql-driver/mysql v1.6.0 + github.com/gofrs/flock v0.8.1 + github.com/golang-jwt/jwt v3.2.2+incompatible // indirect + github.com/google/btree v1.0.1 + github.com/google/gops v0.3.13 + github.com/google/readahead v0.0.0-20161222183148-eaceba169032 // indirect + github.com/google/uuid v1.1.2 + github.com/hanwen/go-fuse/v2 v2.1.1-0.20210611132105-24a1dfe6b4f8 + github.com/hashicorp/consul/api v1.11.0 + github.com/hashicorp/go-hclog v0.14.1 + github.com/huaweicloud/huaweicloud-sdk-go-obs v3.21.1+incompatible + github.com/hungys/go-lz4 v0.0.0-20170805124057-19ff7f07f099 + github.com/jcmturner/gokrb5/v8 v8.4.2 + github.com/juicedata/godaemon v0.0.0-20210629045518-3da5144a127d + github.com/juju/ratelimit v1.0.1 + github.com/kr/fs v0.1.0 // indirect + github.com/ks3sdklib/aws-sdk-go v1.0.12 + github.com/lib/pq v1.8.0 + github.com/mattn/go-isatty v0.0.12 + github.com/mattn/go-sqlite3 v2.0.1+incompatible + github.com/minio/cli v1.22.0 + github.com/minio/minio v0.0.0-20210206053228-97fe57bba92c + github.com/minio/minio-go v6.0.14+incompatible + github.com/nats-io/nats-server/v2 v2.6.2 // indirect + github.com/ncw/swift v1.0.53 + github.com/pingcap/log v0.0.0-20210625125904-98ed8e2eb1c7 + github.com/pkg/errors v0.9.1 + github.com/pkg/sftp v1.10.0 + github.com/pkg/xattr v0.4.4 + github.com/pquerna/ffjson v0.0.0-20190930134022-aa0246cd15f7 // indirect + github.com/prometheus/client_golang v1.9.0 + github.com/prometheus/client_model v0.2.0 + github.com/qingstor/qingstor-sdk-go/v4 v4.4.0 + github.com/qiniu/api.v7/v7 v7.8.0 + github.com/satori/go.uuid v1.2.0 + github.com/satori/uuid v1.2.0 // indirect + github.com/shirou/gopsutil v3.21.3+incompatible // indirect + github.com/sirupsen/logrus v1.7.0 + github.com/smartystreets/goconvey v1.6.4 + github.com/tencentyun/cos-go-sdk-v5 v0.7.8 + github.com/tidwall/gjson v1.9.3 // indirect + github.com/tikv/client-go/v2 v2.0.0-rc + github.com/tklauser/go-sysconf v0.3.6 // indirect + github.com/upyun/go-sdk/v3 v3.0.2 + github.com/urfave/cli/v2 v2.3.0 + github.com/vbauerster/mpb/v7 v7.0.3 + github.com/viki-org/dnscache v0.0.0-20130720023526-c70c1f23c5d8 + golang.org/x/crypto v0.0.0-20210616213533-5ff15b29337e + golang.org/x/net v0.0.0-20210405180319-a5a99cb37ef4 + golang.org/x/oauth2 v0.0.0-20190517181255-950ef44c6e07 + golang.org/x/sys v0.0.0-20210831042530-f4d43177bf5e 
+ golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1 + google.golang.org/api v0.5.0 + gopkg.in/kothar/go-backblaze.v0 v0.0.0-20210124194846-35409b867216 + xorm.io/xorm v1.0.7 +) + +replace github.com/minio/minio v0.0.0-20210206053228-97fe57bba92c => github.com/juicedata/minio v0.0.0-20210222051636-e7cabdf948f4 + +replace github.com/hanwen/go-fuse/v2 v2.1.1-0.20210611132105-24a1dfe6b4f8 => github.com/juicedata/go-fuse/v2 v2.1.1-0.20210926080226-cfe1ec802a7f + +replace github.com/golang-jwt/jwt v3.2.2+incompatible => github.com/dgrijalva/jwt-go v3.2.0+incompatible + +replace github.com/dgrijalva/jwt-go v3.2.0+incompatible => github.com/golang-jwt/jwt v3.2.2+incompatible diff --git a/go.sum b/go.sum new file mode 100644 index 0000000..f63a4a4 --- /dev/null +++ b/go.sum @@ -0,0 +1,1415 @@ +cloud.google.com/go v0.26.0/go.mod h1:aQUYkXzVsufM+DwF1aE+0xfcU+56JwCaLick0ClmMTw= +cloud.google.com/go v0.34.0/go.mod h1:aQUYkXzVsufM+DwF1aE+0xfcU+56JwCaLick0ClmMTw= +cloud.google.com/go v0.39.0 h1:UgQP9na6OTfp4dsAiz/eFpFA1C6tPdH5wiRdi19tuMw= +cloud.google.com/go v0.39.0/go.mod h1:rVLT6fkc8chs9sfPtFc1SBH6em7n+ZoXaG+87tDISts= +git.apache.org/thrift.git v0.13.0 h1:/3bz5WZ+sqYArk7MBBBbDufMxKKOA56/6JO6psDpUDY= +git.apache.org/thrift.git v0.13.0/go.mod h1:fPE2ZNJGynbRyZ4dJvy6G277gSllfV2HJqblrnkyeyg= +gitea.com/xorm/sqlfiddle v0.0.0-20180821085327-62ce714f951a h1:lSA0F4e9A2NcQSqGqTOXqu2aRi/XEQxDCBwM8yJtE6s= +gitea.com/xorm/sqlfiddle v0.0.0-20180821085327-62ce714f951a/go.mod h1:EXuID2Zs0pAQhH8yz+DNjUbjppKQzKFAn28TMYPB6IU= +github.com/Arvintian/scs-go-sdk v1.1.0 h1:vqVOfoMD6XSr7eG1a2M9oSiQwhDZYKKdH2rrZRPx6So= +github.com/Arvintian/scs-go-sdk v1.1.0/go.mod h1:DMIkwn27iuTIo9o7INj3L/bcA7bW6QwljWC3ZpxjkXw= +github.com/Azure/azure-pipeline-go v0.2.2 h1:6oiIS9yaG6XCCzhgAgKFfIWyo4LLCiDhZot6ltoThhY= +github.com/Azure/azure-pipeline-go v0.2.2/go.mod h1:4rQ/NZncSvGqNkkOsNpOU1tgoNuIlp9AfUH5G1tvCHc= +github.com/Azure/azure-sdk-for-go v11.1.1-beta+incompatible h1:UanIfAyKxwQgLNfs8LIVfWsSB6JaA0Bj5grnEldOFok= +github.com/Azure/azure-sdk-for-go v11.1.1-beta+incompatible/go.mod h1:9XXNKU+eRnpl9moKnB4QOLf1HestfXbmab5FXxiDBjc= +github.com/Azure/azure-storage-blob-go v0.10.0 h1:evCwGreYo3XLeBV4vSxLbLiYb6e0SzsJiXQVRGsRXxs= +github.com/Azure/azure-storage-blob-go v0.10.0/go.mod h1:ep1edmW+kNQx4UfWM9heESNmQdijykocJ0YOxmMX8SE= +github.com/Azure/go-autorest v14.2.0+incompatible h1:V5VMDjClD3GiElqLWO7mz2MxNAK/vTfRHdAubSIPRgs= +github.com/Azure/go-autorest v14.2.0+incompatible/go.mod h1:r+4oMnoxhatjLLJ6zxSWATqVooLgysK6ZNox3g/xq24= +github.com/Azure/go-autorest/autorest v0.9.0/go.mod h1:xyHB1BMZT0cuDHU7I0+g046+BFDTQ8rEZB0s4Yfa6bI= +github.com/Azure/go-autorest/autorest v0.11.17 h1:2zCdHwNgRH+St1J+ZMf66xI8aLr/5KMy+wWLH97zwYM= +github.com/Azure/go-autorest/autorest v0.11.17/go.mod h1:eipySxLmqSyC5s5k1CLupqet0PSENBEDP93LQ9a8QYw= +github.com/Azure/go-autorest/autorest/adal v0.5.0/go.mod h1:8Z9fGy2MpX0PvDjB1pEgQTmVqjGhiHBW7RJJEciWzS0= +github.com/Azure/go-autorest/autorest/adal v0.8.3/go.mod h1:ZjhuQClTqx435SRJ2iMlOxPYt3d2C/T/7TiQCVZSn3Q= +github.com/Azure/go-autorest/autorest/adal v0.9.1/go.mod h1:/c022QCutn2P7uY+/oQWWNcK9YU+MH96NgK+jErpbcg= +github.com/Azure/go-autorest/autorest/adal v0.9.5 h1:Y3bBUV4rTuxenJJs41HU3qmqsb+auo+a3Lz+PlJPpL0= +github.com/Azure/go-autorest/autorest/adal v0.9.5/go.mod h1:B7KF7jKIeC9Mct5spmyCB/A8CG/sEz1vwIRGv/bbw7A= +github.com/Azure/go-autorest/autorest/date v0.1.0/go.mod h1:plvfp3oPSKwf2DNjlBjWF/7vwR+cUD/ELuzDCXwHUVA= +github.com/Azure/go-autorest/autorest/date v0.2.0/go.mod 
h1:vcORJHLJEh643/Ioh9+vPmf1Ij9AEBM5FuBIXLmIy0g= +github.com/Azure/go-autorest/autorest/date v0.3.0 h1:7gUk1U5M/CQbp9WoqinNzJar+8KY+LPI6wiWrP/myHw= +github.com/Azure/go-autorest/autorest/date v0.3.0/go.mod h1:BI0uouVdmngYNUzGWeSYnokU+TrmwEsOqdt8Y6sso74= +github.com/Azure/go-autorest/autorest/mocks v0.1.0/go.mod h1:OTyCOPRA2IgIlWxVYxBee2F5Gr4kF2zd2J5cFRaIDN0= +github.com/Azure/go-autorest/autorest/mocks v0.2.0/go.mod h1:OTyCOPRA2IgIlWxVYxBee2F5Gr4kF2zd2J5cFRaIDN0= +github.com/Azure/go-autorest/autorest/mocks v0.3.0/go.mod h1:a8FDP3DYzQ4RYfVAxAN3SVSiiO77gL2j2ronKKP0syM= +github.com/Azure/go-autorest/autorest/mocks v0.4.0/go.mod h1:LTp+uSrOhSkaKrUy935gNZuuIPPVsHlr9DSOxSayd+k= +github.com/Azure/go-autorest/autorest/mocks v0.4.1 h1:K0laFcLE6VLTOwNgSxaGbUcLPuGXlNkbVvq4cW4nIHk= +github.com/Azure/go-autorest/autorest/mocks v0.4.1/go.mod h1:LTp+uSrOhSkaKrUy935gNZuuIPPVsHlr9DSOxSayd+k= +github.com/Azure/go-autorest/logger v0.1.0/go.mod h1:oExouG+K6PryycPJfVSxi/koC6LSNgds39diKLz7Vrc= +github.com/Azure/go-autorest/logger v0.2.0 h1:e4RVHVZKC5p6UANLJHkM4OfR1UKZPj8Wt8Pcx+3oqrE= +github.com/Azure/go-autorest/logger v0.2.0/go.mod h1:T9E3cAhj2VqvPOtCYAvby9aBXkZmbF5NWuPV8+WeEW8= +github.com/Azure/go-autorest/tracing v0.5.0/go.mod h1:r/s2XiOKccPW3HrqB+W0TQzfbtp2fGCgRFtBroKn4Dk= +github.com/Azure/go-autorest/tracing v0.6.0 h1:TYi4+3m5t6K48TGI9AUdb+IzbnSxvnvUMfuitfgcfuo= +github.com/Azure/go-autorest/tracing v0.6.0/go.mod h1:+vhtPC754Xsa23ID7GlGsrdKBpUA79WCAKPPZVC2DeU= +github.com/Azure/go-ntlmssp v0.0.0-20200615164410-66371956d46c h1:/IBSNwUN8+eKzUzbJPqhK839ygXJ82sde8x3ogr6R28= +github.com/Azure/go-ntlmssp v0.0.0-20200615164410-66371956d46c/go.mod h1:chxPXzSsl7ZWRAuOIE23GDNzjWuZquvFlgA8xmpunjU= +github.com/BurntSushi/toml v0.3.1 h1:WXkYYl6Yr3qBf1K79EBnL4mak0OimBfB0XUf9Vl28OQ= +github.com/BurntSushi/toml v0.3.1/go.mod h1:xHWCNGjB5oqiDr8zfno3MHue2Ht5sIBksp03qcyfWMU= +github.com/DataDog/datadog-go v2.2.0+incompatible/go.mod h1:LButxg5PwREeZtORoXG3tL4fMGNddJ+vMq1mwgfaqoQ= +github.com/DataDog/zstd v1.4.5 h1:EndNeuB0l9syBZhut0wns3gV1hL8zX8LIu6ZiVHWLIQ= +github.com/DataDog/zstd v1.4.5/go.mod h1:1jcaCB/ufaK+sKp1NBhlGmpz41jOoPQ35bpF36t7BBo= +github.com/IBM/ibm-cos-sdk-go v1.6.0 h1:09jPKbUZw5XX6YT0cpQHpjS0Lq+YZrv3+ApQy/skKKc= +github.com/IBM/ibm-cos-sdk-go v1.6.0/go.mod h1:Pa7XzoyngeWPnqGol8ZF+gwUeLEAyDHkkgIJ79dXARY= +github.com/Knetic/govaluate v3.0.1-0.20171022003610-9aa49832a739+incompatible/go.mod h1:r7JcOSlj0wfOMncg0iLm8Leh48TZaKVeNIfJntJ2wa0= +github.com/KyleBanks/depth v1.2.1/go.mod h1:jzSb9d0L43HxTQfT+oSA1EEp2q+ne2uh6XgeJcm8brE= +github.com/NetEase-Object-Storage/nos-golang-sdk v0.0.0-20171031020902-cc8892cb2b05 h1:NEPjpPSOSDDmnix+VANw/CfUs1fAorLIaz/IFz2eQ2o= +github.com/NetEase-Object-Storage/nos-golang-sdk v0.0.0-20171031020902-cc8892cb2b05/go.mod h1:0N5CbwYI/8V1T6YOEwkgMvLmiGDNn661vLutBZQrC2c= +github.com/OneOfOne/xxhash v1.2.2 h1:KMrpdQIwFcEqXDklaen+P1axHaj9BSKzvpUUfnHldSE= +github.com/OneOfOne/xxhash v1.2.2/go.mod h1:HSdplMjZKSmBqAxg5vPj2TmRDmfkzw+cTzAElWljhcU= +github.com/PuerkitoBio/goquery v1.5.1/go.mod h1:GsLWisAFVj4WgDibEWF4pvYnkVQBpKBKeU+7zCJoLcc= +github.com/PuerkitoBio/purell v1.1.0/go.mod h1:c11w/QuzBsJSee3cPx9rAFu61PvFxuPbtSwDGJws/X0= +github.com/PuerkitoBio/purell v1.1.1/go.mod h1:c11w/QuzBsJSee3cPx9rAFu61PvFxuPbtSwDGJws/X0= +github.com/PuerkitoBio/urlesc v0.0.0-20170810143723-de5bf2ad4578/go.mod h1:uGdkoq3SwY9Y+13GIhn11/XLaGBb4BfwItxLd5jeuXE= +github.com/QcloudApi/qcloud_sign_golang v0.0.0-20141224014652-e4130a326409/go.mod h1:1pk82RBxDY/JZnPQrtqHlUFfCctgdorsd9M06fMynOM= 
+github.com/ReneKroon/ttlcache/v2 v2.3.0/go.mod h1:zbo6Pv/28e21Z8CzzqgYRArQYGYtjONRxaAKGxzQvG4= +github.com/Shopify/sarama v1.19.0/go.mod h1:FVkBWblsNy7DGZRfXLU0O9RCGt5g3g3yEuWXgklEdEo= +github.com/Shopify/sarama v1.27.2/go.mod h1:g5s5osgELxgM+Md9Qni9rzo7Rbt+vvFQI4bt/Mc93II= +github.com/Shopify/toxiproxy v2.1.4+incompatible/go.mod h1:OXgGpZ6Cli1/URJOF1DMxUHB2q5Ap20/P/eIdh4G0pI= +github.com/StackExchange/wmi v0.0.0-20190523213315-cbe66965904d h1:G0m3OIz70MZUWq3EgK3CesDbo8upS2Vm9/P3FtgI+Jk= +github.com/StackExchange/wmi v0.0.0-20190523213315-cbe66965904d/go.mod h1:3eOhrUMpNV+6aFIbp5/iudMxNCF27Vw2OZgy4xEx0Fg= +github.com/VividCortex/ewma v1.2.0 h1:f58SaIzcDXrSy3kWaHNvuJgJ3Nmz59Zji6XoJR/q1ow= +github.com/VividCortex/ewma v1.2.0/go.mod h1:nz4BbCtbLyFDeC9SUHbtcT5644juEuWfUAUnGx7j5l4= +github.com/VividCortex/gohistogram v1.0.0/go.mod h1:Pf5mBqqDxYaXu3hDrrU+w6nw50o/4+TcAqDqk/vUH7g= +github.com/VividCortex/mysqlerr v0.0.0-20200629151747-c28746d985dd/go.mod h1:f3HiCrHjHBdcm6E83vGaXh1KomZMA2P6aeo3hKx/wg0= +github.com/Xeoncross/go-aesctr-with-hmac v0.0.0-20200623134604-12b17a7ff502/go.mod h1:pmnBM9bxWSiHvC/gSWunUIyDvGn33EkP2CUjxFKtTTM= +github.com/acarl005/stripansi v0.0.0-20180116102854-5a71ef0e047d h1:licZJFw2RwpHMqeKTCYkitsPqHNxTmd4SNR5r94FGM8= +github.com/acarl005/stripansi v0.0.0-20180116102854-5a71ef0e047d/go.mod h1:asat636LX7Bqt5lYEZ27JNDcqxfjdBQuJ/MM4CN/Lzo= +github.com/afex/hystrix-go v0.0.0-20180502004556-fa1af6a1f4f5/go.mod h1:SkGFH1ia65gfNATL8TAiHDNxPzPdmEL5uirI2Uyuz6c= +github.com/agiledragon/gomonkey/v2 v2.2.0 h1:QJWqpdEhGV/JJy70sZ/LDnhbSlMrqHAWHcNOjz1kyuI= +github.com/agiledragon/gomonkey/v2 v2.2.0/go.mod h1:ap1AmDzcVOAz1YpeJ3TCzIgstoaWLA6jbbgxfB4w2iY= +github.com/alecthomas/participle v0.2.1 h1:4AVLj1viSGa4LG5HDXKXrm5xRx19SB/rS/skPQB1Grw= +github.com/alecthomas/participle v0.2.1/go.mod h1:SW6HZGeZgSIpcUWX3fXpfZhuaWHnmoD5KCVaqSaNTkk= +github.com/alecthomas/template v0.0.0-20160405071501-a0175ee3bccc/go.mod h1:LOuyumcjzFXgccqObfd/Ljyb9UuFJ6TxHnclSeseNhc= +github.com/alecthomas/template v0.0.0-20190718012654-fb15b899a751/go.mod h1:LOuyumcjzFXgccqObfd/Ljyb9UuFJ6TxHnclSeseNhc= +github.com/alecthomas/units v0.0.0-20151022065526-2efee857e7cf/go.mod h1:ybxpYRFXyAe+OPACYpWeL0wqObRcbAqCMya13uyzqw0= +github.com/alecthomas/units v0.0.0-20190717042225-c3de453c63f4/go.mod h1:ybxpYRFXyAe+OPACYpWeL0wqObRcbAqCMya13uyzqw0= +github.com/alecthomas/units v0.0.0-20190924025748-f65c72e2690d/go.mod h1:rBZYJk541a8SKzHPHnH3zbiI+7dagKZ0cgpgrD7Fyho= +github.com/aliyun/aliyun-oss-go-sdk v2.1.0+incompatible h1:90Z2Cp7EqcbaYfVwVjmQoK8kgoFPz+doQlujcwe1BRg= +github.com/aliyun/aliyun-oss-go-sdk v2.1.0+incompatible/go.mod h1:T/Aws4fEfogEE9v+HPhhw+CntffsBHJ8nXQCwKr0/g8= +github.com/alvaroloes/enumer v1.1.2/go.mod h1:FxrjvuXoDAx9isTJrv4c+T410zFi0DtXIT0m65DJ+Wo= +github.com/andybalholm/cascadia v1.1.0/go.mod h1:GsXiBklL0woXo1j/WYWtSYYC4ouU9PqHO0sqidkEA4Y= +github.com/antihax/optional v0.0.0-20180407024304-ca021399b1a6/go.mod h1:V8iCPQYkqmusNa815XgQio277wI47sdRh1dUOLdyC6Q= +github.com/apache/thrift v0.12.0/go.mod h1:cp2SuWMxlEZw2r+iP2GNCdIi4C1qmUzdZFSVb+bacwQ= +github.com/apache/thrift v0.13.0/go.mod h1:cp2SuWMxlEZw2r+iP2GNCdIi4C1qmUzdZFSVb+bacwQ= +github.com/appleboy/gin-jwt/v2 v2.6.3/go.mod h1:MfPYA4ogzvOcVkRwAxT7quHOtQmVKDpTwxyUrC2DNw0= +github.com/appleboy/gofight/v2 v2.1.2/go.mod h1:frW+U1QZEdDgixycTj4CygQ48yLTUhplt43+Wczp3rw= +github.com/armon/circbuf v0.0.0-20150827004946-bbbad097214e/go.mod h1:3U/XgcO3hCbHZ8TKRvWD2dDTCfh9M9ya+I9JpbB7O8o= +github.com/armon/consul-api v0.0.0-20180202201655-eb2c6b5be1b6/go.mod 
h1:grANhF5doyWs3UAsr3K4I6qtAmlQcZDesFNEHPZAzj8= +github.com/armon/go-metrics v0.0.0-20180917152333-f0300d1749da/go.mod h1:Q73ZrmVTwzkszR9V5SSuryQ31EELlFMUz1kKyl939pY= +github.com/armon/go-metrics v0.0.0-20190430140413-ec5e00d3c878 h1:EFSB7Zo9Eg91v7MJPVsifUysc/wPdN+NOnVe6bWbdBM= +github.com/armon/go-metrics v0.0.0-20190430140413-ec5e00d3c878/go.mod h1:3AMJUQhVx52RsWOnlkpikZr01T/yAVN2gn0861vByNg= +github.com/armon/go-radix v0.0.0-20180808171621-7fddfc383310/go.mod h1:ufUuZ+zHj4x4TnLV4JWEpy2hxWSpsRywHrMgIH9cCH8= +github.com/armon/go-radix v1.0.0/go.mod h1:ufUuZ+zHj4x4TnLV4JWEpy2hxWSpsRywHrMgIH9cCH8= +github.com/aryann/difflib v0.0.0-20170710044230-e206f873d14a/go.mod h1:DAHtR1m6lCRdSC2Tm3DSWRPvIPr6xNKyeHdqDQSQT+A= +github.com/aws/aws-lambda-go v1.13.3/go.mod h1:4UKl9IzQMoD+QF79YdCuzCwp8VbmG4VAQwij/eHl5CU= +github.com/aws/aws-sdk-go v1.27.0/go.mod h1:KmX6BPdI08NWTb3/sm4ZGu5ShLoqVDhKgpiN924inxo= +github.com/aws/aws-sdk-go v1.35.3/go.mod h1:H7NKnBqNVzoTJpGfLrQkkD+ytBA93eiDYi/+8rV9s48= +github.com/aws/aws-sdk-go v1.35.20 h1:Hs7x9Czh+MMPnZLQqHhsuZKeNFA3Vuf7pdy2r5QlVb0= +github.com/aws/aws-sdk-go v1.35.20/go.mod h1:tlPOdRjfxPBpNIwqDj61rmsnA85v9jc0Ps9+muhnW+k= +github.com/aws/aws-sdk-go-v2 v0.18.0/go.mod h1:JWVYvqSMppoMJC0x5wdwiImzgXTI9FuZwxzkQq9wy+g= +github.com/baidubce/bce-sdk-go v0.9.47 h1:wonKDMqevZoHx+/XW7q/8p1nP5yEpbvxgMdUfPDhiZM= +github.com/baidubce/bce-sdk-go v0.9.47/go.mod h1:zbYJMQwE4IZuyrJiFO8tO8NbtYiKTFTbwh4eIsqjVdg= +github.com/baiyubin/aliyun-sts-go-sdk v0.0.0-20180326062324-cfa1a18b161f h1:ZNv7On9kyUzm7fvRZumSyy/IUiSC7AzL0I1jKKtwooA= +github.com/baiyubin/aliyun-sts-go-sdk v0.0.0-20180326062324-cfa1a18b161f/go.mod h1:AuiFmCCPBSrqvVMvuqFuk0qogytodnVFVSN5CeJB8Gc= +github.com/bcicen/jstream v1.0.1 h1:BXY7Cu4rdmc0rhyTVyT3UkxAiX3bnLpKLas9btbH5ck= +github.com/bcicen/jstream v1.0.1/go.mod h1:9ielPxqFry7Y4Tg3j4BfjPocfJ3TbsRtXOAYXYmRuAQ= +github.com/beevik/ntp v0.3.0 h1:xzVrPrE4ziasFXgBVBZJDP0Wg/KpMwk2KHJ4Ba8GrDw= +github.com/beevik/ntp v0.3.0/go.mod h1:hIHWr+l3+/clUnF44zdK+CWW7fO8dR5cIylAQ76NRpg= +github.com/benbjohnson/clock v1.1.0 h1:Q92kusRqC1XV2MjkWETPvjJVqKetz1OzxZB7mHJLju8= +github.com/benbjohnson/clock v1.1.0/go.mod h1:J11/hYXuz8f4ySSvYwY0FKfm+ezbsZBKZxNJlLklBHA= +github.com/beorn7/perks v0.0.0-20180321164747-3a771d992973/go.mod h1:Dwedo/Wpr24TaqPxmxbtue+5NUziq4I4S80YR8gNf3Q= +github.com/beorn7/perks v1.0.0/go.mod h1:KWe93zE9D1o94FZ5RNwFwVgaQK1VOXiVxmqh+CedLV8= +github.com/beorn7/perks v1.0.1 h1:VlbKKnNfV8bJzeqoa4cOKqO6bYr3WgKZxO8Z16+hsOM= +github.com/beorn7/perks v1.0.1/go.mod h1:G2ZrVWU2WbWT9wwq4/hrbKbnv/1ERSJQ0ibhJ6rlkpw= +github.com/bgentry/speakeasy v0.1.0/go.mod h1:+zsyZBPWlz7T6j88CTgSN5bM796AkVf0kBD4zp0CCIs= +github.com/billziss-gh/cgofuse v1.4.0 h1:kju2jDmdNuDDCrxPob2ggmZr5Mj/odCjU1Y8kx0Th9E= +github.com/billziss-gh/cgofuse v1.4.0/go.mod h1:LJjoaUojlVjgo5GQoEJTcJNqZJeRU0nCR84CyxKt2YM= +github.com/boltdb/bolt v1.3.1/go.mod h1:clJnj/oiGkjum5o1McbSZDSLxVThjynRyGBgiAx27Ps= +github.com/cakturk/go-netstat v0.0.0-20200220111822-e5b49efee7a5 h1:BjkPE3785EwPhhyuFkbINB+2a1xATwk8SNDWnJiD41g= +github.com/cakturk/go-netstat v0.0.0-20200220111822-e5b49efee7a5/go.mod h1:jtAfVaU/2cu1+wdSRPWE2c1N2qeAA3K4RH9pYgqwets= +github.com/casbin/casbin/v2 v2.1.2/go.mod h1:YcPU1XXisHhLzuxH9coDNf2FbKpjGlbCg3n9yuLkIJQ= +github.com/cenkalti/backoff v2.2.1+incompatible/go.mod h1:90ReRw6GdpyfrHakVjL/QHaoyV4aDUVVkXQJJJ3NXXM= +github.com/cenkalti/backoff/v4 v4.0.2/go.mod h1:eEew/i+1Q6OrCDZh3WiXYv3+nJwBASZ8Bog/87DQnVg= +github.com/census-instrumentation/opencensus-proto v0.2.1/go.mod 
h1:f6KPmirojxKA12rnyqOA5BBL4O983OfeGPqjHWSTneU= +github.com/ceph/go-ceph v0.4.0 h1:KJsT6j1IbsEtui3ZtDcZO//uZ+IVBNT6KO7u9PuMovE= +github.com/ceph/go-ceph v0.4.0/go.mod h1:wd+keAOqrcsN//20VQnHBGtnBnY0KHl0PA024Ng8HfQ= +github.com/cespare/xxhash v1.1.0 h1:a6HrQnmkObjyL+Gs60czilIUGqrzKutQD6XZog3p+ko= +github.com/cespare/xxhash v1.1.0/go.mod h1:XrSqR1VqqWfGrhpAt58auRo0WTKS1nRRg3ghfAqPWnc= +github.com/cespare/xxhash/v2 v2.1.1 h1:6MnRN8NT7+YBpUIWxHtefFZOKTAPgGjpQSxqLNn0+qY= +github.com/cespare/xxhash/v2 v2.1.1/go.mod h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XLFGgcrjCOs= +github.com/cheggaaa/pb v1.0.29 h1:FckUN5ngEk2LpvuG0fw1GEFx6LtyY2pWI/Z2QgCnEYo= +github.com/cheggaaa/pb v1.0.29/go.mod h1:W40334L7FMC5JKWldsTWbdGjLo0RxUKK73K+TuPxX30= +github.com/chzyer/logex v1.1.10/go.mod h1:+Ywpsq7O8HXn0nuIou7OrIPyXbp3wmkHB+jjWRnGsAI= +github.com/chzyer/readline v0.0.0-20180603132655-2972be24d48e/go.mod h1:nSuG5e5PlCu98SY8svDHJxuZscDgtXS6KTTbou5AhLI= +github.com/chzyer/test v0.0.0-20180213035817-a1ea475d72b1/go.mod h1:Q3SI9o4m/ZMnBNeIyt5eFwwo7qiLfzFZmjNmxjkiQlU= +github.com/circonus-labs/circonus-gometrics v2.3.1+incompatible/go.mod h1:nmEj6Dob7S7YxXgwXpfOuvO54S+tGdZdw9fuRZt25Ag= +github.com/circonus-labs/circonusllhist v0.1.3/go.mod h1:kMXHVDlOchFAehlya5ePtbp5jckzBHf4XRpQvBOLI+I= +github.com/clbanning/x2j v0.0.0-20191024224557-825249438eec/go.mod h1:jMjuTZXRI4dUb/I5gc9Hdhagfvm9+RyrPryS/auMzxE= +github.com/client9/misspell v0.3.4/go.mod h1:qj6jICC3Q7zFZvVWo7KLAzC3yx5G7kyvSDkc90ppPyw= +github.com/cockroachdb/datadriven v0.0.0-20190809214429-80d97fb3cbaa h1:OaNxuTZr7kxeODyLWsRMC+OD03aFUH+mW6r2d+MWa5Y= +github.com/cockroachdb/datadriven v0.0.0-20190809214429-80d97fb3cbaa/go.mod h1:zn76sxSg3SzpJ0PPJaLDCu+Bu0Lg3sKTORVIj19EIF8= +github.com/codahale/hdrhistogram v0.0.0-20161010025455-3a0bb77429bd/go.mod h1:sE/e/2PUdi/liOCUjSTXgM1o87ZssimdTWN964YiIeI= +github.com/colinmarc/hdfs/v2 v2.2.0 h1:4AaIlTq+/sWmeqYhI0dX8bD4YrMQM990tRjm636FkGM= +github.com/colinmarc/hdfs/v2 v2.2.0/go.mod h1:Wss6n3mtaZyRwWaqtSH+6ge01qT0rw9dJJmvoUnIQ/E= +github.com/coredns/coredns v1.4.0 h1:RubBkYmkByUqZWWkjRHvNLnUHgkRVqAWgSMmRFvpE1A= +github.com/coredns/coredns v1.4.0/go.mod h1:zASH/MVDgR6XZTbxvOnsZfffS+31vg6Ackf/wo1+AM0= +github.com/coreos/bbolt v1.3.2/go.mod h1:iRUV2dpdMOn7Bo10OQBFzIJO9kkE559Wcmn+qkEiiKk= +github.com/coreos/etcd v3.3.10+incompatible/go.mod h1:uF7uidLiAD3TWHmW31ZFd/JWoc32PjwdhPthX9715RE= +github.com/coreos/go-etcd v2.0.0+incompatible/go.mod h1:Jez6KQU2B/sWsbdaef3ED8NzMklzPG4d5KIOhIy30Tk= +github.com/coreos/go-semver v0.2.0/go.mod h1:nnelYz7RCh+5ahJtPPxZlU+153eP4D4r3EedlOD2RNk= +github.com/coreos/go-semver v0.3.0 h1:wkHLiw0WNATZnSG7epLsujiMCgPAc9xhjJ4tgnAxmfM= +github.com/coreos/go-semver v0.3.0/go.mod h1:nnelYz7RCh+5ahJtPPxZlU+153eP4D4r3EedlOD2RNk= +github.com/coreos/go-systemd v0.0.0-20180511133405-39ca1b05acc7/go.mod h1:F5haX7vjVVG0kc13fIWeqUViNPyEJxv/OmvnBo0Yme4= +github.com/coreos/go-systemd v0.0.0-20190321100706-95778dfbb74e h1:Wf6HqHfScWJN9/ZjdUKyjop4mf3Qdd+1TvvltAvM3m8= +github.com/coreos/go-systemd v0.0.0-20190321100706-95778dfbb74e/go.mod h1:F5haX7vjVVG0kc13fIWeqUViNPyEJxv/OmvnBo0Yme4= +github.com/coreos/pkg v0.0.0-20160727233714-3ac0863d7acf/go.mod h1:E3G3o1h8I7cfcXa63jLwjI0eiQQMgzzUDFVpN/nH/eA= +github.com/coreos/pkg v0.0.0-20180928190104-399ea9e2e55f h1:lBNOc5arjvs8E5mO2tbpBpLoyyu8B6e44T7hJy6potg= +github.com/coreos/pkg v0.0.0-20180928190104-399ea9e2e55f/go.mod h1:E3G3o1h8I7cfcXa63jLwjI0eiQQMgzzUDFVpN/nH/eA= +github.com/corona10/goimagehash v1.0.2/go.mod h1:/l9umBhvcHQXVtQO1V6Gp1yD20STawkhRnnX0D1bvVI= 
+github.com/cpuguy83/go-md2man v1.0.10 h1:BSKMNlYxDvnunlTymqtgONjNnaRV1sTpcovwwjF22jk= +github.com/cpuguy83/go-md2man v1.0.10/go.mod h1:SmD6nW6nTyfqj6ABTjUi3V3JVMnlJmwcJI5acqYI6dE= +github.com/cpuguy83/go-md2man/v2 v2.0.0-20190314233015-f79a8a8ca69d/go.mod h1:maD7wRr/U5Z6m/iR4s+kqSMx2CaBsrgA7czyZG/E6dU= +github.com/cpuguy83/go-md2man/v2 v2.0.0 h1:EoUDS0afbrsXAZ9YQ9jdu/mZ2sXgT1/2yyNng4PGlyM= +github.com/cpuguy83/go-md2man/v2 v2.0.0/go.mod h1:maD7wRr/U5Z6m/iR4s+kqSMx2CaBsrgA7czyZG/E6dU= +github.com/creack/pty v1.1.7/go.mod h1:lj5s0c3V2DBrqTV7llrYr5NG6My20zk30Fl46Y7DoTY= +github.com/creack/pty v1.1.9/go.mod h1:oKZEueFk5CKHvIhNR5MUki03XCEU+Q6VDXinZuGJ33E= +github.com/cznic/mathutil v0.0.0-20181122101859-297441e03548 h1:iwZdTE0PVqJCos1vaoKsclOGD3ADKpshg3SRtYBbwso= +github.com/cznic/mathutil v0.0.0-20181122101859-297441e03548/go.mod h1:e6NPNENfs9mPDVNRekM7lKScauxd5kXTr1Mfyig6TDM= +github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= +github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= +github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= +github.com/dchest/siphash v1.2.1 h1:4cLinnzVJDKxTCl9B01807Yiy+W7ZzVHj/KIroQRvT4= +github.com/dchest/siphash v1.2.1/go.mod h1:q+IRvb2gOSrUnYoPqHiyHXS0FOBBOdl6tONBlVnOnt4= +github.com/deckarep/golang-set v1.7.1 h1:SCQV0S6gTtp6itiFrTqI+pfmJ4LN85S1YzhDf9rTHJQ= +github.com/deckarep/golang-set v1.7.1/go.mod h1:93vsz/8Wt4joVM7c2AVqh+YRMiUSc14yDtF28KmMOgQ= +github.com/denisenkom/go-mssqldb v0.0.0-20200428022330-06a60b6afbbc/go.mod h1:xbL0rPBG9cCiLr28tMa8zpbdarY27NDyej4t/EjAShU= +github.com/dgraph-io/badger v1.6.2 h1:mNw0qs90GVgGGWylh0umH5iag1j6n/PeJtNvL6KY/x8= +github.com/dgraph-io/badger/v3 v3.2103.2 h1:dpyM5eCJAtQCBcMCZcT4UBZchuTJgCywerHHgmxfxM8= +github.com/dgraph-io/badger/v3 v3.2103.2/go.mod h1:RHo4/GmYcKKh5Lxu63wLEMHJ70Pac2JqZRYGhlyAo2M= +github.com/dgraph-io/ristretto v0.1.0 h1:Jv3CGQHp9OjuMBSne1485aDpUkTKEcUqF+jm/LuerPI= +github.com/dgraph-io/ristretto v0.1.0/go.mod h1:fux0lOrBhrVCJd3lcTHsIJhq1T2rokOu6v9Vcb3Q9ug= +github.com/dgrijalva/jwt-go v3.2.0+incompatible h1:7qlOGliEKZXTDg6OTjfoBKDXWrumCAMpl/TFQ4/5kLM= +github.com/dgrijalva/jwt-go v3.2.0+incompatible/go.mod h1:E3ru+11k8xSBh+hMPgOLZmtrrCbhqsmaPHjLKYnJCaQ= +github.com/dgryski/go-farm v0.0.0-20190423205320-6a90982ecee2 h1:tdlZCpZ/P9DhczCTSixgIKmwPv6+wP5DGjqLYw5SUiA= +github.com/dgryski/go-farm v0.0.0-20190423205320-6a90982ecee2/go.mod h1:SqUrOPUnsFjfmXRMNPybcSiG0BgUW2AuFH8PAnS2iTw= +github.com/dgryski/go-rendezvous v0.0.0-20200823014737-9f7001d12a5f h1:lO4WD4F/rVNCu3HqELle0jiPLLBs70cWOduZpkS1E78= +github.com/dgryski/go-rendezvous v0.0.0-20200823014737-9f7001d12a5f/go.mod h1:cuUVRXasLTGF7a8hSLbxyZXjz+1KgoB3wDUb6vlszIc= +github.com/dgryski/go-sip13 v0.0.0-20181026042036-e10d5fee7954/go.mod h1:vAd38F8PWV+bWy6jNmig1y/TA+kYO4g3RSRF0IAv0no= +github.com/djherbis/atime v1.0.0 h1:ySLvBAM0EvOGaX7TI4dAM5lWj+RdJUCKtGSEHN8SGBg= +github.com/djherbis/atime v1.0.0/go.mod h1:5W+KBIuTwVGcqjIfaTwt+KSYX1o6uep8dtevevQP/f8= +github.com/dnaeon/go-vcr v1.2.0 h1:zHCHvJYTMh1N7xnV7zf1m1GPBF9Ad0Jk/whtQ1663qI= +github.com/dnaeon/go-vcr v1.2.0/go.mod h1:R4UdLID7HZT3taECzJs4YgbbH6PIGXB6W/sc5OLb6RQ= +github.com/docker/go-units v0.4.0 h1:3uh0PgVws3nIA0Q+MwDC8yjEPf9zjRfZZWXZYDct3Tw= +github.com/docker/go-units v0.4.0/go.mod h1:fgPhTUdO+D/Jk86RDLlptpiXQzgHJF7gydDDbaIK4Dk= +github.com/dswarbrick/smart v0.0.0-20190505152634-909a45200d6d h1:QK8IYltsNy+5QZcDFbVkyInrs98/wHy1tfUTGG91sps= +github.com/dswarbrick/smart 
v0.0.0-20190505152634-909a45200d6d/go.mod h1:apXo4PA/BgBPrt66j0N45O2stlBTRowdip2igwcUWVc= +github.com/dustin/go-humanize v0.0.0-20171111073723-bb3d318650d4/go.mod h1:HtrtbFcZ19U5GC7JDqmcUSB87Iq5E25KnS6fMYU6eOk= +github.com/dustin/go-humanize v1.0.0 h1:VSnTsYCnlFHaM2/igO1h6X3HA71jcobQuxemgkq4zYo= +github.com/dustin/go-humanize v1.0.0/go.mod h1:HtrtbFcZ19U5GC7JDqmcUSB87Iq5E25KnS6fMYU6eOk= +github.com/eapache/go-resiliency v1.1.0/go.mod h1:kFI+JgMyC7bLPUVY133qvEBtVayf5mFgVsvEsIPBvNs= +github.com/eapache/go-resiliency v1.2.0/go.mod h1:kFI+JgMyC7bLPUVY133qvEBtVayf5mFgVsvEsIPBvNs= +github.com/eapache/go-xerial-snappy v0.0.0-20180814174437-776d5712da21/go.mod h1:+020luEh2TKB4/GOp8oxxtq0Daoen/Cii55CzbTV6DU= +github.com/eapache/queue v1.1.0/go.mod h1:6eCeP0CKFpHLu8blIFXhExK/dRa7WDZfr6jVFPTqq+I= +github.com/eclipse/paho.mqtt.golang v1.3.0 h1:MU79lqr3FKNKbSrGN7d7bNYqh8MwWW7Zcx0iG+VIw9I= +github.com/eclipse/paho.mqtt.golang v1.3.0/go.mod h1:eTzb4gxwwyWpqBUHGQZ4ABAV7+Jgm1PklsYT/eo8Hcc= +github.com/edsrzf/mmap-go v1.0.0/go.mod h1:YO35OhQPt3KJa3ryjFM5Bs14WD66h8eGKpfaBNrHW5M= +github.com/eknkc/amber v0.0.0-20171010120322-cdade1c07385/go.mod h1:0vRUJqYpeSZifjYj7uP3BG/gKcuzL9xWVV/Y+cK33KM= +github.com/elazarl/go-bindata-assetfs v1.0.0 h1:G/bYguwHIzWq9ZoyUQqrjTmJbbYn3j3CKKpKinvZLFk= +github.com/elazarl/go-bindata-assetfs v1.0.0/go.mod h1:v+YaWX3bdea5J/mo8dSETolEo7R71Vk1u8bnjau5yw4= +github.com/emersion/go-ical v0.0.0-20200224201310-cd514449c39e/go.mod h1:4xVTBPcT43a1pp3vdaa+FuRdX5XhKCZPpWv7m0z9ByM= +github.com/emersion/go-vcard v0.0.0-20191221110513-5f81fa0d3cc7/go.mod h1:HMJKR5wlh/ziNp+sHEDV2ltblO4JD2+IdDOWtGcQBTM= +github.com/emersion/go-webdav v0.3.0 h1:I1J9xf7fa1NxXFWCKyN5Ju3Sa3jJeNMI3uJQKHYg0SY= +github.com/emersion/go-webdav v0.3.0/go.mod h1:uSM1VveeKtogBVWaYccTksToczooJ0rrVGNsgnDsr4Q= +github.com/envoyproxy/go-control-plane v0.6.9/go.mod h1:SBwIajubJHhxtWwsL9s8ss4safvEdbitLhGGK48rN6g= +github.com/envoyproxy/go-control-plane v0.9.0/go.mod h1:YTl/9mNaCwkRvm6d1a2C3ymFceY/DCBVvsKhRF0iEA4= +github.com/envoyproxy/go-control-plane v0.9.1-0.20191026205805-5f8ba28d4473/go.mod h1:YTl/9mNaCwkRvm6d1a2C3ymFceY/DCBVvsKhRF0iEA4= +github.com/envoyproxy/protoc-gen-validate v0.1.0/go.mod h1:iSmxcyjqTsJpI2R4NaDN7+kN2VEUnK/pcBlmesArF7c= +github.com/erikdubbelboer/gspt v0.0.0-20210805194459-ce36a5128377 h1:gT+RM6gdTIAzMT7HUvmT5mL8SyG8Wx7iS3+L0V34Km4= +github.com/erikdubbelboer/gspt v0.0.0-20210805194459-ce36a5128377/go.mod h1:v6o7m/E9bfvm79dE1iFiF+3T7zLBnrjYjkWMa1J+Hv0= +github.com/fatih/color v1.7.0/go.mod h1:Zm6kSWBoL9eyXnKyktHP6abPY2pDugNf5KwzbycvMj4= +github.com/fatih/color v1.9.0/go.mod h1:eQcE1qtQxscV5RaZvpXrrb8Drkc3/DdQ+uUYCNjL+zU= +github.com/fatih/color v1.10.0 h1:s36xzo75JdqLaaWoiEHk767eHiwo0598uUxyfiPkDsg= +github.com/fatih/color v1.10.0/go.mod h1:ELkj/draVOlAH/xkhN6mQ50Qd0MPOk5AAr3maGEBuJM= +github.com/fatih/structs v1.1.0 h1:Q7juDM0QtcnhCpeyLGQKyg4TOIghuNXrkL32pHAUMxo= +github.com/fatih/structs v1.1.0/go.mod h1:9NiDSp5zOcgEDl+j00MP/WkGVPOlPRLejGD8Ga6PJ7M= +github.com/fatih/structtag v1.2.0/go.mod h1:mBJUNpUnHmRKrKlQQlmCrh5PuhftFbNv8Ys4/aAZl94= +github.com/felixge/httpsnoop v1.0.1 h1:lvB5Jl89CsZtGIWuTcDM1E/vkVs49/Ml7JJe07l8SPQ= +github.com/felixge/httpsnoop v1.0.1/go.mod h1:m8KPJKqk1gH5J9DgRY2ASl2lWCfGKXixSwevea8zH2U= +github.com/fogleman/gg v1.3.0/go.mod h1:R/bRT+9gY/C5z7JzPU0zXsXHKM4/ayA+zqcVNZzPa1k= +github.com/form3tech-oss/jwt-go v3.2.2+incompatible h1:TcekIExNqud5crz4xD2pavyTgWiPvpYe4Xau31I0PRk= +github.com/form3tech-oss/jwt-go v3.2.2+incompatible/go.mod h1:pbq4aXjuKjdthFRnoDwaVPLA+WlJuPGy+QneDUgJi2k= 
+github.com/fortytw2/leaktest v1.3.0/go.mod h1:jDsjWgpAGjm2CA7WthBh/CdZYEPF31XHquHwclZch5g= +github.com/franela/goblin v0.0.0-20200105215937-c9ffbefa60db/go.mod h1:7dvUGVsVBjqR7JHJk0brhHOZYGmfBYOrK0ZhYMEtBr4= +github.com/franela/goreq v0.0.0-20171204163338-bcd34c9993f8/go.mod h1:ZhphrRTfi2rbfLwlschooIH4+wKKDR4Pdxhh+TRoA20= +github.com/frankban/quicktest v1.10.2 h1:19ARM85nVi4xH7xPXuc5eM/udya5ieh7b/Sv+d844Tk= +github.com/frankban/quicktest v1.10.2/go.mod h1:K+q6oSqb0W0Ininfk863uOk1lMy69l/P6txr3mVT54s= +github.com/fsnotify/fsnotify v1.4.7/go.mod h1:jwhsz4b93w/PPRr/qN1Yymfu8t87LnFCMoQvtojpjFo= +github.com/fsnotify/fsnotify v1.4.9 h1:hsms1Qyu0jgnwNXIxa+/V/PDsU6CfLf6CNO8H7IWoS4= +github.com/fsnotify/fsnotify v1.4.9/go.mod h1:znqG4EE+3YCdAaPaxE2ZRY/06pZUdp0tY4IgpuI1SZQ= +github.com/ghodss/yaml v1.0.0/go.mod h1:4dBDuWmgqj2HViK6kFavaiC9ZROes6MMH2rRYeMEF04= +github.com/gin-contrib/gzip v0.0.1/go.mod h1:fGBJBCdt6qCZuCAOwWuFhBB4OOq9EFqlo5dEaFhhu5w= +github.com/gin-contrib/sse v0.0.0-20170109093832-22d885f9ecc7/go.mod h1:VJ0WA2NBN22VlZ2dKZQPAPnyWw5XTlK1KymzLKsr59s= +github.com/gin-contrib/sse v0.0.0-20190301062529-5545eab6dad3/go.mod h1:VJ0WA2NBN22VlZ2dKZQPAPnyWw5XTlK1KymzLKsr59s= +github.com/gin-contrib/sse v0.1.0/go.mod h1:RHrZQHXnP2xjPF+u1gW/2HnVO7nvIa9PG3Gm+fLHvGI= +github.com/gin-gonic/gin v1.3.0/go.mod h1:7cKuhb5qV2ggCFctp2fJQ+ErvciLZrIeoOSOm6mUr7Y= +github.com/gin-gonic/gin v1.4.0/go.mod h1:OW2EZn3DO8Ln9oIKOvM++LBO+5UPHJJDH72/q/3rZdM= +github.com/gin-gonic/gin v1.5.0/go.mod h1:Nd6IXA8m5kNZdNEHMBd93KT+mdY3+bewLgRvmCsR2Do= +github.com/go-asn1-ber/asn1-ber v1.5.1 h1:pDbRAunXzIUXfx4CB2QJFv5IuPiuoW+sWvr/Us009o8= +github.com/go-asn1-ber/asn1-ber v1.5.1/go.mod h1:hEBeB/ic+5LoWskz+yKT7vGhhPYkProFKoKdwZRWMe0= +github.com/go-chi/chi v4.0.2+incompatible/go.mod h1:eB3wogJHnLi3x/kFX2A+IbTBlXxmMeXJVKy9tTv1XzQ= +github.com/go-echarts/go-echarts v1.0.0/go.mod h1:qbmyAb/Rl1f2w7wKba1D4LoNq4U164yO4/wedFbcWyo= +github.com/go-kit/kit v0.8.0/go.mod h1:xBxKIO96dXMWWy0MnWVtmwkA9/13aqxPnvrjFYMA2as= +github.com/go-kit/kit v0.9.0/go.mod h1:xBxKIO96dXMWWy0MnWVtmwkA9/13aqxPnvrjFYMA2as= +github.com/go-kit/kit v0.10.0/go.mod h1:xUsJbQ/Fp4kEt7AFgCuvyX4a71u8h9jB8tj/ORgOZ7o= +github.com/go-ldap/ldap v3.0.2+incompatible h1:kD5HQcAzlQ7yrhfn+h+MSABeAy/jAJhvIJ/QDllP44g= +github.com/go-ldap/ldap v3.0.2+incompatible/go.mod h1:qfd9rJvER9Q0/D/Sqn1DfHRoBp40uXYvFoEVrNEPqRc= +github.com/go-ldap/ldap/v3 v3.2.4 h1:PFavAq2xTgzo/loE8qNXcQaofAaqIpI4WgaLdv+1l3E= +github.com/go-ldap/ldap/v3 v3.2.4/go.mod h1:iYS1MdmrmceOJ1QOTnRXrIs7i3kloqtmGQjRvjKpyMg= +github.com/go-logfmt/logfmt v0.3.0/go.mod h1:Qt1PoO58o5twSAckw1HlFXLmHsOX5/0LbT9GBnD5lWE= +github.com/go-logfmt/logfmt v0.4.0/go.mod h1:3RMwSq7FuexP4Kalkev3ejPJsZTpXXBr9+V4qmtdjCk= +github.com/go-logfmt/logfmt v0.5.0/go.mod h1:wCYkCAKZfumFQihp8CzCvQ3paCTfi41vtzG1KdI/P7A= +github.com/go-logr/logr v0.1.0/go.mod h1:ixOQHD9gLJUVQQ2ZOR7zLEifBX6tGkNJF4QyIY7sIas= +github.com/go-ole/go-ole v1.2.4 h1:nNBDSCOigTSiarFpYE9J/KtEA1IOW4CNeqT9TQDqCxI= +github.com/go-ole/go-ole v1.2.4/go.mod h1:XCwSNxSkXRo4vlyPy93sltvi/qJq0jqQhjqQNIwKuxM= +github.com/go-openapi/jsonpointer v0.17.0/go.mod h1:cOnomiV+CVVwFLk0A/MExoFMjwdsUdVpsRhURCKh+3M= +github.com/go-openapi/jsonpointer v0.19.2/go.mod h1:3akKfEdA7DF1sugOqz1dVQHBcuDBPKZGEoHC/NkiQRg= +github.com/go-openapi/jsonpointer v0.19.3/go.mod h1:Pl9vOtqEWErmShwVjC8pYs9cog34VGT37dQOVbmoatg= +github.com/go-openapi/jsonreference v0.17.0/go.mod h1:g4xxGn04lDIRh0GJb5QlpE3HfopLOL6uZrK/VgnsK9I= +github.com/go-openapi/jsonreference v0.19.0/go.mod 
h1:g4xxGn04lDIRh0GJb5QlpE3HfopLOL6uZrK/VgnsK9I= +github.com/go-openapi/jsonreference v0.19.2/go.mod h1:jMjeRr2HHw6nAVajTXJ4eiUwohSTlpa0o73RUL1owJc= +github.com/go-openapi/jsonreference v0.19.3/go.mod h1:rjx6GuL8TTa9VaixXglHmQmIL98+wF9xc8zWvFonSJ8= +github.com/go-openapi/spec v0.19.0/go.mod h1:XkF/MOi14NmjsfZ8VtAKf8pIlbZzyoTvZsdfssdxcBI= +github.com/go-openapi/spec v0.19.4/go.mod h1:FpwSN1ksY1eteniUU7X0N/BgJ7a4WvBFVA8Lj9mJglo= +github.com/go-openapi/swag v0.17.0/go.mod h1:AByQ+nYG6gQg71GINrmuDXCPWdL640yX49/kXLo40Tg= +github.com/go-openapi/swag v0.19.2/go.mod h1:POnQmlKehdgb5mhVOsnJFsivZCEZ/vjK9gh66Z9tfKk= +github.com/go-openapi/swag v0.19.5/go.mod h1:POnQmlKehdgb5mhVOsnJFsivZCEZ/vjK9gh66Z9tfKk= +github.com/go-playground/locales v0.12.1/go.mod h1:IUMDtCfWo/w/mtMfIE/IG2K+Ey3ygWanZIBtBW0W2TM= +github.com/go-playground/overalls v0.0.0-20180201144345-22ec1a223b7c/go.mod h1:UqxAgEOt89sCiXlrc/ycnx00LVvUO/eS8tMUkWX4R7w= +github.com/go-playground/universal-translator v0.16.0/go.mod h1:1AnU7NaIRDWWzGEKwgtJRd2xk99HeFyHw3yid4rvQIY= +github.com/go-redis/redis/v8 v8.4.0 h1:J5NCReIgh3QgUJu398hUncxDExN4gMOHI11NVbVicGQ= +github.com/go-redis/redis/v8 v8.4.0/go.mod h1:A1tbYoHSa1fXwN+//ljcCYYJeLmVrwL9hbQN45Jdy0M= +github.com/go-resty/resty/v2 v2.6.0/go.mod h1:PwvJS6hvaPkjtjNg9ph+VrSD92bi5Zq73w/BIH7cC3Q= +github.com/go-sql-driver/mysql v1.4.0/go.mod h1:zAC/RDZ24gD3HViQzih4MyKcchzm+sOG5ZlKdlhCg5w= +github.com/go-sql-driver/mysql v1.5.0/go.mod h1:DCzpHaOWr8IXmIStZouvnhqoel9Qv2LBy8hT2VhHyBg= +github.com/go-sql-driver/mysql v1.6.0 h1:BCTh4TKNUYmOmMUcQ3IipzF5prigylS7XXjEkfCHuOE= +github.com/go-sql-driver/mysql v1.6.0/go.mod h1:DCzpHaOWr8IXmIStZouvnhqoel9Qv2LBy8hT2VhHyBg= +github.com/go-stack/stack v1.8.0/go.mod h1:v0f6uXyyMGvRgIKkXu+yp6POWl0qKG85gN/melR3HDY= +github.com/go-test/deep v1.0.2-0.20181118220953-042da051cf31/go.mod h1:wGDj63lr65AM2AQyKZd/NYHGb0R+1RLqB8NKt3aSFNA= +github.com/goccy/go-graphviz v0.0.5/go.mod h1:wXVsXxmyMQU6TN3zGRttjNn3h+iCAS7xQFC6TlNvLhk= +github.com/gofrs/flock v0.8.1 h1:+gYjHKf32LDeiEEFhQaotPbLuUXjY5ZqxKgXy7n59aw= +github.com/gofrs/flock v0.8.1/go.mod h1:F1TvTiK9OcQqauNUHlbJvyl9Qa1QvF/gOUDKA14jxHU= +github.com/gofrs/uuid v3.2.0+incompatible h1:y12jRkkFxsd7GpqdSZ+/KCs/fJbqpEXSGd4+jfEaewE= +github.com/gofrs/uuid v3.2.0+incompatible/go.mod h1:b2aQJv3Z4Fp6yNu3cdSllBxTCLRxnplIgP/c0N/04lM= +github.com/gogo/googleapis v1.1.0/go.mod h1:gf4bu3Q80BeJ6H1S1vYPm8/ELATdvryBaNFGgqEef3s= +github.com/gogo/protobuf v0.0.0-20180717141946-636bf0302bc9/go.mod h1:r8qH/GZQm5c6nD/R0oafs1akxWv10x8SbQlK7atdtwQ= +github.com/gogo/protobuf v1.1.1/go.mod h1:r8qH/GZQm5c6nD/R0oafs1akxWv10x8SbQlK7atdtwQ= +github.com/gogo/protobuf v1.2.0/go.mod h1:r8qH/GZQm5c6nD/R0oafs1akxWv10x8SbQlK7atdtwQ= +github.com/gogo/protobuf v1.2.1/go.mod h1:hp+jE20tsWTFYpLwKvXlhS1hjn+gTNwPg2I6zVXpSg4= +github.com/gogo/protobuf v1.3.1/go.mod h1:SlYgWuQ5SjCEi6WLHjHCa1yvBfUnHcTbrrZtXPKa29o= +github.com/gogo/protobuf v1.3.2 h1:Ov1cvc58UF3b5XjBnZv7+opcTcQFZebYjWzi34vdm4Q= +github.com/gogo/protobuf v1.3.2/go.mod h1:P1XiOD3dCwIKUDQYPy72D8LYyHL2YPYrpS2s69NZV8Q= +github.com/golang-jwt/jwt v3.2.2+incompatible h1:IfV12K8xAKAnZqdXVzCZ+TOjboZ2keLg81eXfW3O+oY= +github.com/golang-jwt/jwt v3.2.2+incompatible/go.mod h1:8pz2t5EyA70fFQQSrl6XZXzqecmYZeUEB8OUGHkxJ+I= +github.com/golang-sql/civil v0.0.0-20190719163853-cb61b32ac6fe/go.mod h1:8vg3r2VgvsThLBIFL93Qb5yWzgyZWhEmBwUJWevAkK0= +github.com/golang/freetype v0.0.0-20170609003504-e2365dfdc4a0/go.mod h1:E/TSTwGwJL78qG/PmXZO1EjYhfJinVAhrmmHX6Z8B9k= +github.com/golang/glog v0.0.0-20160126235308-23def4e6c14b 
h1:VKtxabqXZkF25pY9ekfRL6a582T4P37/31XEstQ5p58= +github.com/golang/glog v0.0.0-20160126235308-23def4e6c14b/go.mod h1:SBH7ygxi8pfUlaOkMMuAQtPIUF8ecWP5IEl/CR7VP2Q= +github.com/golang/groupcache v0.0.0-20160516000752-02826c3e7903/go.mod h1:cIg4eruTrX1D+g88fzRXU5OdNfaM+9IcxsU14FzY7Hc= +github.com/golang/groupcache v0.0.0-20190129154638-5b532d6fd5ef/go.mod h1:cIg4eruTrX1D+g88fzRXU5OdNfaM+9IcxsU14FzY7Hc= +github.com/golang/groupcache v0.0.0-20190702054246-869f871628b6/go.mod h1:cIg4eruTrX1D+g88fzRXU5OdNfaM+9IcxsU14FzY7Hc= +github.com/golang/groupcache v0.0.0-20200121045136-8c9f03a8e57e h1:1r7pUrabqp18hOBcwBwiTsbnFeTZHV9eER/QT5JVZxY= +github.com/golang/groupcache v0.0.0-20200121045136-8c9f03a8e57e/go.mod h1:cIg4eruTrX1D+g88fzRXU5OdNfaM+9IcxsU14FzY7Hc= +github.com/golang/mock v1.1.1/go.mod h1:oTYuIxOrZwtPieC+H1uAHpcLFnEyAGVDL/k47Jfbm0A= +github.com/golang/mock v1.2.0/go.mod h1:oTYuIxOrZwtPieC+H1uAHpcLFnEyAGVDL/k47Jfbm0A= +github.com/golang/protobuf v0.0.0-20180814211427-aa810b61a9c7/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U= +github.com/golang/protobuf v1.1.0/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U= +github.com/golang/protobuf v1.2.0/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U= +github.com/golang/protobuf v1.3.1/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U= +github.com/golang/protobuf v1.3.2/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U= +github.com/golang/protobuf v1.3.3/go.mod h1:vzj43D7+SQXF/4pzW/hwtAqwc6iTitCiVSaWz5lYuqw= +github.com/golang/protobuf v1.3.4/go.mod h1:vzj43D7+SQXF/4pzW/hwtAqwc6iTitCiVSaWz5lYuqw= +github.com/golang/protobuf v1.4.0-rc.1/go.mod h1:ceaxUfeHdC40wWswd/P6IGgMaK3YpKi5j83Wpe3EHw8= +github.com/golang/protobuf v1.4.0-rc.1.0.20200221234624-67d41d38c208/go.mod h1:xKAWHe0F5eneWXFV3EuXVDTCmh+JuBKY0li0aMyXATA= +github.com/golang/protobuf v1.4.0-rc.2/go.mod h1:LlEzMj4AhA7rCAGe4KMBDvJI+AwstrUpVNzEA03Pprs= +github.com/golang/protobuf v1.4.0-rc.4.0.20200313231945-b860323f09d0/go.mod h1:WU3c8KckQ9AFe+yFwt9sWVRKCVIyN9cPHBJSNnbL67w= +github.com/golang/protobuf v1.4.0/go.mod h1:jodUvKwWbYaEsadDk5Fwe5c77LiNKVO9IDvqG2KuDX0= +github.com/golang/protobuf v1.4.2/go.mod h1:oDoupMAO8OvCJWAcko0GGGIgR6R6ocIYbsSw735rRwI= +github.com/golang/protobuf v1.4.3 h1:JjCZWpVbqXDqFVmTfYWEVTMIYrL/NPdPSCHPJ0T/raM= +github.com/golang/protobuf v1.4.3/go.mod h1:oDoupMAO8OvCJWAcko0GGGIgR6R6ocIYbsSw735rRwI= +github.com/golang/snappy v0.0.0-20180518054509-2e65f85255db/go.mod h1:/XxbfmMg8lxefKM7IXC3fBNl/7bRcc72aCRzEWrmP2Q= +github.com/golang/snappy v0.0.1/go.mod h1:/XxbfmMg8lxefKM7IXC3fBNl/7bRcc72aCRzEWrmP2Q= +github.com/golang/snappy v0.0.2-0.20190904063534-ff6b7dc882cf/go.mod h1:/XxbfmMg8lxefKM7IXC3fBNl/7bRcc72aCRzEWrmP2Q= +github.com/golang/snappy v0.0.3 h1:fHPg5GQYlCeLIPB9BZqMVR5nR9A+IM5zcgeTdjMYmLA= +github.com/golang/snappy v0.0.3/go.mod h1:/XxbfmMg8lxefKM7IXC3fBNl/7bRcc72aCRzEWrmP2Q= +github.com/gomodule/redigo v1.8.3 h1:HR0kYDX2RJZvAup8CsiJwxB4dTCSC0AaUq6S4SiLwUc= +github.com/gomodule/redigo v1.8.3/go.mod h1:P9dn9mFrCBvWhGE1wpxx6fgq7BAeLBk+UUUzlpkBYO0= +github.com/google/btree v0.0.0-20180813153112-4030bb1f1f0c/go.mod h1:lNA+9X1NB3Zf8V7Ke586lFgjr2dZNuvo3lPJSGZ5JPQ= +github.com/google/btree v1.0.0/go.mod h1:lNA+9X1NB3Zf8V7Ke586lFgjr2dZNuvo3lPJSGZ5JPQ= +github.com/google/btree v1.0.1 h1:gK4Kx5IaGY9CD5sPJ36FHiBJ6ZXl0kilRiiCj+jdYp4= +github.com/google/btree v1.0.1/go.mod h1:xXMiIv4Fb/0kKde4SpL7qlzvu5cMJDRkFDxJfI9uaxA= +github.com/google/flatbuffers v1.12.1 h1:MVlul7pQNoDzWRLTw5imwYsl+usrS1TXG2H4jg6ImGw= +github.com/google/flatbuffers v1.12.1/go.mod 
h1:1AeVuKshWv4vARoZatz6mlQ0JxURH0Kv5+zNeJKJCa8= +github.com/google/go-cmp v0.2.0/go.mod h1:oXzfMopK8JAjlY9xF4vHSVASa0yLyX7SntLO5aqRK0M= +github.com/google/go-cmp v0.3.0/go.mod h1:8QqcDgzrUqlUb/G2PQTWiueGozuR1884gddMywk6iLU= +github.com/google/go-cmp v0.3.1/go.mod h1:8QqcDgzrUqlUb/G2PQTWiueGozuR1884gddMywk6iLU= +github.com/google/go-cmp v0.4.0/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= +github.com/google/go-cmp v0.5.2/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= +github.com/google/go-cmp v0.5.3 h1:x95R7cp+rSeeqAMI2knLtQ0DKlaBhv2NrtrOvafPHRo= +github.com/google/go-cmp v0.5.3/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= +github.com/google/go-cmp v0.5.4 h1:L8R9j+yAqZuZjsqh/z+F1NCffTKKLShY6zXTItVIZ8M= +github.com/google/go-cmp v0.5.4/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= +github.com/google/go-querystring v1.0.0 h1:Xkwi/a1rcvNg1PPYe5vI8GbeBY/jrVuDX5ASuANWTrk= +github.com/google/go-querystring v1.0.0/go.mod h1:odCYkC5MyYFN7vkCjXpyrEuKhc/BUO6wN/zVPAxq5ck= +github.com/google/gofuzz v1.0.0/go.mod h1:dBl0BpW6vV/+mYPU4Po3pmUjxk6FQPldtuIdl/M65Eg= +github.com/google/gops v0.3.13 h1:8lgvDd3tXe4UxVbmPPTGE0ToIpbh3hgXkt4EVZ8Y/hU= +github.com/google/gops v0.3.13/go.mod h1:38bMPVKFh+1X106CPpbLAWtZIR1+xwgzT9gew0kn6w4= +github.com/google/martian v2.1.0+incompatible h1:/CP5g8u/VJHijgedC/Legn3BAbAaWPgecwXBIDzw5no= +github.com/google/martian v2.1.0+incompatible/go.mod h1:9I4somxYTbIHy5NJKHRl3wXiIaQGbYVAs8BPL6v8lEs= +github.com/google/pprof v0.0.0-20181206194817-3ea8567a2e57/go.mod h1:zfwlbNMJ+OItoe0UupaVj+oy1omPYYDuagoSzA8v9mc= +github.com/google/pprof v0.0.0-20200407044318-7d83b28da2e9/go.mod h1:ZgVRPoUq/hfqzAqh7sHMqb3I9Rq5C59dIz2SbBwJ4eM= +github.com/google/readahead v0.0.0-20161222183148-eaceba169032 h1:6Be3nkuJFyRfCgr6qTIzmRp8y9QwDIbqy/nYr9WDPos= +github.com/google/readahead v0.0.0-20161222183148-eaceba169032/go.mod h1:qYysrqQXuV4tzsizt4oOQ6mrBZQ0xnQXP3ylXX8Jk5Y= +github.com/google/renameio v0.1.0/go.mod h1:KWCgfxg9yswjAJkECMjeO8J8rahYeXnNhOm40UhjYkI= +github.com/google/shlex v0.0.0-20181106134648-c34317bd91bf/go.mod h1:RpwtwJQFrIEPstU94h88MWPXP2ektJZ8cZ0YntAmXiE= +github.com/google/uuid v1.0.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo= +github.com/google/uuid v1.1.1/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo= +github.com/google/uuid v1.1.2 h1:EVhdT+1Kseyi1/pUmXKaFxYsDNy9RQYkMWRH68J/W7Y= +github.com/google/uuid v1.1.2/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo= +github.com/googleapis/gax-go/v2 v2.0.4 h1:hU4mGcQI4DaAYW+IbTun+2qEZVFxK0ySjQLTbS0VQKc= +github.com/googleapis/gax-go/v2 v2.0.4/go.mod h1:0Wqv26UfaUD9n4G6kQubkQ+KchISgw+vpHVxEJEs9eg= +github.com/gopherjs/gopherjs v0.0.0-20181017120253-0766667cb4d1 h1:EGx4pi6eqNxGaHF6qqu48+N2wcFQ5qg5FXgOdqsJ5d8= +github.com/gopherjs/gopherjs v0.0.0-20181017120253-0766667cb4d1/go.mod h1:wJfORRmW1u3UXTncJ5qlYoELFm8eSnnEO6hX4iZ3EWY= +github.com/gorilla/context v1.1.1/go.mod h1:kBGZzfjB9CEq2AlWe17Uuf7NDRt0dE0s8S51q0aT7Yg= +github.com/gorilla/handlers v1.5.1 h1:9lRY6j8DEeeBT10CvO9hGW0gmky0BprnvDI5vfhUHH4= +github.com/gorilla/handlers v1.5.1/go.mod h1:t8XrUpc4KVXb7HGyJ4/cEnwQiaxrX/hz1Zv/4g96P1Q= +github.com/gorilla/mux v1.6.2/go.mod h1:1lud6UwP+6orDFRuTfBEV8e9/aOM/c4fVVCaMa2zaAs= +github.com/gorilla/mux v1.7.3/go.mod h1:1lud6UwP+6orDFRuTfBEV8e9/aOM/c4fVVCaMa2zaAs= +github.com/gorilla/mux v1.7.4/go.mod h1:DVbg23sWSpFRCP0SfiEN6jmj59UnW/n46BH5rLB71So= +github.com/gorilla/mux v1.8.0 h1:i40aqfkR1h2SlN9hojwV5ZA91wcXFOvkdNIeFDP5koI= +github.com/gorilla/mux v1.8.0/go.mod 
h1:DVbg23sWSpFRCP0SfiEN6jmj59UnW/n46BH5rLB71So= +github.com/gorilla/securecookie v1.1.1 h1:miw7JPhV+b/lAHSXz4qd/nN9jRiAFV5FwjeKyCS8BvQ= +github.com/gorilla/securecookie v1.1.1/go.mod h1:ra0sb63/xPlUeL+yeDciTfxMRAA+MP+HVt/4epWDjd4= +github.com/gorilla/sessions v1.2.0/go.mod h1:dk2InVEVJ0sfLlnXv9EAgkf6ecYs/i80K/zI+bUmuGM= +github.com/gorilla/sessions v1.2.1 h1:DHd3rPN5lE3Ts3D8rKkQ8x/0kqfeNmBAaiSi+o7FsgI= +github.com/gorilla/sessions v1.2.1/go.mod h1:dk2InVEVJ0sfLlnXv9EAgkf6ecYs/i80K/zI+bUmuGM= +github.com/gorilla/websocket v0.0.0-20170926233335-4201258b820c/go.mod h1:E7qHFY5m1UJ88s3WnNqhKjPHQ0heANvMoAMk2YaljkQ= +github.com/gorilla/websocket v1.4.0/go.mod h1:E7qHFY5m1UJ88s3WnNqhKjPHQ0heANvMoAMk2YaljkQ= +github.com/gorilla/websocket v1.4.2 h1:+/TMaTYc4QFitKJxsQ7Yye35DkWvkdLcvGKqM+x0Ufc= +github.com/gorilla/websocket v1.4.2/go.mod h1:YR8l580nyteQvAITg2hZ9XVh4b55+EU/adAjf1fMHhE= +github.com/grpc-ecosystem/go-grpc-middleware v1.0.0/go.mod h1:FiyG127CGDf3tlThmgyCl78X/SZQqEOJBCDaAfeWzPs= +github.com/grpc-ecosystem/go-grpc-middleware v1.0.1-0.20190118093823-f849b5445de4/go.mod h1:FiyG127CGDf3tlThmgyCl78X/SZQqEOJBCDaAfeWzPs= +github.com/grpc-ecosystem/go-grpc-middleware v1.1.0 h1:THDBEeQ9xZ8JEaCLyLQqXMMdRqNr0QAUJTIkQAUtFjg= +github.com/grpc-ecosystem/go-grpc-middleware v1.1.0/go.mod h1:f5nM7jw/oeRSadq3xCzHAvxcr8HZnzsqU6ILg/0NiiE= +github.com/grpc-ecosystem/go-grpc-prometheus v1.2.0 h1:Ovs26xHkKqVztRpIrF/92BcuyuQ/YW4NSIpoGtfXNho= +github.com/grpc-ecosystem/go-grpc-prometheus v1.2.0/go.mod h1:8NvIoxWQoOIhqOTXgfV/d3M/q6VIi02HzZEHgUlZvzk= +github.com/grpc-ecosystem/grpc-gateway v1.9.0/go.mod h1:vNeuVxBJEsws4ogUvrchl83t/GYV9WGTSLVdBhOQFDY= +github.com/grpc-ecosystem/grpc-gateway v1.9.5/go.mod h1:vNeuVxBJEsws4ogUvrchl83t/GYV9WGTSLVdBhOQFDY= +github.com/grpc-ecosystem/grpc-gateway v1.12.1 h1:zCy2xE9ablevUOrUZc3Dl72Dt+ya2FNAvC2yLYMHzi4= +github.com/grpc-ecosystem/grpc-gateway v1.12.1/go.mod h1:8XEsbTttt/W+VvjtQhLACqCisSPWTxCZ7sBRjU6iH9c= +github.com/gtank/cryptopasta v0.0.0-20170601214702-1f550f6f2f69/go.mod h1:YLEMZOtU+AZ7dhN9T/IpGhXVGly2bvkJQ+zxj3WeVQo= +github.com/hanwen/go-fuse v1.0.0 h1:GxS9Zrn6c35/BnfiVsZVWmsG803xwE7eVRDvcf/BEVc= +github.com/hanwen/go-fuse v1.0.0/go.mod h1:unqXarDXqzAk0rt98O2tVndEPIpUgLD9+rwFisZH3Ok= +github.com/hashicorp/consul/api v1.3.0/go.mod h1:MmDNSzIMUjNpY/mQ398R4bk2FnqQLoPndWW5VkKPlCE= +github.com/hashicorp/consul/api v1.11.0 h1:Hw/G8TtRvOElqxVIhBzXciiSTbapq8hZ2XKZsXk5ZCE= +github.com/hashicorp/consul/api v1.11.0/go.mod h1:XjsvQN+RJGWI2TWy1/kqaE16HrR2J/FWgkYjdZQsX9M= +github.com/hashicorp/consul/sdk v0.3.0/go.mod h1:VKf9jXwCTEY1QZP2MOLRhb5i/I/ssyNV1vwHyQBF0x8= +github.com/hashicorp/consul/sdk v0.8.0 h1:OJtKBtEjboEZvG6AOUdh4Z1Zbyu0WcxQ0qatRrZHTVU= +github.com/hashicorp/consul/sdk v0.8.0/go.mod h1:GBvyrGALthsZObzUGsfgHZQDXjg4lOjagTIwIR1vPms= +github.com/hashicorp/errwrap v1.0.0 h1:hLrqtEDnRye3+sgx6z4qVLNuviH3MR5aQ0ykNJa/UYA= +github.com/hashicorp/errwrap v1.0.0/go.mod h1:YH+1FKiLXxHSkmPseP+kNlulaMuP3n2brvKWEqk/Jc4= +github.com/hashicorp/go-cleanhttp v0.5.0/go.mod h1:JpRdi6/HCYpAwUzNwuwqhbovhLtngrth3wmdIIUrZ80= +github.com/hashicorp/go-cleanhttp v0.5.1 h1:dH3aiDG9Jvb5r5+bYHsikaOUIpcM0xvgMXVoDkXMzJM= +github.com/hashicorp/go-cleanhttp v0.5.1/go.mod h1:JpRdi6/HCYpAwUzNwuwqhbovhLtngrth3wmdIIUrZ80= +github.com/hashicorp/go-hclog v0.0.0-20180709165350-ff2cf002a8dd/go.mod h1:9bjs9uLqI8l75knNv3lV1kA55veR+WUPSiKIWcQHudI= +github.com/hashicorp/go-hclog v0.8.0/go.mod h1:5CU+agLiy3J7N7QjHK5d05KxGsuXiQLrjA0H7acj2lQ= +github.com/hashicorp/go-hclog v0.9.1/go.mod 
h1:5CU+agLiy3J7N7QjHK5d05KxGsuXiQLrjA0H7acj2lQ= +github.com/hashicorp/go-hclog v0.12.0/go.mod h1:whpDNt7SSdeAju8AWKIWsul05p54N/39EeqMAyrmvFQ= +github.com/hashicorp/go-hclog v0.14.1 h1:nQcJDQwIAGnmoUWp8ubocEX40cCml/17YkF6csQLReU= +github.com/hashicorp/go-hclog v0.14.1/go.mod h1:whpDNt7SSdeAju8AWKIWsul05p54N/39EeqMAyrmvFQ= +github.com/hashicorp/go-immutable-radix v1.0.0 h1:AKDB1HM5PWEA7i4nhcpwOrO2byshxBjXVn/J/3+z5/0= +github.com/hashicorp/go-immutable-radix v1.0.0/go.mod h1:0y9vanUI8NX6FsYoO3zeMjhV/C5i9g4Q3DwcSNZ4P60= +github.com/hashicorp/go-msgpack v0.5.3/go.mod h1:ahLV/dePpqEmjfWmKiqvPkv/twdG7iPBM1vqhUKIvfM= +github.com/hashicorp/go-msgpack v0.5.5/go.mod h1:ahLV/dePpqEmjfWmKiqvPkv/twdG7iPBM1vqhUKIvfM= +github.com/hashicorp/go-msgpack v1.1.5 h1:9byZdVjKTe5mce63pRVNP1L7UAmdHOTEMGehn6KvJWs= +github.com/hashicorp/go-msgpack v1.1.5/go.mod h1:gWVc3sv/wbDmR3rQsj1CAktEZzoz1YNK9NfGLXJ69/4= +github.com/hashicorp/go-multierror v1.0.0/go.mod h1:dHtQlpGsu+cZNNAkkCN/P3hoUDHhCYQXV3UM06sGGrk= +github.com/hashicorp/go-multierror v1.1.0 h1:B9UzwGQJehnUY1yNrnwREHc3fGbC2xefo8g4TbElacI= +github.com/hashicorp/go-multierror v1.1.0/go.mod h1:spPvp8C1qA32ftKqdAHm4hHTbPw+vmowP0z+KUhOZdA= +github.com/hashicorp/go-plugin v1.0.1/go.mod h1:++UyYGoz3o5w9ZzAdZxtQKrWWP+iqPBn3cQptSMzBuY= +github.com/hashicorp/go-retryablehttp v0.5.3/go.mod h1:9B5zBasrRhHXnJnui7y6sL7es7NDiJgTc6Er0maI1Xs= +github.com/hashicorp/go-retryablehttp v0.5.4 h1:1BZvpawXoJCWX6pNtow9+rpEj+3itIlutiqnntI6jOE= +github.com/hashicorp/go-retryablehttp v0.5.4/go.mod h1:9B5zBasrRhHXnJnui7y6sL7es7NDiJgTc6Er0maI1Xs= +github.com/hashicorp/go-rootcerts v1.0.0/go.mod h1:K6zTfqpRlCUIjkwsN4Z+hiSfzSTQa6eBIzfwKfwNnHU= +github.com/hashicorp/go-rootcerts v1.0.1/go.mod h1:pqUvnprVnM5bf7AOirdbb01K4ccR319Vf4pU3K5EGc8= +github.com/hashicorp/go-rootcerts v1.0.2 h1:jzhAVGtqPKbwpyCPELlgNWhE1znq+qwJtW5Oi2viEzc= +github.com/hashicorp/go-rootcerts v1.0.2/go.mod h1:pqUvnprVnM5bf7AOirdbb01K4ccR319Vf4pU3K5EGc8= +github.com/hashicorp/go-sockaddr v1.0.0/go.mod h1:7Xibr9yA9JjQq1JpNB2Vw7kxv8xerXegt+ozgdvDeDU= +github.com/hashicorp/go-sockaddr v1.0.2 h1:ztczhD1jLxIRjVejw8gFomI1BQZOe2WoVOu0SyteCQc= +github.com/hashicorp/go-sockaddr v1.0.2/go.mod h1:rB4wwRAUzs07qva3c5SdrY/NEtAUjGlgmH/UkBUC97A= +github.com/hashicorp/go-syslog v1.0.0/go.mod h1:qPfqrKkXGihmCqbJM2mZgkZGvKG1dFdvsLplgctolz4= +github.com/hashicorp/go-uuid v1.0.0/go.mod h1:6SBZvOh/SIDV7/2o3Jml5SYk/TvGqwFJ/bN7x4byOro= +github.com/hashicorp/go-uuid v1.0.1/go.mod h1:6SBZvOh/SIDV7/2o3Jml5SYk/TvGqwFJ/bN7x4byOro= +github.com/hashicorp/go-uuid v1.0.2 h1:cfejS+Tpcp13yd5nYHWDI6qVCny6wyX2Mt5SGur2IGE= +github.com/hashicorp/go-uuid v1.0.2/go.mod h1:6SBZvOh/SIDV7/2o3Jml5SYk/TvGqwFJ/bN7x4byOro= +github.com/hashicorp/go-version v1.1.0/go.mod h1:fltr4n8CU8Ke44wwGCBoEymUuxUHl09ZGVZPK5anwXA= +github.com/hashicorp/go-version v1.2.0/go.mod h1:fltr4n8CU8Ke44wwGCBoEymUuxUHl09ZGVZPK5anwXA= +github.com/hashicorp/go.net v0.0.1/go.mod h1:hjKkEWcCURg++eb33jQU7oqQcI9XDCnUzHA0oac0k90= +github.com/hashicorp/golang-lru v0.5.0/go.mod h1:/m3WP610KZHVQ1SGc6re/UDhFvYD7pJ4Ao+sR/qLZy8= +github.com/hashicorp/golang-lru v0.5.1 h1:0hERBMJE1eitiLkihrMvRVBYAkpHzc/J3QdDN+dAcgU= +github.com/hashicorp/golang-lru v0.5.1/go.mod h1:/m3WP610KZHVQ1SGc6re/UDhFvYD7pJ4Ao+sR/qLZy8= +github.com/hashicorp/hcl v1.0.0 h1:0Anlzjpi4vEasTeNFn2mLJgTSwt0+6sfsiTG8qcWGx4= +github.com/hashicorp/hcl v1.0.0/go.mod h1:E5yfLk+7swimpb2L/Alb/PJmXilQ/rhwaUYs4T20WEQ= +github.com/hashicorp/logutils v1.0.0/go.mod h1:QIAnNjmIWmVIIkWDTG1z5v++HQmx9WQRO+LraFDTW64= +github.com/hashicorp/mdns v1.0.0/go.mod 
h1:tL+uN++7HEJ6SQLQ2/p+z2pH24WQKWjBPkE0mNTz8vQ= +github.com/hashicorp/mdns v1.0.1/go.mod h1:4gW7WsVCke5TE7EPeYliwHlRUyBtfCwuFwuMg2DmyNY= +github.com/hashicorp/memberlist v0.1.3/go.mod h1:ajVTdAv/9Im8oMAAj5G31PhhMCZJV2pPBoIllUwCN7I= +github.com/hashicorp/memberlist v0.2.2 h1:5+RffWKwqJ71YPu9mWsF7ZOscZmwfasdA8kbdC7AO2g= +github.com/hashicorp/memberlist v0.2.2/go.mod h1:MS2lj3INKhZjWNqd3N0m3J+Jxf3DAOnAH9VT3Sh9MUE= +github.com/hashicorp/raft v1.2.0 h1:mHzHIrF0S91d3A7RPBvuqkgB4d/7oFJZyvf1Q4m7GA0= +github.com/hashicorp/raft v1.2.0/go.mod h1:vPAJM8Asw6u8LxC3eJCUZmRP/E4QmUGE1R7g7k8sG/8= +github.com/hashicorp/raft-boltdb v0.0.0-20171010151810-6e5ba93211ea/go.mod h1:pNv7Wc3ycL6F5oOWn+tPGo2gWD4a5X+yp/ntwdKLjRk= +github.com/hashicorp/serf v0.8.2/go.mod h1:6hOLApaqBFA1NXqRQAsxw9QxuDEvNxSQRwA/JwenrHc= +github.com/hashicorp/serf v0.9.5 h1:EBWvyu9tcRszt3Bxp3KNssBMP1KuHWyO51lz9+786iM= +github.com/hashicorp/serf v0.9.5/go.mod h1:UWDWwZeL5cuWDJdl0C6wrvrUwEqtQ4ZKBKKENpqIUyk= +github.com/hashicorp/vault/api v1.0.4 h1:j08Or/wryXT4AcHj1oCbMd7IijXcKzYUGw59LGu9onU= +github.com/hashicorp/vault/api v1.0.4/go.mod h1:gDcqh3WGcR1cpF5AJz/B1UFheUEneMoIospckxBxk6Q= +github.com/hashicorp/vault/sdk v0.1.13 h1:mOEPeOhT7jl0J4AMl1E705+BcmeRs1VmKNb9F0sMLy8= +github.com/hashicorp/vault/sdk v0.1.13/go.mod h1:B+hVj7TpuQY1Y/GPbCpffmgd+tSEwvhkWnjtSYCaS2M= +github.com/hashicorp/yamux v0.0.0-20180604194846-3520598351bb/go.mod h1:+NfK9FKeTrX5uv1uIXGdwYDTeHna2qgaIlx54MXqjAM= +github.com/hashicorp/yamux v0.0.0-20181012175058-2f1d1f20f75d/go.mod h1:+NfK9FKeTrX5uv1uIXGdwYDTeHna2qgaIlx54MXqjAM= +github.com/hpcloud/tail v1.0.0/go.mod h1:ab1qPbhIpdTxEkNHXyeSf5vhxWSCs/tWer42PpOxQnU= +github.com/huaweicloud/huaweicloud-sdk-go-obs v3.21.1+incompatible h1:EFjtiulITiEktaZrr0OPlymTmrlpvSAa/xvv08kTQEU= +github.com/huaweicloud/huaweicloud-sdk-go-obs v3.21.1+incompatible/go.mod h1:l7VUhRbTKCzdOacdT4oWCwATKyvZqUOlOqr0Ous3k4s= +github.com/hudl/fargo v1.3.0/go.mod h1:y3CKSmjA+wD2gak7sUSXTAoopbhU08POFhmITJgmKTg= +github.com/hungys/go-lz4 v0.0.0-20170805124057-19ff7f07f099 h1:heHZCso/ytvpYr+hp2cDxlZfA/jTw46aHSvT9kZnJ7o= +github.com/hungys/go-lz4 v0.0.0-20170805124057-19ff7f07f099/go.mod h1:h44tqw4M3GN0Woo9KBStxJxm8huNi+9+tOHoeqSvhaY= +github.com/ianlancetaylor/demangle v0.0.0-20181102032728-5e5cf60278f6/go.mod h1:aSSvb/t6k1mPoxDqO4vJh6VOCGPwU4O0C2/Eqndh1Sc= +github.com/inconshreveable/mousetrap v1.0.0/go.mod h1:PxqpIevigyE2G7u3NXJIT2ANytuPF1OarO4DADm73n8= +github.com/influxdata/influxdb1-client v0.0.0-20191209144304-8bf82d3c094d/go.mod h1:qj24IKcXYK6Iy9ceXlo3Tc+vtHo9lIhSX5JddghvEPo= +github.com/influxdata/tdigest v0.0.1/go.mod h1:Z0kXnxzbTC2qrx4NaIzYkE1k66+6oEDQTvL95hQFh5Y= +github.com/jcmturner/aescts/v2 v2.0.0 h1:9YKLH6ey7H4eDBXW8khjYslgyqG2xZikXP0EQFKrle8= +github.com/jcmturner/aescts/v2 v2.0.0/go.mod h1:AiaICIRyfYg35RUkr8yESTqvSy7csK90qZ5xfvvsoNs= +github.com/jcmturner/dnsutils/v2 v2.0.0 h1:lltnkeZGL0wILNvrNiVCR6Ro5PGU/SeBvVO/8c/iPbo= +github.com/jcmturner/dnsutils/v2 v2.0.0/go.mod h1:b0TnjGOvI/n42bZa+hmXL+kFJZsFT7G4t3HTlQ184QM= +github.com/jcmturner/gofork v1.0.0 h1:J7uCkflzTEhUZ64xqKnkDxq3kzc96ajM1Gli5ktUem8= +github.com/jcmturner/gofork v1.0.0/go.mod h1:MK8+TM0La+2rjBD4jE12Kj1pCCxK7d2LK/UM3ncEo0o= +github.com/jcmturner/goidentity/v6 v6.0.1 h1:VKnZd2oEIMorCTsFBnJWbExfNN7yZr3EhJAxwOkZg6o= +github.com/jcmturner/goidentity/v6 v6.0.1/go.mod h1:X1YW3bgtvwAXju7V3LCIMpY0Gbxyjn/mY9zx4tFonSg= +github.com/jcmturner/gokrb5/v8 v8.4.1/go.mod h1:T1hnNppQsBtxW0tCHMHTkAt8n/sABdzZgZdoFrZaZNM= +github.com/jcmturner/gokrb5/v8 v8.4.2 
h1:6ZIM6b/JJN0X8UM43ZOM6Z4SJzla+a/u7scXFJzodkA= +github.com/jcmturner/gokrb5/v8 v8.4.2/go.mod h1:sb+Xq/fTY5yktf/VxLsE3wlfPqQjp0aWNYyvBVK62bc= +github.com/jcmturner/rpc/v2 v2.0.2/go.mod h1:VUJYCIDm3PVOEHw8sgt091/20OJjskO/YJki3ELg/Hc= +github.com/jcmturner/rpc/v2 v2.0.3 h1:7FXXj8Ti1IaVFpSAziCZWNzbNuZmnvw/i6CqLNdWfZY= +github.com/jcmturner/rpc/v2 v2.0.3/go.mod h1:VUJYCIDm3PVOEHw8sgt091/20OJjskO/YJki3ELg/Hc= +github.com/jessevdk/go-flags v1.4.0/go.mod h1:4FA24M0QyGHXBuZZK/XkWh8h0e1EYbRYJSGM75WSRxI= +github.com/jinzhu/inflection v1.0.0/go.mod h1:h+uFLlag+Qp1Va5pdKtLDYj+kHp5pxUVkryuEj+Srlc= +github.com/jinzhu/now v1.1.1/go.mod h1:d3SSVoowX0Lcu0IBviAWJpolVfI5UJVZZ7cO71lE/z8= +github.com/jinzhu/now v1.1.2/go.mod h1:d3SSVoowX0Lcu0IBviAWJpolVfI5UJVZZ7cO71lE/z8= +github.com/jmespath/go-jmespath v0.0.0-20180206201540-c2b33e8439af/go.mod h1:Nht3zPeWKUH0NzdCt2Blrr5ys8VGpn0CEB0cQHVjt7k= +github.com/jmespath/go-jmespath v0.3.0/go.mod h1:9QtRXoHjLGCJ5IBSaohpXITPlowMeeYCZ7fLUTSywik= +github.com/jmespath/go-jmespath v0.4.0 h1:BEgLn5cpjn8UN1mAw4NjwDrS35OdebyEtFe+9YPoQUg= +github.com/jmespath/go-jmespath v0.4.0/go.mod h1:T8mJZnbsbmF+m6zOOFylbeCJqk5+pHWvzYPziyZiYoo= +github.com/jmespath/go-jmespath/internal/testify v1.5.1 h1:shLQSRRSCCPj3f2gpwzGwWFoC7ycTf1rcQZHOlsJ6N8= +github.com/jmespath/go-jmespath/internal/testify v1.5.1/go.mod h1:L3OGu8Wl2/fWfCI6z80xFu9LTZmf1ZRjMHUOPmWr69U= +github.com/joho/godotenv v1.3.0/go.mod h1:7hK45KPybAkOC6peb+G5yklZfMxEjkZhHbwpqxOKXbg= +github.com/jonboulle/clockwork v0.1.0 h1:VKV+ZcuP6l3yW9doeqz6ziZGgcynBVQO+obU0+0hcPo= +github.com/jonboulle/clockwork v0.1.0/go.mod h1:Ii8DK3G1RaLaWxj9trq07+26W01tbo22gdxWY5EU2bo= +github.com/joomcode/errorx v1.0.1/go.mod h1:kgco15ekB6cs+4Xjzo7SPeXzx38PbJzBwbnu9qfVNHQ= +github.com/josharian/intern v1.0.0/go.mod h1:5DoeVV0s6jJacbCEi61lwdGj/aVlrQvzHFFd8Hwg//Y= +github.com/jpillora/backoff v1.0.0/go.mod h1:J/6gKK9jxlEcS3zixgDgUAsiuZ7yrSoa/FX5e0EB2j4= +github.com/json-iterator/go v1.1.5/go.mod h1:+SdeFBvtyEkXs7REEP0seUULqWtbJapLOCVDaaPEHmU= +github.com/json-iterator/go v1.1.6/go.mod h1:+SdeFBvtyEkXs7REEP0seUULqWtbJapLOCVDaaPEHmU= +github.com/json-iterator/go v1.1.7/go.mod h1:KdQUCv79m/52Kvf8AW2vK1V8akMuk1QjK/uOdHXbAo4= +github.com/json-iterator/go v1.1.8/go.mod h1:KdQUCv79m/52Kvf8AW2vK1V8akMuk1QjK/uOdHXbAo4= +github.com/json-iterator/go v1.1.9/go.mod h1:KdQUCv79m/52Kvf8AW2vK1V8akMuk1QjK/uOdHXbAo4= +github.com/json-iterator/go v1.1.10 h1:Kz6Cvnvv2wGdaG/V8yMvfkmNiXq9Ya2KUv4rouJJr68= +github.com/json-iterator/go v1.1.10/go.mod h1:KdQUCv79m/52Kvf8AW2vK1V8akMuk1QjK/uOdHXbAo4= +github.com/jstemmer/go-junit-report v0.0.0-20190106144839-af01ea7f8024/go.mod h1:6v2b51hI/fHJwM22ozAgKL4VKDeJcHhJFhtBdhmNjmU= +github.com/jtolds/gls v4.20.0+incompatible h1:xdiiI2gbIgH/gLH7ADydsJ1uDOEzR8yvV7C0MuV77Wo= +github.com/jtolds/gls v4.20.0+incompatible/go.mod h1:QJZ7F/aHp+rZTRtaJ1ow/lLfFfVYBRgL+9YlvaHOwJU= +github.com/juicedata/go-fuse/v2 v2.1.1-0.20210926080226-cfe1ec802a7f h1:EhufxJV98Bxn/o30aEZZ4qSc+sxQH8ohfEE5K2hHKtQ= +github.com/juicedata/go-fuse/v2 v2.1.1-0.20210926080226-cfe1ec802a7f/go.mod h1:oRyA5eK+pvJyv5otpO/DgccS8y/RvYMaO00GgRLGryc= +github.com/juicedata/godaemon v0.0.0-20210629045518-3da5144a127d h1:kpQMvNZJKGY3PTt7OSoahYc4nM0HY67SvK0YyS0GLwA= +github.com/juicedata/godaemon v0.0.0-20210629045518-3da5144a127d/go.mod h1:dlxKkLh3qAIPtgr2U/RVzsZJDuXA1ffg+Njikfmhvgw= +github.com/juicedata/minio v0.0.0-20210222051636-e7cabdf948f4 h1:/Klbj2LgJnqWrVfK2RtsqNQm649uXi2diLc/2X3eis4= +github.com/juicedata/minio v0.0.0-20210222051636-e7cabdf948f4/go.mod 
h1:pQt7BggRNk3m4ZLGmtZ8ShrWe70RvyeXsHljoapikM0= +github.com/juju/ratelimit v1.0.1 h1:+7AIFJVQ0EQgq/K9+0Krm7m530Du7tIz0METWzN0RgY= +github.com/juju/ratelimit v1.0.1/go.mod h1:qapgC/Gy+xNh9UxzV13HGGl/6UXNN+ct+vwSgWNm/qk= +github.com/julienschmidt/httprouter v1.2.0/go.mod h1:SYymIcj16QtmaHHD7aYtjjsJG7VTCxuUUipMqKk8s4w= +github.com/julienschmidt/httprouter v1.3.0/go.mod h1:JR6WtHb+2LUe8TCKY3cZOxFyyO8IZAc4RVcycCCAKdM= +github.com/keybase/go-ps v0.0.0-20190827175125-91aafc93ba19/go.mod h1:hY+WOq6m2FpbvyrI93sMaypsttvaIL5nhVR92dTMUcQ= +github.com/kisielk/errcheck v1.1.0/go.mod h1:EZBBE59ingxPouuu3KfxchcWSUPOHkagtvWXihfKN4Q= +github.com/kisielk/errcheck v1.2.0/go.mod h1:/BMXB+zMLi60iA8Vv6Ksmxu/1UDYcXs4uQLJ+jE2L00= +github.com/kisielk/errcheck v1.5.0/go.mod h1:pFxgyoBC7bSaBwPgfKdkLd5X25qrDl4LWUI2bnpBCr8= +github.com/kisielk/gotool v1.0.0/go.mod h1:XhKaO+MFFWcvkIS/tQcRk01m1F5IRFswLeQ+oQHNcck= +github.com/klauspost/compress v1.11.0/go.mod h1:aoV0uJVorq1K+umq18yTdKaF57EivdYsUV+/s2qKfXs= +github.com/klauspost/compress v1.11.7/go.mod h1:aoV0uJVorq1K+umq18yTdKaF57EivdYsUV+/s2qKfXs= +github.com/klauspost/compress v1.12.3/go.mod h1:8dP1Hq4DHOhN9w426knH3Rhby4rFm6D8eO+e+Dq5Gzg= +github.com/klauspost/compress v1.13.4 h1:0zhec2I8zGnjWcKyLl6i3gPqKANCCn5e9xmviEEeX6s= +github.com/klauspost/compress v1.13.4/go.mod h1:8dP1Hq4DHOhN9w426knH3Rhby4rFm6D8eO+e+Dq5Gzg= +github.com/klauspost/cpuid v1.2.3/go.mod h1:Pj4uuM528wm8OyEC2QMXAi2YiTZ96dNQPGgoMS4s3ek= +github.com/klauspost/cpuid v1.3.1 h1:5JNjFYYQrZeKRJ0734q51WCEEn2huer72Dc7K+R/b6s= +github.com/klauspost/cpuid v1.3.1/go.mod h1:bYW4mA6ZgKPob1/Dlai2LviZJO7KGI3uoWLd42rAQw4= +github.com/klauspost/cpuid/v2 v2.0.2/go.mod h1:FInQzS24/EEf25PyTYn52gqo7WaD8xa0213Md/qVLRg= +github.com/klauspost/cpuid/v2 v2.0.3 h1:DNljyrHyxlkk8139OXIAAauCwV8eQGDD6Z8YqnDXdZw= +github.com/klauspost/cpuid/v2 v2.0.3/go.mod h1:FInQzS24/EEf25PyTYn52gqo7WaD8xa0213Md/qVLRg= +github.com/klauspost/pgzip v1.2.5 h1:qnWYvvKqedOF2ulHpMG72XQol4ILEJ8k2wwRl/Km8oE= +github.com/klauspost/pgzip v1.2.5/go.mod h1:Ch1tH69qFZu15pkjo5kYi6mth2Zzwzt50oCQKQE9RUs= +github.com/klauspost/readahead v1.3.1 h1:QqXNYvm+VvqYcbrRT4LojUciM0XrznFRIDrbHiJtu/0= +github.com/klauspost/readahead v1.3.1/go.mod h1:AH9juHzNH7xqdqFHrMRSHeH2Ps+vFf+kblDqzPFiLJg= +github.com/klauspost/reedsolomon v1.9.11 h1:n2kipJFo+CPqg7fH988XJXjqEyj14RJ8BYj7UayxPNg= +github.com/klauspost/reedsolomon v1.9.11/go.mod h1:nLvuzNvy1ZDNQW30IuMc2ZWCbiqrJgdLoUS2X8HAUVg= +github.com/konsorten/go-windows-terminal-sequences v1.0.1/go.mod h1:T0+1ngSBFLxvqU3pZ+m/2kptfBszLMUkC4ZK/EgS/cQ= +github.com/konsorten/go-windows-terminal-sequences v1.0.3/go.mod h1:T0+1ngSBFLxvqU3pZ+m/2kptfBszLMUkC4ZK/EgS/cQ= +github.com/kr/fs v0.1.0 h1:Jskdu9ieNAYnjxsi0LbQp1ulIKZV1LAFgK1tWhpZgl8= +github.com/kr/fs v0.1.0/go.mod h1:FFnZGqtBN9Gxj7eW1uZ42v5BccTP0vu6NEaFoC2HwRg= +github.com/kr/logfmt v0.0.0-20140226030751-b84e30acd515/go.mod h1:+0opPa2QZZtGFBFZlji/RkVcI2GknAs/DXo4wKdlNEc= +github.com/kr/pretty v0.1.0/go.mod h1:dAy3ld7l9f0ibDNOQOHHMYYIIbhfbHSm3C4ZsoJORNo= +github.com/kr/pretty v0.2.0/go.mod h1:ipq/a2n7PKx3OHsz4KJII5eveXtPO4qwEXGdVfWzfnI= +github.com/kr/pretty v0.2.1 h1:Fmg33tUaq4/8ym9TJN1x7sLJnHVwhP33CNkpYV/7rwI= +github.com/kr/pretty v0.2.1/go.mod h1:ipq/a2n7PKx3OHsz4KJII5eveXtPO4qwEXGdVfWzfnI= +github.com/kr/pty v1.1.1/go.mod h1:pFQYn66WHrOpPYNljwOMqo10TkYh1fy3cYio2l3bCsQ= +github.com/kr/pty v1.1.5/go.mod h1:9r2w37qlBe7rQ6e1fg1S/9xpWHSnaqNdHD3WcMdbPDA= +github.com/kr/text v0.1.0/go.mod h1:4Jbv+DJW3UT/LiOwJeYQe1efqtUx/iVham/4vfdArNI= +github.com/kr/text v0.2.0 
h1:5Nx0Ya0ZqY2ygV366QzturHI13Jq95ApcVaJBhpS+AY= +github.com/kr/text v0.2.0/go.mod h1:eLer722TekiGuMkidMxC/pM04lWEeraHUUmBw8l2grE= +github.com/ks3sdklib/aws-sdk-go v1.0.12 h1:2oD/1wtKtujkKSV7oQL39rTYHjJdggsugbL4QKPJ3cU= +github.com/ks3sdklib/aws-sdk-go v1.0.12/go.mod h1:WKPC0Foi1kjnyeC6Ei45XBBT+CIzHuhk/uwpCRmAf+o= +github.com/kylelemons/godebug v0.0.0-20170820004349-d65d576e9348/go.mod h1:B69LEHPfb2qLo0BaaOLcbitczOKLWTsrBG9LczfCD4k= +github.com/leodido/go-urn v1.1.0/go.mod h1:+cyI34gQWZcE1eQU7NVgKkkzdXDQHr1dBMtdAPozLkw= +github.com/lib/pq v1.7.0/go.mod h1:AlVN5x4E4T544tWzH6hKfbfQvm3HdbOxrmggDNAPY9o= +github.com/lib/pq v1.8.0 h1:9xohqzkUwzR4Ga4ivdTcawVS89YSDVxXMa3xJX3cGzg= +github.com/lib/pq v1.8.0/go.mod h1:AlVN5x4E4T544tWzH6hKfbfQvm3HdbOxrmggDNAPY9o= +github.com/lightstep/lightstep-tracer-common/golang/gogo v0.0.0-20190605223551-bc2310a04743/go.mod h1:qklhhLq1aX+mtWk9cPHPzaBjWImj5ULL6C7HFJtXQMM= +github.com/lightstep/lightstep-tracer-go v0.18.1/go.mod h1:jlF1pusYV4pidLvZ+XD0UBX0ZE6WURAspgAczcDHrL4= +github.com/lyft/protoc-gen-validate v0.0.13/go.mod h1:XbGvPuh87YZc5TdIa2/I4pLk0QoUACkjt2znoq26NVQ= +github.com/magiconair/properties v1.8.0/go.mod h1:PppfXfuXeibc/6YijjN8zIbojt8czPbwD3XqdrwzmxQ= +github.com/mailru/easyjson v0.0.0-20180823135443-60711f1a8329/go.mod h1:C1wdFJiN94OJF2b5HbByQZoLdCWB1Yqtg26g4irojpc= +github.com/mailru/easyjson v0.0.0-20190614124828-94de47d64c63/go.mod h1:C1wdFJiN94OJF2b5HbByQZoLdCWB1Yqtg26g4irojpc= +github.com/mailru/easyjson v0.0.0-20190626092158-b2ccc519800e/go.mod h1:C1wdFJiN94OJF2b5HbByQZoLdCWB1Yqtg26g4irojpc= +github.com/mailru/easyjson v0.7.6/go.mod h1:xzfreul335JAWq5oZzymOObrkdz5UnU4kGfJJLY9Nlc= +github.com/mattn/go-colorable v0.0.9/go.mod h1:9vuHe8Xs5qXnSaW/c/ABM9alt+Vo+STaOChaDxuIBZU= +github.com/mattn/go-colorable v0.1.4/go.mod h1:U0ppj6V5qS13XJ6of8GYAs25YV2eR4EVcfRqFIhoBtE= +github.com/mattn/go-colorable v0.1.6/go.mod h1:u6P/XSegPjTcexA+o6vUJrdnUu04hMope9wVRipJSqc= +github.com/mattn/go-colorable v0.1.8 h1:c1ghPdyEDarC70ftn0y+A/Ee++9zz8ljHG1b13eJ0s8= +github.com/mattn/go-colorable v0.1.8/go.mod h1:u6P/XSegPjTcexA+o6vUJrdnUu04hMope9wVRipJSqc= +github.com/mattn/go-ieproxy v0.0.0-20190702010315-6dee0af9227d/go.mod h1:31jz6HNzdxOmlERGGEc4v/dMssOfmp2p5bT/okiKFFc= +github.com/mattn/go-ieproxy v0.0.1 h1:qiyop7gCflfhwCzGyeT0gro3sF9AIg9HU98JORTkqfI= +github.com/mattn/go-ieproxy v0.0.1/go.mod h1:pYabZ6IHcRpFh7vIaLfK7rdcWgFEb3SFJ6/gNWuh88E= +github.com/mattn/go-isatty v0.0.3/go.mod h1:M+lRXTBqGeGNdLjl/ufCoiOlB5xdOkqRJdNxMWT7Zi4= +github.com/mattn/go-isatty v0.0.4/go.mod h1:M+lRXTBqGeGNdLjl/ufCoiOlB5xdOkqRJdNxMWT7Zi4= +github.com/mattn/go-isatty v0.0.7/go.mod h1:Iq45c/XA43vh69/j3iqttzPXn0bhXyGjM0Hdxcsrc5s= +github.com/mattn/go-isatty v0.0.8/go.mod h1:Iq45c/XA43vh69/j3iqttzPXn0bhXyGjM0Hdxcsrc5s= +github.com/mattn/go-isatty v0.0.9/go.mod h1:YNRxwqDuOph6SZLI9vUUz6OYw3QyUt7WiY2yME+cCiQ= +github.com/mattn/go-isatty v0.0.10/go.mod h1:qgIWMr58cqv1PHHyhnkY9lrL7etaEgOFcMEpPG5Rm84= +github.com/mattn/go-isatty v0.0.11/go.mod h1:PhnuNfih5lzO57/f3n+odYbM4JtupLOxQOAqxQCu2WE= +github.com/mattn/go-isatty v0.0.12 h1:wuysRhFDzyxgEmMf5xjvJ2M9dZoWAXNNr5LSBS7uHXY= +github.com/mattn/go-isatty v0.0.12/go.mod h1:cbi8OIDigv2wuxKPP5vlRcQ1OAZbq2CE4Kysco4FUpU= +github.com/mattn/go-runewidth v0.0.2/go.mod h1:LwmH8dsx7+W8Uxz3IHJYH5QSwggIsqBzpuz5H//U1FU= +github.com/mattn/go-runewidth v0.0.4/go.mod h1:LwmH8dsx7+W8Uxz3IHJYH5QSwggIsqBzpuz5H//U1FU= +github.com/mattn/go-runewidth v0.0.7/go.mod h1:H031xJmbD/WCDINGzjvQ9THkh0rPKHF+m2gUSrubnMI= +github.com/mattn/go-runewidth v0.0.13 
h1:lTGmDsbAYt5DmK6OnoV7EuIF1wEIFAcxld6ypU4OSgU= +github.com/mattn/go-runewidth v0.0.13/go.mod h1:Jdepj2loyihRzMpdS35Xk/zdY8IAYHsh153qUoGf23w= +github.com/mattn/go-shellwords v1.0.12/go.mod h1:EZzvwXDESEeg03EKmM+RmDnNOPKG4lLtQsUlTZDWQ8Y= +github.com/mattn/go-sqlite3 v1.14.0/go.mod h1:JIl7NbARA7phWnGvh0LKTyg7S9BA+6gx71ShQilpsus= +github.com/mattn/go-sqlite3 v1.14.5/go.mod h1:WVKg1VTActs4Qso6iwGbiFih2UIHo0ENGwNd0Lj+XmI= +github.com/mattn/go-sqlite3 v2.0.1+incompatible h1:xQ15muvnzGBHpIpdrNi1DA5x0+TcBZzsIDwmw9uTHzw= +github.com/mattn/go-sqlite3 v2.0.1+incompatible/go.mod h1:FPy6KqzDD04eiIsT53CuJW3U88zkxoIYsOqkbpncsNc= +github.com/matttproud/golang_protobuf_extensions v1.0.1 h1:4hp9jkHxhMHkqkrB3Ix0jegS5sx/RkqARlsWZ6pIwiU= +github.com/matttproud/golang_protobuf_extensions v1.0.1/go.mod h1:D8He9yQNgCq6Z5Ld7szi9bcBfOoFv/3dc6xSMkL2PC0= +github.com/mgechev/dots v0.0.0-20190921121421-c36f7dcfbb81/go.mod h1:KQ7+USdGKfpPjXk4Ga+5XxQM4Lm4e3gAogrreFAYpOg= +github.com/mgechev/revive v1.0.2/go.mod h1:rb0dQy1LVAxW9SWy5R3LPUjevzUbUS316U5MFySA2lo= +github.com/miekg/dns v1.0.14/go.mod h1:W1PPwlIAgtquWBMBEV9nkV9Cazfe8ScdGz/Lj7v3Nrg= +github.com/miekg/dns v1.1.26/go.mod h1:bPDLeHnStXmXAq1m/Ch/hvfNHr14JKNPMBo3VZKjuso= +github.com/miekg/dns v1.1.35 h1:oTfOaDH+mZkdcgdIjH6yBajRGtIwcwcaR+rt23ZSrJs= +github.com/miekg/dns v1.1.35/go.mod h1:KNUDUusw/aVsxyTYZM1oqvCicbwhgbNgztCETuNZ7xM= +github.com/minio/cli v1.22.0 h1:VTQm7lmXm3quxO917X3p+el1l0Ca5X3S4PM2ruUYO68= +github.com/minio/cli v1.22.0/go.mod h1:bYxnK0uS629N3Bq+AOZZ+6lwF77Sodk4+UL9vNuXhOY= +github.com/minio/highwayhash v1.0.1 h1:dZ6IIu8Z14VlC0VpfKofAhCy74wu/Qb5gcn52yWoz/0= +github.com/minio/highwayhash v1.0.1/go.mod h1:BQskDq+xkJ12lmlUUi7U0M5Swg3EWR+dLTk+kldvVxY= +github.com/minio/md5-simd v1.1.0/go.mod h1:XpBqgZULrMYD3R+M28PcmP0CkI7PEMzB3U77ZrKZ0Gw= +github.com/minio/md5-simd v1.1.1 h1:9ojcLbuZ4gXbB2sX53MKn8JUZ0sB/2wfwsEcRw+I08U= +github.com/minio/md5-simd v1.1.1/go.mod h1:XpBqgZULrMYD3R+M28PcmP0CkI7PEMzB3U77ZrKZ0Gw= +github.com/minio/minio-go v6.0.14+incompatible h1:fnV+GD28LeqdN6vT2XdGKW8Qe/IfjJDswNVuni6km9o= +github.com/minio/minio-go v6.0.14+incompatible/go.mod h1:7guKYtitv8dktvNUGrhzmNlA5wrAABTQXCoesZdFQO8= +github.com/minio/minio-go/v7 v7.0.10 h1:1oUKe4EOPUEhw2qnPQaPsJ0lmVTYLFu03SiItauXs94= +github.com/minio/minio-go/v7 v7.0.10/go.mod h1:td4gW1ldOsj1PbSNS+WYK43j+P1XVhX/8W8awaYlBFo= +github.com/minio/selfupdate v0.3.1 h1:BWEFSNnrZVMUWXbXIgLDNDjbejkmpAmZvy/nCz1HlEs= +github.com/minio/selfupdate v0.3.1/go.mod h1:b8ThJzzH7u2MkF6PcIra7KaXO9Khf6alWPvMSyTDCFM= +github.com/minio/sha256-simd v0.1.1 h1:5QHSlgo3nt5yKOJrC7W8w7X+NFl8cMPZm96iu8kKUJU= +github.com/minio/sha256-simd v0.1.1/go.mod h1:B5e1o+1/KgNmWrSQK08Y6Z1Vb5pwIktudl0J58iy0KM= +github.com/minio/simdjson-go v0.2.1 h1:nxYlp4Qd0w2pwLlif00l5vTFL6PcNAKpyHq27/pageg= +github.com/minio/simdjson-go v0.2.1/go.mod h1:JPUSkRykfSPS+AhO0YPA1h0l5vY7NqrF4zel2b12wxc= +github.com/minio/sio v0.2.1/go.mod h1:8b0yPp2avGThviy/+OCJBI6OMpvxoUuiLvE6F1lebhw= +github.com/minio/sio v0.3.0 h1:syEFBewzOMOYVzSTFpp1MqpSZk8rUNbz8VIIc+PNzus= +github.com/minio/sio v0.3.0/go.mod h1:8b0yPp2avGThviy/+OCJBI6OMpvxoUuiLvE6F1lebhw= +github.com/mitchellh/cli v1.0.0/go.mod h1:hNIlj7HEI86fIcpObd7a0FcrxTWetlwJDGcceTlRvqc= +github.com/mitchellh/cli v1.1.0/go.mod h1:xcISNoH86gajksDmfB23e/pu+B+GeFRMYmoHXxx3xhI= +github.com/mitchellh/copystructure v1.0.0/go.mod h1:SNtv71yrdKgLRyLFxmLdkAbkKEFWgYaq1OVrnRcwhnw= +github.com/mitchellh/go-homedir v1.0.0/go.mod h1:SfyaCUpYCn1Vlf4IUYiD9fPX4A5wJrkLzIz1N1q0pr0= +github.com/mitchellh/go-homedir v1.1.0 
h1:lukF9ziXFxDFPkA1vsr5zpc1XuPDn/wFntq5mG+4E0Y= +github.com/mitchellh/go-homedir v1.1.0/go.mod h1:SfyaCUpYCn1Vlf4IUYiD9fPX4A5wJrkLzIz1N1q0pr0= +github.com/mitchellh/go-testing-interface v0.0.0-20171004221916-a61a99592b77/go.mod h1:kRemZodwjscx+RGhAo8eIhFbs2+BFgRtFPeD/KE+zxI= +github.com/mitchellh/go-testing-interface v1.0.0 h1:fzU/JVNcaqHQEcVFAKeR41fkiLdIPrefOvVG1VZ96U0= +github.com/mitchellh/go-testing-interface v1.0.0/go.mod h1:kRemZodwjscx+RGhAo8eIhFbs2+BFgRtFPeD/KE+zxI= +github.com/mitchellh/go-wordwrap v1.0.0/go.mod h1:ZXFpozHsX6DPmq2I0TCekCxypsnAUbP2oI0UX1GXzOo= +github.com/mitchellh/gox v0.4.0/go.mod h1:Sd9lOJ0+aimLBi73mGofS1ycjY8lL3uZM3JPS42BGNg= +github.com/mitchellh/iochan v1.0.0/go.mod h1:JwYml1nuB7xOzsp52dPpHFffvOCDupsG0QubkSMEySY= +github.com/mitchellh/mapstructure v0.0.0-20160808181253-ca63d7c062ee/go.mod h1:FVVH3fgwuzCH5S8UJGiWEs2h04kUh9fWfEaFds41c1Y= +github.com/mitchellh/mapstructure v1.1.2 h1:fmNYVwqnSfB9mZU6OS2O6GsXM+wcskZDuKQzvN1EDeE= +github.com/mitchellh/mapstructure v1.1.2/go.mod h1:FVVH3fgwuzCH5S8UJGiWEs2h04kUh9fWfEaFds41c1Y= +github.com/mitchellh/reflectwalk v1.0.0/go.mod h1:mSTlrgnPZtwu0c4WaC2kGObEpuNDbx0jmZXqmk4esnw= +github.com/mmcloughlin/avo v0.0.0-20201105074841-5d2f697d268f/go.mod h1:6aKT4zZIrpGqB3RpFU14ByCSSyKY6LfJz4J/JJChHfI= +github.com/modern-go/concurrent v0.0.0-20180228061459-e0a39a4cb421/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q= +github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd h1:TRLaZ9cD/w8PVh93nsPXa1VrQ6jlwL5oN8l14QlcNfg= +github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q= +github.com/modern-go/reflect2 v0.0.0-20180701023420-4b7aa43c6742/go.mod h1:bx2lNnkwVCuqBIxFjflWJWanXIb3RllmbCylyMrvgv0= +github.com/modern-go/reflect2 v1.0.1 h1:9f412s+6RmYXLWZSEzVVgPGK7C2PphHj5RJrvfx9AWI= +github.com/modern-go/reflect2 v1.0.1/go.mod h1:bx2lNnkwVCuqBIxFjflWJWanXIb3RllmbCylyMrvgv0= +github.com/modocache/gover v0.0.0-20171022184752-b58185e213c5/go.mod h1:caMODM3PzxT8aQXRPkAt8xlV/e7d7w8GM5g0fa5F0D8= +github.com/montanaflynn/stats v0.5.0 h1:2EkzeTSqBB4V4bJwWrt5gIIrZmpJBcoIRGS2kWLgzmk= +github.com/montanaflynn/stats v0.5.0/go.mod h1:wL8QJuTMNUDYhXwkmfOly8iTdp5TEcJFWZD2D7SIkUc= +github.com/mozillazg/go-httpheader v0.2.1 h1:geV7TrjbL8KXSyvghnFm+NyTux/hxwueTSrwhe88TQQ= +github.com/mozillazg/go-httpheader v0.2.1/go.mod h1:jJ8xECTlalr6ValeXYdOF8fFUISeBAdw6E61aqQma60= +github.com/mwitkow/go-conntrack v0.0.0-20161129095857-cc309e4a2223/go.mod h1:qRWi+5nqEBWmkhHvq77mSJWrCKwh8bxhgT7d/eI7P4U= +github.com/mwitkow/go-conntrack v0.0.0-20190716064945-2f068394615f/go.mod h1:qRWi+5nqEBWmkhHvq77mSJWrCKwh8bxhgT7d/eI7P4U= +github.com/nats-io/jwt v0.3.0/go.mod h1:fRYCDE99xlTsqUzISS1Bi75UBJ6ljOJQOAAu5VglpSg= +github.com/nats-io/jwt v0.3.2/go.mod h1:/euKqTS1ZD+zzjYrY7pseZrTtWQSjujC7xjPc8wL6eU= +github.com/nats-io/jwt v1.1.0 h1:+vOlgtM0ZsF46GbmUoadq0/2rChNS45gtxHEa3H1gqM= +github.com/nats-io/jwt v1.1.0/go.mod h1:n3cvmLfBfnpV4JJRN7lRYCyZnw48ksGsbThGXEk4w9M= +github.com/nats-io/jwt/v2 v2.1.0 h1:1UbfD5g1xTdWmSeRV8bh/7u+utTiBsRtWhLl1PixZp4= +github.com/nats-io/jwt/v2 v2.1.0/go.mod h1:0tqz9Hlu6bCBFLWAASKhE5vUA4c24L9KPUUgvwumE/k= +github.com/nats-io/nats-server/v2 v2.1.2/go.mod h1:Afk+wRZqkMQs/p45uXdrVLuab3gwv3Z8C4HTBu8GD/k= +github.com/nats-io/nats-server/v2 v2.1.9/go.mod h1:9qVyoewoYXzG1ME9ox0HwkkzyYvnlBDugfR4Gg/8uHU= +github.com/nats-io/nats-server/v2 v2.6.2 h1:uMydiSENbgRPsXHBYDvVVVx1d0inut/zd+DvISIGCi8= +github.com/nats-io/nats-server/v2 v2.6.2/go.mod 
h1:CNi6dJQ5H+vWqaoWKjCGtqBt7ai/xOTLiocUqhK6ews= +github.com/nats-io/nats-streaming-server v0.19.0 h1:NVYusu6kcMxRBj1wOWRdXBUHf1bzkJQbsHovsg+Fr1o= +github.com/nats-io/nats-streaming-server v0.19.0/go.mod h1:oqrRqpMg84aiPDyroTornjVWNYJKh+6ozh2Mgt8dslE= +github.com/nats-io/nats.go v1.9.1/go.mod h1:ZjDU1L/7fJ09jvUSRVBR2e7+RnLiiIQyqyzEE/Zbp4w= +github.com/nats-io/nats.go v1.10.0/go.mod h1:AjGArbfyR50+afOUotNX2Xs5SYHf+CoOa5HH1eEl2HE= +github.com/nats-io/nats.go v1.13.0 h1:LvYqRB5epIzZWQp6lmeltOOZNLqCvm4b+qfvzZO03HE= +github.com/nats-io/nats.go v1.13.0/go.mod h1:BPko4oXsySz4aSWeFgOHLZs3G4Jq4ZAyE6/zMCxRT6w= +github.com/nats-io/nkeys v0.1.0/go.mod h1:xpnFELMwJABBLVhffcfd1MZx6VsNRFpEugbxziKVo7w= +github.com/nats-io/nkeys v0.1.3/go.mod h1:xpnFELMwJABBLVhffcfd1MZx6VsNRFpEugbxziKVo7w= +github.com/nats-io/nkeys v0.1.4/go.mod h1:XdZpAbhgyyODYqjTawOnIOI7VlbKSarI9Gfy1tqEu/s= +github.com/nats-io/nkeys v0.2.0/go.mod h1:XdZpAbhgyyODYqjTawOnIOI7VlbKSarI9Gfy1tqEu/s= +github.com/nats-io/nkeys v0.3.0 h1:cgM5tL53EvYRU+2YLXIK0G2mJtK12Ft9oeooSZMA2G8= +github.com/nats-io/nkeys v0.3.0/go.mod h1:gvUNGjVcM2IPr5rCsRsC6Wb3Hr2CQAm08dsxtV6A5y4= +github.com/nats-io/nuid v1.0.1 h1:5iA8DT8V7q8WK2EScv2padNa/rTESc1KdnPw4TC2paw= +github.com/nats-io/nuid v1.0.1/go.mod h1:19wcPz3Ph3q0Jbyiqsd0kePYG7A95tJPxeL+1OSON2c= +github.com/nats-io/stan.go v0.7.0 h1:sMVHD9RkxPOl6PJfDVBQd+gbxWkApeYl6GrH+10msO4= +github.com/nats-io/stan.go v0.7.0/go.mod h1:Ci6mUIpGQTjl++MqK2XzkWI/0vF+Bl72uScx7ejSYmU= +github.com/ncw/directio v1.0.5 h1:JSUBhdjEvVaJvOoyPAbcW0fnd0tvRXD76wEfZ1KcQz4= +github.com/ncw/directio v1.0.5/go.mod h1:rX/pKEYkOXBGOggmcyJeJGloCkleSvphPx2eV3t6ROk= +github.com/ncw/swift v1.0.53 h1:luHjjTNtekIEvHg5KdAFIBaH7bWfNkefwFnpDffSIks= +github.com/ncw/swift v1.0.53/go.mod h1:23YIA4yWVnGwv2dQlN4bB7egfYX6YLn0Yo/S6zZO/ZM= +github.com/nfnt/resize v0.0.0-20160724205520-891127d8d1b5/go.mod h1:jpp1/29i3P1S/RLdc7JQKbRpFeM1dOBd8T9ki5s+AY8= +github.com/nicksnyder/go-i18n v1.10.0/go.mod h1:HrK7VCrbOvQoUAQ7Vpy7i87N7JZZZ7R2xBGjv0j365Q= +github.com/niemeyer/pretty v0.0.0-20200227124842-a10e7caefd8e h1:fD57ERR4JtEqsWbfPhv4DMiApHyliiK5xCTNVSPiaAs= +github.com/niemeyer/pretty v0.0.0-20200227124842-a10e7caefd8e/go.mod h1:zD1mROLANZcx1PVRCS0qkT7pwLkGfwJo4zjcN/Tysno= +github.com/nsqio/go-nsq v1.0.8 h1:3L2F8tNLlwXXlp2slDUrUWSBn2O3nMh8R1/KEDFTHPk= +github.com/nsqio/go-nsq v1.0.8/go.mod h1:vKq36oyeVXgsS5Q8YEO7WghqidAVXQlcFxzQbQTuDEY= +github.com/nxadm/tail v1.4.4 h1:DQuhQpB1tVlglWS2hLQ5OV6B5r8aGxSrPc5Qo6uTN78= +github.com/nxadm/tail v1.4.4/go.mod h1:kenIhsEOeOJmVchQTgglprH7qJGnHDVpk1VPCcaMI8A= +github.com/oklog/oklog v0.3.2/go.mod h1:FCV+B7mhrz4o+ueLpx+KqkyXRGMWOYEvfiXtdGtbWGs= +github.com/oklog/run v1.0.0/go.mod h1:dlhp/R75TPv97u0XWUtDeV/lRKWPKSdTuV0TZvrmrQA= +github.com/oklog/ulid v1.3.1/go.mod h1:CirwcVhetQ6Lv90oh/F+FBtV6XMibvdAFo93nm5qn4U= +github.com/oleiade/reflections v1.0.1/go.mod h1:rdFxbxq4QXVZWj0F+e9jqjDkc7dbp97vkRixKo2JR60= +github.com/olekukonko/tablewriter v0.0.0-20170122224234-a0225b3f23b5/go.mod h1:vsDQFd/mU46D+Z4whnwzcISnGGzXWMclvtLoiIKAKIo= +github.com/olekukonko/tablewriter v0.0.4/go.mod h1:zq6QwlOf5SlnkVbMSr5EoBv3636FWnp+qbPhuoO21uA= +github.com/olivere/elastic/v7 v7.0.22/go.mod h1:VDexNy9NjmtAkrjNoI7tImv7FR4tf5zUA3ickqu5Pc8= +github.com/onsi/ginkgo v1.6.0/go.mod h1:lLunBs/Ym6LB5Z9jYTR76FiuTmxDTDusOGeTQH+WWjE= +github.com/onsi/ginkgo v1.7.0/go.mod h1:lLunBs/Ym6LB5Z9jYTR76FiuTmxDTDusOGeTQH+WWjE= +github.com/onsi/ginkgo v1.12.1/go.mod h1:zj2OWP4+oCPe1qIXoGWkgMRwljMUYCdkwsT2108oapk= +github.com/onsi/ginkgo v1.14.2 
h1:8mVmC9kjFFmA8H4pKMUhcblgifdkOIXPvbhN1T36q1M= +github.com/onsi/ginkgo v1.14.2/go.mod h1:iSB4RoI2tjJc9BBv4NKIKWKya62Rps+oPG/Lv9klQyY= +github.com/onsi/gomega v1.4.3/go.mod h1:ex+gbHU/CVuBBDIJjb2X0qEXbFg53c61hWP/1CpauHY= +github.com/onsi/gomega v1.7.1/go.mod h1:XdKZgCCFLUoM/7CFJVPcG8C1xQ1AJ0vpAezJrB7JYyY= +github.com/onsi/gomega v1.10.1/go.mod h1:iN09h71vgCQne3DLsj+A5owkum+a2tYe+TOCB1ybHNo= +github.com/onsi/gomega v1.10.3 h1:gph6h/qe9GSUw1NhH1gp+qb+h8rXD8Cy60Z32Qw3ELA= +github.com/onsi/gomega v1.10.3/go.mod h1:V9xEwhxec5O8UDM77eCW8vLymOMltsqPVYWrpDsH8xc= +github.com/op/go-logging v0.0.0-20160315200505-970db520ece7/go.mod h1:HzydrMdWErDVzsI23lYNej1Htcns9BCg93Dk0bBINWk= +github.com/opentracing-contrib/go-observer v0.0.0-20170622124052-a52f23424492/go.mod h1:Ngi6UdF0k5OKD5t5wlmGhe/EDKPoUM3BXZSSfIuJbis= +github.com/opentracing/basictracer-go v1.0.0/go.mod h1:QfBfYuafItcjQuMwinw9GhYKwFXS9KnPs5lxoYwgW74= +github.com/opentracing/opentracing-go v1.0.2/go.mod h1:UkNAQd3GIcIGf0SeVgPpRdFStlNbqXla1AfSYxPUl2o= +github.com/opentracing/opentracing-go v1.1.0/go.mod h1:UkNAQd3GIcIGf0SeVgPpRdFStlNbqXla1AfSYxPUl2o= +github.com/opentracing/opentracing-go v1.2.0 h1:uEJPy/1a5RIPAJ0Ov+OIO8OxWu77jEv+1B0VhjKrZUs= +github.com/opentracing/opentracing-go v1.2.0/go.mod h1:GxEUsuufX4nBwe+T+Wl9TAgYrxe9dPLANfrWvHYVTgc= +github.com/openzipkin-contrib/zipkin-go-opentracing v0.4.5/go.mod h1:/wsWhb9smxSfWAKL3wpBW7V8scJMt8N8gnaMCS9E/cA= +github.com/openzipkin/zipkin-go v0.1.6/go.mod h1:QgAqvLzwWbR/WpD4A3cGpPtJrZXNIiJc5AZX7/PBEpw= +github.com/openzipkin/zipkin-go v0.2.1/go.mod h1:NaW6tEwdmWMaCDZzg8sh+IBNOxHMPnhQw8ySjnjRyN4= +github.com/openzipkin/zipkin-go v0.2.2/go.mod h1:NaW6tEwdmWMaCDZzg8sh+IBNOxHMPnhQw8ySjnjRyN4= +github.com/pact-foundation/pact-go v1.0.4/go.mod h1:uExwJY4kCzNPcHRj+hCR/HBbOOIwwtUjcrb0b5/5kLM= +github.com/pascaldekloe/goe v0.0.0-20180627143212-57f6aae5913c/go.mod h1:lzWF7FIEvWOWxwDKqyGYQf6ZUaNfKdP144TG7ZOy1lc= +github.com/pascaldekloe/goe v0.1.0 h1:cBOtyMzM9HTpWjXfbbunk26uA6nG3a8n06Wieeh0MwY= +github.com/pascaldekloe/goe v0.1.0/go.mod h1:lzWF7FIEvWOWxwDKqyGYQf6ZUaNfKdP144TG7ZOy1lc= +github.com/pascaldekloe/name v0.0.0-20180628100202-0fd16699aae1/go.mod h1:eD5JxqMiuNYyFNmyY9rkJ/slN8y59oEu4Ei7F8OoKWQ= +github.com/pborman/getopt v0.0.0-20180729010549-6fdd0a2c7117/go.mod h1:85jBQOZwpVEaDAr341tbn15RS4fCAsIst0qp7i8ex1o= +github.com/pborman/uuid v1.2.0/go.mod h1:X/NO0urCmaxf9VXbdlT7C2Yzkj2IKimNn4k+gtPdI/k= +github.com/pelletier/go-toml v1.2.0/go.mod h1:5z9KED0ma1S8pY6P1sdut58dfprrGBbd/94hg7ilaic= +github.com/pelletier/go-toml v1.3.0/go.mod h1:PN7xzY2wHTK0K9p34ErDQMlFxa51Fk0OUruD3k1mMwo= +github.com/pengsrc/go-shared v0.2.1-0.20190131101655-1999055a4a14 h1:XeOYlK9W1uCmhjJSsY78Mcuh7MVkNjTzmHx1yBzizSU= +github.com/pengsrc/go-shared v0.2.1-0.20190131101655-1999055a4a14/go.mod h1:jVblp62SafmidSkvWrXyxAme3gaTfEtWwRPGz5cpvHg= +github.com/performancecopilot/speed v3.0.0+incompatible/go.mod h1:/CLtqpZ5gBg1M9iaPbIdPPGyKcA8hKdoy6hAWba7Yac= +github.com/petermattis/goid v0.0.0-20180202154549-b0b1615b78e5/go.mod h1:jvVRKCrJTQWu0XVbaOlby/2lO20uSCHEMzzplHXte1o= +github.com/phf/go-queue v0.0.0-20170504031614-9abe38d0371d h1:U+PMnTlV2tu7RuMK5etusZG3Cf+rpow5hqQByeCzJ2g= +github.com/phf/go-queue v0.0.0-20170504031614-9abe38d0371d/go.mod h1:lXfE4PvvTW5xOjO6Mba8zDPyw8M93B6AQ7frTGnMlA8= +github.com/philhofer/fwd v1.1.1 h1:GdGcTjf5RNAxwS4QLsiMzJYj5KEvPJD3Abr261yRQXQ= +github.com/philhofer/fwd v1.1.1/go.mod h1:gk3iGcWd9+svBvR0sR+KPcfE+RNWozjowpeBVG3ZVNU= +github.com/pierrec/lz4 v1.0.2-0.20190131084431-473cd7ce01a1/go.mod 
h1:3/3N9NVKO0jef7pBehbT1qWhCMrIgbYNnFAZCqQ5LRc= +github.com/pierrec/lz4 v2.0.5+incompatible/go.mod h1:pdkljMzZIN41W+lC3N2tnIh5sFi+IEE17M5jbnwPHcY= +github.com/pierrec/lz4 v2.5.2+incompatible h1:WCjObylUIOlKy/+7Abdn34TLIkXiA4UWUMhxq9m9ZXI= +github.com/pierrec/lz4 v2.5.2+incompatible/go.mod h1:pdkljMzZIN41W+lC3N2tnIh5sFi+IEE17M5jbnwPHcY= +github.com/pingcap/check v0.0.0-20190102082844-67f458068fc8/go.mod h1:B1+S9LNcuMyLH/4HMTViQOJevkGiik3wW2AN9zb2fNQ= +github.com/pingcap/check v0.0.0-20191107115940-caf2b9e6ccf4/go.mod h1:PYMCGwN0JHjoqGr3HrZoD+b8Tgx8bKnArhSq8YVzUMc= +github.com/pingcap/check v0.0.0-20191216031241-8a5a85928f12/go.mod h1:PYMCGwN0JHjoqGr3HrZoD+b8Tgx8bKnArhSq8YVzUMc= +github.com/pingcap/check v0.0.0-20200212061837-5e12011dc712 h1:R8gStypOBmpnHEx1qi//SaqxJVI4inOqljg/Aj5/390= +github.com/pingcap/check v0.0.0-20200212061837-5e12011dc712/go.mod h1:PYMCGwN0JHjoqGr3HrZoD+b8Tgx8bKnArhSq8YVzUMc= +github.com/pingcap/errcode v0.3.0/go.mod h1:4b2X8xSqxIroj/IZ9MX/VGZhAwc11wB9wRIzHvz6SeM= +github.com/pingcap/errors v0.11.0/go.mod h1:Oi8TUi2kEtXXLMJk9l1cGmz20kV3TaQ0usTwv5KuLY8= +github.com/pingcap/errors v0.11.4/go.mod h1:Oi8TUi2kEtXXLMJk9l1cGmz20kV3TaQ0usTwv5KuLY8= +github.com/pingcap/errors v0.11.5-0.20190809092503-95897b64e011/go.mod h1:Oi8TUi2kEtXXLMJk9l1cGmz20kV3TaQ0usTwv5KuLY8= +github.com/pingcap/errors v0.11.5-0.20200917111840-a15ef68f753d/go.mod h1:g4vx//d6VakjJ0mk7iLBlKA8LFavV/sAVINT/1PFxeQ= +github.com/pingcap/errors v0.11.5-0.20201126102027-b0a155152ca3 h1:LllgC9eGfqzkfubMgjKIDyZYaa609nNWAyNZtpy2B3M= +github.com/pingcap/errors v0.11.5-0.20201126102027-b0a155152ca3/go.mod h1:G7x87le1poQzLB/TqvTJI2ILrSgobnq4Ut7luOwvfvI= +github.com/pingcap/failpoint v0.0.0-20191029060244-12f4ac2fd11d/go.mod h1:DNS3Qg7bEDhU6EXNHF+XSv/PGznQaMJ5FWvctpm6pQI= +github.com/pingcap/failpoint v0.0.0-20200702092429-9f69995143ce/go.mod h1:w4PEZ5y16LeofeeGwdgZB4ddv9bLyDuIX+ljstgKZyk= +github.com/pingcap/failpoint v0.0.0-20210316064728-7acb0f0a3dfd h1:I8IeI8MNiZVKnwuXhcIIzz6pREcOSbq18Q31KYIzFVM= +github.com/pingcap/failpoint v0.0.0-20210316064728-7acb0f0a3dfd/go.mod h1:IVF+ijPSMZVtx2oIqxAg7ur6EyixtTYfOHwpfmlhqI4= +github.com/pingcap/goleveldb v0.0.0-20191226122134-f82aafb29989 h1:surzm05a8C9dN8dIUmo4Be2+pMRb6f55i+UIYrluu2E= +github.com/pingcap/goleveldb v0.0.0-20191226122134-f82aafb29989/go.mod h1:O17XtbryoCJhkKGbT62+L2OlrniwqiGLSqrmdHCMzZw= +github.com/pingcap/kvproto v0.0.0-20191211054548-3c6b38ea5107/go.mod h1:WWLmULLO7l8IOcQG+t+ItJ3fEcrL5FxF0Wu+HrMy26w= +github.com/pingcap/kvproto v0.0.0-20200411081810-b85805c9476c/go.mod h1:IOdRDPLyda8GX2hE/jO7gqaCV/PNFh8BZQCQZXfIOqI= +github.com/pingcap/kvproto v0.0.0-20210819164333-bd5706b9d9f2/go.mod h1:IOdRDPLyda8GX2hE/jO7gqaCV/PNFh8BZQCQZXfIOqI= +github.com/pingcap/kvproto v0.0.0-20211122024046-03abd340988f h1:hjInxK1Ie6CYx7Jy2pYnBdEnWI8jIfr423l9Yh6LRy8= +github.com/pingcap/kvproto v0.0.0-20211122024046-03abd340988f/go.mod h1:IOdRDPLyda8GX2hE/jO7gqaCV/PNFh8BZQCQZXfIOqI= +github.com/pingcap/log v0.0.0-20191012051959-b742a5d432e9/go.mod h1:4rbK1p9ILyIfb6hU7OG2CiWSqMXnp3JMbiaVJ6mvoY8= +github.com/pingcap/log v0.0.0-20200511115504-543df19646ad/go.mod h1:4rbK1p9ILyIfb6hU7OG2CiWSqMXnp3JMbiaVJ6mvoY8= +github.com/pingcap/log v0.0.0-20210317133921-96f4fcab92a4/go.mod h1:4rbK1p9ILyIfb6hU7OG2CiWSqMXnp3JMbiaVJ6mvoY8= +github.com/pingcap/log v0.0.0-20210625125904-98ed8e2eb1c7 h1:k2BbABz9+TNpYRwsCCFS8pEEnFVOdbgEjL/kTlLuzZQ= +github.com/pingcap/log v0.0.0-20210625125904-98ed8e2eb1c7/go.mod h1:8AanEdAHATuRurdGxZXBz0At+9avep+ub7U1AGYLIMM= +github.com/pingcap/sysutil 
v0.0.0-20210315073920-cc0985d983a3/go.mod h1:tckvA041UWP+NqYzrJ3fMgC/Hw9wnmQ/tUkp/JaHly8= +github.com/pingcap/sysutil v0.0.0-20210730114356-fcd8a63f68c5/go.mod h1:XsOaV712rUk63aOEKYP9PhXTIE3FMNHmC2r1wX5wElY= +github.com/pingcap/tidb-dashboard v0.0.0-20211008050453-a25c25809529/go.mod h1:OCXbZTBTIMRcIt0jFsuCakZP+goYRv6IjawKbwLS2TQ= +github.com/pkg/errors v0.8.0/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0= +github.com/pkg/errors v0.8.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0= +github.com/pkg/errors v0.9.1 h1:FEBLx1zS214owpjy7qsBeixbURkuhQAwrK5UwLGTwt4= +github.com/pkg/errors v0.9.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0= +github.com/pkg/profile v1.2.1/go.mod h1:hJw3o1OdXxsrSjjVksARp5W95eeEaEfptyVZyv6JUPA= +github.com/pkg/sftp v1.10.0 h1:DGA1KlA9esU6WcicH+P8PxFZOl15O6GYtab1cIJdOlE= +github.com/pkg/sftp v1.10.0/go.mod h1:NxmoDg/QLVWluQDUYG7XBZTLUpKeFa8e3aMf1BfjyHk= +github.com/pkg/xattr v0.4.4 h1:FSoblPdYobYoKCItkqASqcrKCxRn9Bgurz0sCBwzO5g= +github.com/pkg/xattr v0.4.4/go.mod h1:sBD3RAqlr8Q+RC3FutZcikpT8nyDrIEEBw2J744gVWs= +github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= +github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= +github.com/posener/complete v1.1.1/go.mod h1:em0nMJCgc9GFtwrmVmEMR/ZL6WyhyjMBndrE9hABlRI= +github.com/posener/complete v1.2.3/go.mod h1:WZIdtGGp+qx0sLrYKtIRAruyNpv6hFCicSgv7Sy7s/s= +github.com/pquerna/ffjson v0.0.0-20190930134022-aa0246cd15f7 h1:xoIK0ctDddBMnc74udxJYBqlo9Ylnsp1waqjLsnef20= +github.com/pquerna/ffjson v0.0.0-20190930134022-aa0246cd15f7/go.mod h1:YARuvh7BUWHNhzDq2OM5tzR2RiCcN2D7sapiKyCel/M= +github.com/prometheus/client_golang v0.9.1/go.mod h1:7SWBe2y4D6OKWSNQJUaRYU/AaXPKyh/dDVn+NZz0KFw= +github.com/prometheus/client_golang v0.9.2/go.mod h1:OsXs2jCmiKlQ1lTBmv21f2mNfw4xf/QclQDMrYNZzcM= +github.com/prometheus/client_golang v0.9.3-0.20190127221311-3c4408c8b829/go.mod h1:p2iRAGwDERtqlqzRXnrOVns+ignqQo//hLXqYxZYVNs= +github.com/prometheus/client_golang v0.9.3/go.mod h1:/TN21ttK/J9q6uSwhBd54HahCDft0ttaMvbicHlPoso= +github.com/prometheus/client_golang v1.0.0/go.mod h1:db9x61etRT2tGnBNRi70OPL5FsnadC4Ky3P0J6CfImo= +github.com/prometheus/client_golang v1.1.0/go.mod h1:I1FGZT9+L76gKKOs5djB6ezCbFQP1xR9D75/vuwEF3g= +github.com/prometheus/client_golang v1.3.0/go.mod h1:hJaj2vgQTGQmVCsAACORcieXFeDPbaTKGT+JTgUa3og= +github.com/prometheus/client_golang v1.5.1/go.mod h1:e9GMxYsXl05ICDXkRhurwBS4Q3OK1iX/F2sw+iXX5zU= +github.com/prometheus/client_golang v1.7.1/go.mod h1:PY5Wy2awLA44sXw4AOSfFBetzPP4j5+D6mVACh+pe2M= +github.com/prometheus/client_golang v1.8.0/go.mod h1:O9VU6huf47PktckDQfMTX0Y8tY0/7TSWwj+ITvv0TnM= +github.com/prometheus/client_golang v1.9.0 h1:Rrch9mh17XcxvEu9D9DEpb4isxjGBtcevQjKvxPRQIU= +github.com/prometheus/client_golang v1.9.0/go.mod h1:FqZLKOZnGdFAhOK4nqGHa7D66IdsO+O441Eve7ptJDU= +github.com/prometheus/client_model v0.0.0-20180712105110-5c3871d89910/go.mod h1:MbSGuTsp3dbXC40dX6PRTWyKYBIrTGTE9sqQNg2J8bo= +github.com/prometheus/client_model v0.0.0-20190115171406-56726106282f/go.mod h1:MbSGuTsp3dbXC40dX6PRTWyKYBIrTGTE9sqQNg2J8bo= +github.com/prometheus/client_model v0.0.0-20190129233127-fd36f4220a90/go.mod h1:xMI15A0UPsDsEKsMN9yxemIoYk6Tm2C1GtYGdfGttqA= +github.com/prometheus/client_model v0.0.0-20190812154241-14fe0d1b01d4/go.mod h1:xMI15A0UPsDsEKsMN9yxemIoYk6Tm2C1GtYGdfGttqA= +github.com/prometheus/client_model v0.1.0/go.mod h1:xMI15A0UPsDsEKsMN9yxemIoYk6Tm2C1GtYGdfGttqA= +github.com/prometheus/client_model v0.2.0 
h1:uq5h0d+GuxiXLJLNABMgp2qUWDPiLvgCzz2dUR+/W/M= +github.com/prometheus/client_model v0.2.0/go.mod h1:xMI15A0UPsDsEKsMN9yxemIoYk6Tm2C1GtYGdfGttqA= +github.com/prometheus/common v0.0.0-20181113130724-41aa239b4cce/go.mod h1:daVV7qP5qjZbuso7PdcryaAu0sAZbrN9i7WWcTMWvro= +github.com/prometheus/common v0.0.0-20181126121408-4724e9255275/go.mod h1:daVV7qP5qjZbuso7PdcryaAu0sAZbrN9i7WWcTMWvro= +github.com/prometheus/common v0.2.0/go.mod h1:TNfzLD0ON7rHzMJeJkieUDPYmFC7Snx/y86RQel1bk4= +github.com/prometheus/common v0.4.0/go.mod h1:TNfzLD0ON7rHzMJeJkieUDPYmFC7Snx/y86RQel1bk4= +github.com/prometheus/common v0.4.1/go.mod h1:TNfzLD0ON7rHzMJeJkieUDPYmFC7Snx/y86RQel1bk4= +github.com/prometheus/common v0.6.0/go.mod h1:eBmuwkDJBwy6iBfxCBob6t6dR6ENT/y+J+Zk0j9GMYc= +github.com/prometheus/common v0.7.0/go.mod h1:DjGbpBbp5NYNiECxcL/VnbXCCaQpKd3tt26CguLLsqA= +github.com/prometheus/common v0.9.1/go.mod h1:yhUN8i9wzaXS3w1O07YhxHEBxD+W35wd8bs7vj7HSQ4= +github.com/prometheus/common v0.10.0/go.mod h1:Tlit/dnDKsSWFlCLTWaA1cyBgKHSMdTB80sz/V91rCo= +github.com/prometheus/common v0.14.0/go.mod h1:U+gB1OBLb1lF3O42bTCL+FK18tX9Oar16Clt/msog/s= +github.com/prometheus/common v0.15.0 h1:4fgOnadei3EZvgRwxJ7RMpG1k1pOZth5Pc13tyspaKM= +github.com/prometheus/common v0.15.0/go.mod h1:U+gB1OBLb1lF3O42bTCL+FK18tX9Oar16Clt/msog/s= +github.com/prometheus/procfs v0.0.0-20181005140218-185b4288413d/go.mod h1:c3At6R/oaqEKCNdg8wHV1ftS6bRYblBhIjjI8uT2IGk= +github.com/prometheus/procfs v0.0.0-20181204211112-1dc9a6cbc91a/go.mod h1:c3At6R/oaqEKCNdg8wHV1ftS6bRYblBhIjjI8uT2IGk= +github.com/prometheus/procfs v0.0.0-20190117184657-bf6a532e95b1/go.mod h1:c3At6R/oaqEKCNdg8wHV1ftS6bRYblBhIjjI8uT2IGk= +github.com/prometheus/procfs v0.0.0-20190507164030-5867b95ac084/go.mod h1:TjEm7ze935MbeOT/UhFTIMYKhuLP4wbCsTZCD3I8kEA= +github.com/prometheus/procfs v0.0.2/go.mod h1:TjEm7ze935MbeOT/UhFTIMYKhuLP4wbCsTZCD3I8kEA= +github.com/prometheus/procfs v0.0.3/go.mod h1:4A/X28fw3Fc593LaREMrKMqOKvUAntwMDaekg4FpcdQ= +github.com/prometheus/procfs v0.0.5/go.mod h1:4A/X28fw3Fc593LaREMrKMqOKvUAntwMDaekg4FpcdQ= +github.com/prometheus/procfs v0.0.8/go.mod h1:7Qr8sr6344vo1JqZ6HhLceV9o3AJ1Ff+GxbHq6oeK9A= +github.com/prometheus/procfs v0.1.3/go.mod h1:lV6e/gmhEcM9IjHGsFOCxxuZ+z1YqCvr4OA4YeYWdaU= +github.com/prometheus/procfs v0.2.0 h1:wH4vA7pcjKuZzjF7lM8awk4fnuJO6idemZXoKnULUx4= +github.com/prometheus/procfs v0.2.0/go.mod h1:lV6e/gmhEcM9IjHGsFOCxxuZ+z1YqCvr4OA4YeYWdaU= +github.com/prometheus/tsdb v0.7.1/go.mod h1:qhTCs0VvXwvX/y3TZrWD7rabWM+ijKTux40TwIPHuXU= +github.com/qingstor/qingstor-sdk-go/v4 v4.4.0 h1:tbItWtGB1TDfYzqK8dtm6tV+xWU5iYMwL37C6AL5dDs= +github.com/qingstor/qingstor-sdk-go/v4 v4.4.0/go.mod h1:mDVFtA7+bXQ5xoELTWkoFy1Ad13wtp8jtlnl/RU+zzM= +github.com/qiniu/api.v7/v7 v7.8.0 h1:Ye9sHXwCpeDgKJ4BNSoDvXe4yEuU8a/HTT1jKRgkqe8= +github.com/qiniu/api.v7/v7 v7.8.0/go.mod h1:J7pD9UsnxO7XxyRLUHpsWEQd/HgWJNwnn/Za9qEPdEA= +github.com/rcrowley/go-metrics v0.0.0-20181016184325-3113b8401b8a/go.mod h1:bCqnVzQkZxMG4s8nGwiZ5l3QUCyqpo9Y+/ZMZ9VjZe4= +github.com/rcrowley/go-metrics v0.0.0-20200313005456-10cdbea86bc0/go.mod h1:bCqnVzQkZxMG4s8nGwiZ5l3QUCyqpo9Y+/ZMZ9VjZe4= +github.com/remyoudompheng/bigfft v0.0.0-20200410134404-eec4a21b6bb0 h1:OdAsTTz6OkFY5QxjkYwrChwuRruF69c169dPK26NUlk= +github.com/remyoudompheng/bigfft v0.0.0-20200410134404-eec4a21b6bb0/go.mod h1:qqbHyh8v60DhA7CoWK5oRCqLrMHRGoxYCSS9EjAz6Eo= +github.com/rivo/uniseg v0.2.0 h1:S1pD9weZBuJdFmowNwbpi7BJ8TNftyUImj/0WQi72jY= +github.com/rivo/uniseg v0.2.0/go.mod h1:J6wj4VEh+S6ZtnVlnTBMWIodfgj8LQOQFoIToxlJtxc= 
+github.com/rjeczalik/notify v0.9.2 h1:MiTWrPj55mNDHEiIX5YUSKefw/+lCQVoAFmD6oQm5w8= +github.com/rjeczalik/notify v0.9.2/go.mod h1:aErll2f0sUX9PXZnVNyeiObbmTlk5jnMoCa4QEjJeqM= +github.com/rogpeppe/fastuuid v0.0.0-20150106093220-6724a57986af/go.mod h1:XWv6SoW27p1b0cqNHllgS5HIMJraePCO15w5zCzIWYg= +github.com/rogpeppe/fastuuid v1.2.0/go.mod h1:jVj6XXZzXRy/MSR5jhDC/2q6DgLz+nrA6LYCDYWNEvQ= +github.com/rogpeppe/go-internal v1.3.0/go.mod h1:M8bDsm7K2OlrFYOpmOWEs/qY81heoFRclV5y23lUDJ4= +github.com/rs/cors v1.7.0 h1:+88SsELBHx5r+hZ8TCkggzSstaWNbDvThkVK8H6f9ik= +github.com/rs/cors v1.7.0/go.mod h1:gFx+x8UowdsKA9AchylcLynDq+nNFfI8FkUZdN/jGCU= +github.com/rs/xid v1.2.1 h1:mhH9Nq+C1fY2l1XIpgxIiUOfNpRBYH1kKcr+qfKgjRc= +github.com/rs/xid v1.2.1/go.mod h1:+uKXf+4Djp6Md1KODXJxgGQPKngRmWyn10oCKFzNHOQ= +github.com/russross/blackfriday v1.5.2 h1:HyvC0ARfnZBqnXwABFeSZHpKvJHJJfPz81GNueLj0oo= +github.com/russross/blackfriday v1.5.2/go.mod h1:JO/DiYxRf+HjHt06OyowR9PTA263kcR/rfWxYHBV53g= +github.com/russross/blackfriday/v2 v2.0.1 h1:lPqVAte+HuHNfhJ/0LC98ESWRz8afy9tM/0RK8m9o+Q= +github.com/russross/blackfriday/v2 v2.0.1/go.mod h1:+Rmxgy9KzJVeS9/2gXHxylqXiyQDYRxCVz55jmeOWTM= +github.com/ryanuber/columnize v0.0.0-20160712163229-9b3edd62028f/go.mod h1:sm1tb6uqfes/u+d4ooFouqFdy9/2g9QGwK3SQygK0Ts= +github.com/ryanuber/columnize v2.1.0+incompatible/go.mod h1:sm1tb6uqfes/u+d4ooFouqFdy9/2g9QGwK3SQygK0Ts= +github.com/ryanuber/go-glob v1.0.0 h1:iQh3xXAumdQ+4Ufa5b25cRpC5TYKlno6hsv6Cb3pkBk= +github.com/ryanuber/go-glob v1.0.0/go.mod h1:807d1WSdnB0XRJzKNil9Om6lcp/3a0v4qIHxIXzX/Yc= +github.com/samuel/go-zookeeper v0.0.0-20190923202752-2cc03de413da/go.mod h1:gi+0XIa01GRL2eRQVjQkKGqKF3SF9vZR/HnPullcV2E= +github.com/sasha-s/go-deadlock v0.2.0/go.mod h1:StQn567HiB1fF2yJ44N9au7wOhrPS3iZqiDbRupzT10= +github.com/satori/go.uuid v1.2.0 h1:0uYX9dsZ2yD7q2RtLRtPSdGDWzjeM3TbMJP9utgA0ww= +github.com/satori/go.uuid v1.2.0/go.mod h1:dA0hQrYB0VpLJoorglMZABFdXlWrHn1NEOzdhQKdks0= +github.com/satori/uuid v1.2.0 h1:6TFY4nxn5XwBx0gDfzbEMCNT6k4N/4FNIuN8RACZ0KI= +github.com/satori/uuid v1.2.0/go.mod h1:B8HLsPLik/YNn6KKWVMDJ8nzCL8RP5WyfsnmvnAEwIU= +github.com/sean-/seed v0.0.0-20170313163322-e2103e2c3529 h1:nn5Wsu0esKSJiIVhscUtVbo7ada43DJhG55ua/hjS5I= +github.com/sean-/seed v0.0.0-20170313163322-e2103e2c3529/go.mod h1:DxrIzT+xaE7yg65j358z/aeFdxmN0P9QXhEzd20vsDc= +github.com/secure-io/sio-go v0.3.1 h1:dNvY9awjabXTYGsTF1PiCySl9Ltofk9GA3VdWlo7rRc= +github.com/secure-io/sio-go v0.3.1/go.mod h1:+xbkjDzPjwh4Axd07pRKSNriS9SCiYksWnZqdnfpQxs= +github.com/sergi/go-diff v1.0.1-0.20180205163309-da645544ed44/go.mod h1:0CfEIISq7TuYL3j771MWULgwwjU+GofnZX9QAmXWZgo= +github.com/sergi/go-diff v1.1.0/go.mod h1:STckp+ISIX8hZLjrqAeVduY0gWCT9IjLuqbuNXdaHfM= +github.com/shirou/gopsutil v2.20.4+incompatible/go.mod h1:5b4v6he4MtMOwMlS0TUMTu2PcXUg8+E1lC7eC3UO/RA= +github.com/shirou/gopsutil v3.20.11+incompatible/go.mod h1:5b4v6he4MtMOwMlS0TUMTu2PcXUg8+E1lC7eC3UO/RA= +github.com/shirou/gopsutil v3.21.2+incompatible/go.mod h1:5b4v6he4MtMOwMlS0TUMTu2PcXUg8+E1lC7eC3UO/RA= +github.com/shirou/gopsutil v3.21.3+incompatible h1:uenXGGa8ESCQq+dbgtl916dmg6PSAz2cXov0uORQ9v8= +github.com/shirou/gopsutil v3.21.3+incompatible/go.mod h1:5b4v6he4MtMOwMlS0TUMTu2PcXUg8+E1lC7eC3UO/RA= +github.com/shirou/w32 v0.0.0-20160930032740-bb4de0191aa4/go.mod h1:qsXQc7+bwAM3Q1u/4XEfrquwF8Lw7D7y5cD8CuHnfIc= +github.com/shurcooL/httpfs v0.0.0-20190707220628-8d4bc4ba7749/go.mod h1:ZY1cvUeJuFPAdZ/B6v7RHavJWZn2YPVFQ1OSXhCGOkg= +github.com/shurcooL/httpgzip v0.0.0-20190720172056-320755c1c1b0/go.mod 
h1:919LwcH0M7/W4fcZ0/jy0qGght1GIhqyS/EgWGH2j5Q= +github.com/shurcooL/sanitized_anchor_name v1.0.0 h1:PdmoCO6wvbs+7yrJyMORt4/BmY5IYyJwS/kOiWx8mHo= +github.com/shurcooL/sanitized_anchor_name v1.0.0/go.mod h1:1NzhyTcUVG4SuEtjjoZeVRXNmyL/1OwPU0+IJeTBvfc= +github.com/shurcooL/vfsgen v0.0.0-20181202132449-6a9ea43bcacd/go.mod h1:TrYk7fJVaAttu97ZZKrO9UbRa8izdowaMIZcxYMbVaw= +github.com/sirupsen/logrus v1.2.0/go.mod h1:LxeOpSwHxABJmUn/MG1IvRgCAasNZTLOkJPxbbu5VWo= +github.com/sirupsen/logrus v1.4.2/go.mod h1:tLMulIdttU9McNUspp0xgXVQah82FyeX6MwdIuYE2rE= +github.com/sirupsen/logrus v1.6.0/go.mod h1:7uNnSEd1DgxDLC74fIahvMZmmYsHGZGEOFrfsX/uA88= +github.com/sirupsen/logrus v1.7.0 h1:ShrD1U9pZB12TX0cVy0DtePoCH97K8EtX+mg7ZARUtM= +github.com/sirupsen/logrus v1.7.0/go.mod h1:yWOB1SBYBC5VeMP7gHvWumXLIWorT60ONWic61uBYv0= +github.com/smartystreets/assertions v0.0.0-20180927180507-b2de0cb4f26d/go.mod h1:OnSkiWE9lh6wB0YB77sQom3nweQdgAjqCqsofrRNTgc= +github.com/smartystreets/assertions v1.1.1 h1:T/YLemO5Yp7KPzS+lVtu+WsHn8yoSwTfItdAd1r3cck= +github.com/smartystreets/assertions v1.1.1/go.mod h1:tcbTF8ujkAEcZ8TElKY+i30BzYlVhC/LOxJk7iOWnoo= +github.com/smartystreets/go-aws-auth v0.0.0-20180515143844-0c1422d1fdb9/go.mod h1:SnhjPscd9TpLiy1LpzGSKh3bXCfxxXuqd9xmQJy3slM= +github.com/smartystreets/goconvey v1.6.4 h1:fv0U8FUIMPNf1L9lnHLvLhgicrIVChEkdzIKYqbNC9s= +github.com/smartystreets/goconvey v1.6.4/go.mod h1:syvi0/a8iFYH4r/RixwvyeAJjdLS9QV7WQ/tjFTllLA= +github.com/smartystreets/gunit v1.4.2/go.mod h1:ZjM1ozSIMJlAz/ay4SG8PeKF00ckUp+zMHZXV9/bvak= +github.com/soheilhy/cmux v0.1.4 h1:0HKaf1o97UwFjHH9o5XsHUOF+tqmdA7KEzXLpiyaw0E= +github.com/soheilhy/cmux v0.1.4/go.mod h1:IM3LyeVVIOuxMH7sFAkER9+bJ4dT7Ms6E4xg4kGIyLM= +github.com/sony/gobreaker v0.4.1/go.mod h1:ZKptC7FHNvhBz7dN2LGjPVBz2sZJmc0/PkyDJOjmxWY= +github.com/spaolacci/murmur3 v0.0.0-20180118202830-f09979ecbc72/go.mod h1:JwIasOWyU6f++ZhiEuf87xNszmSA2myDM2Kzu9HwQUA= +github.com/spaolacci/murmur3 v1.1.0 h1:7c1g84S4BPRrfL5Xrdp6fOJ206sU9y293DDHaoy0bLI= +github.com/spaolacci/murmur3 v1.1.0/go.mod h1:JwIasOWyU6f++ZhiEuf87xNszmSA2myDM2Kzu9HwQUA= +github.com/spf13/afero v1.1.2/go.mod h1:j4pytiNVoe2o6bmDsKpLACNPDBIoEAkihy7loJ1B0CQ= +github.com/spf13/cast v1.3.0/go.mod h1:Qx5cxh0v+4UWYiBimWS+eyWzqEqokIECu5etghLkUJE= +github.com/spf13/cobra v0.0.3/go.mod h1:1l0Ry5zgKvJasoi3XT1TypsSe7PqH0Sj9dhYf7v3XqQ= +github.com/spf13/cobra v0.0.5/go.mod h1:3K3wKZymM7VvHMDS9+Akkh4K60UwM26emMESw8tLCHU= +github.com/spf13/cobra v1.0.0/go.mod h1:/6GTrnGXV9HjY+aR4k0oJ5tcvakLuG6EuKReYlHNrgE= +github.com/spf13/jwalterweatherman v1.0.0/go.mod h1:cQK4TGJAtQXfYWX+Ddv3mKDzgVb68N+wFjFa4jdeBTo= +github.com/spf13/pflag v1.0.1/go.mod h1:DYY7MBk1bdzusC3SYhjObp+wFpr4gzcvqqNjLnInEg4= +github.com/spf13/pflag v1.0.3/go.mod h1:DYY7MBk1bdzusC3SYhjObp+wFpr4gzcvqqNjLnInEg4= +github.com/spf13/pflag v1.0.5 h1:iy+VFUOCP1a+8yFto/drg2CJ5u0yRoB7fZw3DKv/JXA= +github.com/spf13/pflag v1.0.5/go.mod h1:McXfInJRrz4CZXVZOBLb0bTZqETkiAhM9Iw0y3An2Bg= +github.com/spf13/viper v1.3.2/go.mod h1:ZiWeW+zYFKm7srdB9IoDzzZXaJaI5eL9QjNiN/DMA2s= +github.com/spf13/viper v1.4.0/go.mod h1:PTJ7Z/lr49W6bUbkmS1V3by4uWynFiR9p7+dSq/yZzE= +github.com/streadway/amqp v0.0.0-20190404075320-75d898a42a94/go.mod h1:AZpEONHx3DKn8O/DFsRAY58/XVQiIPMTMB1SddzLXVw= +github.com/streadway/amqp v0.0.0-20190827072141-edfb9018d271/go.mod h1:AZpEONHx3DKn8O/DFsRAY58/XVQiIPMTMB1SddzLXVw= +github.com/streadway/amqp v1.0.0 h1:kuuDrUJFZL1QYL9hUNuCxNObNzB0bV/ZG5jV3RWAQgo= +github.com/streadway/amqp v1.0.0/go.mod h1:AZpEONHx3DKn8O/DFsRAY58/XVQiIPMTMB1SddzLXVw= 
+github.com/streadway/handy v0.0.0-20190108123426-d5acb3125c2a/go.mod h1:qNTQ5P5JnDBl6z3cMAg/SywNDC5ABu5ApDIw6lUbRmI= +github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= +github.com/stretchr/objx v0.1.1/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= +github.com/stretchr/objx v0.2.0 h1:Hbg2NidpLE8veEBkEZTL3CvlkUIVzuU9jDplZO54c48= +github.com/stretchr/objx v0.2.0/go.mod h1:qt09Ya8vawLte6SNmTgCsAVtYtaKzEcn8ATUoHMkEqE= +github.com/stretchr/testify v1.2.2/go.mod h1:a8OnRcib4nhh0OaRAV+Yts87kKdq0PP7pXfy6kDkUVs= +github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI= +github.com/stretchr/testify v1.4.0/go.mod h1:j7eGeouHqKxXV5pUuKE4zz7dFj8WfuZ+81PSLYec5m4= +github.com/stretchr/testify v1.5.1/go.mod h1:5W2xD1RspED5o8YsWQXVCued0rvSQ+mT+I5cxcmMvtA= +github.com/stretchr/testify v1.6.0/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= +github.com/stretchr/testify v1.6.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= +github.com/stretchr/testify v1.7.0 h1:nwc3DEeHmmLAfoZucVR881uASk0Mfjw8xYJ99tb5CcY= +github.com/stretchr/testify v1.7.0/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= +github.com/swaggo/files v0.0.0-20190704085106-630677cd5c14/go.mod h1:gxQT6pBGRuIGunNf/+tSOB5OHvguWi8Tbt82WOkf35E= +github.com/swaggo/gin-swagger v1.2.0/go.mod h1:qlH2+W7zXGZkczuL+r2nEBR2JTT+/lX05Nn6vPhc7OI= +github.com/swaggo/http-swagger v0.0.0-20200308142732-58ac5e232fba/go.mod h1:O1lAbCgAAX/KZ80LM/OXwtWFI/5TvZlwxSg8Cq08PV0= +github.com/swaggo/swag v1.5.1/go.mod h1:1Bl9F/ZBpVWh22nY0zmYyASPO1lI/zIwRDrpZU+tv8Y= +github.com/swaggo/swag v1.6.3/go.mod h1:wcc83tB4Mb2aNiL/HP4MFeQdpHUrca+Rp/DRNgWAUio= +github.com/swaggo/swag v1.6.6-0.20200529100950-7c765ddd0476/go.mod h1:xDhTyuFIujYiN3DKWC/H/83xcfHp+UE/IzWWampG7Zc= +github.com/syndtr/goleveldb v1.0.0/go.mod h1:ZVVdQEZoIme9iO1Ch2Jdy24qqXrMMOU6lpPAyBWyWuQ= +github.com/syndtr/goleveldb v1.0.1-0.20190318030020-c3a204f8e965 h1:1oFLiOyVl+W7bnBzGhf7BbIv9loSFQcieWWYIjLqcAw= +github.com/syndtr/goleveldb v1.0.1-0.20190318030020-c3a204f8e965/go.mod h1:9OrXJhf154huy1nPWmuSrkgjPUtUNhA+Zmy+6AESzuA= +github.com/tencentyun/cos-go-sdk-v5 v0.7.8 h1:BeqN3uNCyYgoujWqZDbpQMhNmPf5xIypjzbT2AMMZUs= +github.com/tencentyun/cos-go-sdk-v5 v0.7.8/go.mod h1:wQBO5HdAkLjj2q6XQiIfDSP8DXDNrppDRw2Kp/1BODA= +github.com/thoas/go-funk v0.8.0/go.mod h1:+IWnUfUmFO1+WVYQWQtIJHeRRdaIyyYglZN7xzUPe4Q= +github.com/tidwall/gjson v1.3.5/go.mod h1:P256ACg0Mn+j1RXIDXoss50DeIABTYK1PULOJHhxOls= +github.com/tidwall/gjson v1.6.7/go.mod h1:zeFuBCIqD4sN/gmqBzZ4j7Jd6UcA2Fc56x7QFsv+8fI= +github.com/tidwall/gjson v1.9.3 h1:hqzS9wAHMO+KVBBkLxYdkEeeFHuqr95GfClRLKlgK0E= +github.com/tidwall/gjson v1.9.3/go.mod h1:/wbyibRr2FHMks5tjHJ5F8dMZh3AcwJEMf5vlfC0lxk= +github.com/tidwall/match v1.0.1/go.mod h1:LujAq0jyVjBy028G1WhWfIzbpQfMO8bBZ6Tyb0+pL9E= +github.com/tidwall/match v1.0.3/go.mod h1:eRSPERbgtNPcGhD8UCthc6PmLEQXEWd3PRB5JTxsfmM= +github.com/tidwall/match v1.1.1 h1:+Ho715JplO36QYgwN9PGYNhgZvoUSc9X2c80KVTi+GA= +github.com/tidwall/match v1.1.1/go.mod h1:eRSPERbgtNPcGhD8UCthc6PmLEQXEWd3PRB5JTxsfmM= +github.com/tidwall/pretty v1.0.0/go.mod h1:XNkn88O1ChpSDQmQeStsy+sBenx6DDtFZJxhVysOjyk= +github.com/tidwall/pretty v1.0.2/go.mod h1:XNkn88O1ChpSDQmQeStsy+sBenx6DDtFZJxhVysOjyk= +github.com/tidwall/pretty v1.2.0 h1:RWIZEg2iJ8/g6fDDYzMpobmaoGh5OLl4AXtGUGPcqCs= +github.com/tidwall/pretty v1.2.0/go.mod h1:ITEVvHYasfjBbM0u2Pg8T2nJnzm8xPwvNhhsoaGGjNU= +github.com/tidwall/sjson v1.0.4 h1:UcdIRXff12Lpnu3OLtZvnc03g4vH2suXDXhBwBqmzYg= +github.com/tidwall/sjson 
v1.0.4/go.mod h1:bURseu1nuBkFpIES5cz6zBtjmYeOQmEESshn7VpF15Y= +github.com/tikv/client-go/v2 v2.0.0-rc h1:UDo44co7FNa74HPS5o191Yvh3CeAaHrBUpFUa2k7d7I= +github.com/tikv/client-go/v2 v2.0.0-rc/go.mod h1:wRuh+W35daKTiYBld0oBlT6PSkzEVr+pB/vChzJZk+8= +github.com/tikv/pd v1.1.0-beta.0.20211029083450-e65f0c55b6ae h1:PmnkhiOopgMZYDQ7Htj1kt/zwW4MEOUL+Dem6WLZISY= +github.com/tikv/pd v1.1.0-beta.0.20211029083450-e65f0c55b6ae/go.mod h1:varH0IE0jJ9E9WN2Ei/N6pajMlPkcXdDEf7f5mmsUVQ= +github.com/tinylib/msgp v1.1.3 h1:3giwAkmtaEDLSV0MdO1lDLuPgklgPzmk8H9+So2BVfA= +github.com/tinylib/msgp v1.1.3/go.mod h1:+d+yLhGm8mzTaHzB+wgMYrodPfmZrzkirds8fDWklFE= +github.com/tklauser/go-sysconf v0.3.4/go.mod h1:Cl2c8ZRWfHD5IrfHo9VN+FX9kCFjIOyVklgXycLB6ek= +github.com/tklauser/go-sysconf v0.3.6 h1:oc1sJWvKkmvIxhDHeKWvZS4f6AW+YcoguSfRF2/Hmo4= +github.com/tklauser/go-sysconf v0.3.6/go.mod h1:MkWzOF4RMCshBAMXuhXJs64Rte09mITnppBXY/rYEFI= +github.com/tklauser/numcpus v0.2.1/go.mod h1:9aU+wOc6WjUIZEwWMP62PL/41d65P+iks1gBkr4QyP8= +github.com/tklauser/numcpus v0.2.2 h1:oyhllyrScuYI6g+h/zUvNXNp1wy7x8qQy3t/piefldA= +github.com/tklauser/numcpus v0.2.2/go.mod h1:x3qojaO3uyYt0i56EW/VUYs7uBvdl2fkfZFu0T9wgjM= +github.com/tmc/grpc-websocket-proxy v0.0.0-20170815181823-89b8d40f7ca8/go.mod h1:ncp9v5uamzpCO7NfCPTXjqaC+bZgJeR0sMTm6dMHP7U= +github.com/tmc/grpc-websocket-proxy v0.0.0-20190109142713-0ad062ec5ee5 h1:LnC5Kc/wtumK+WB441p7ynQJzVuNRJiqddSIE3IlSEQ= +github.com/tmc/grpc-websocket-proxy v0.0.0-20190109142713-0ad062ec5ee5/go.mod h1:ncp9v5uamzpCO7NfCPTXjqaC+bZgJeR0sMTm6dMHP7U= +github.com/tv42/httpunix v0.0.0-20150427012821-b75d8614f926/go.mod h1:9ESjWnEqriFuLhtthL60Sar/7RFoluCcXsuvEwTV5KM= +github.com/twmb/murmur3 v1.1.3 h1:D83U0XYKcHRYwYIpBKf3Pks91Z0Byda/9SJ8B6EMRcA= +github.com/twmb/murmur3 v1.1.3/go.mod h1:Qq/R7NUyOfr65zD+6Q5IHKsJLwP7exErjN6lyyq3OSQ= +github.com/ugorji/go v1.1.4/go.mod h1:uQMGLiO92mf5W77hV/PUCpI3pbzQx3CRekS0kk+RGrc= +github.com/ugorji/go v1.1.5-pre/go.mod h1:FwP/aQVg39TXzItUBMwnWp9T9gPQnXw4Poh4/oBQZ/0= +github.com/ugorji/go v1.1.7/go.mod h1:kZn38zHttfInRq0xu/PH0az30d+z6vm202qpg1oXVMw= +github.com/ugorji/go/codec v0.0.0-20181022190402-e5e69e061d4f/go.mod h1:VFNgLljTbGfSG7qAOspJ7OScBnGdDN/yBr0sguwnwf0= +github.com/ugorji/go/codec v0.0.0-20181204163529-d75b2dcb6bc8/go.mod h1:VFNgLljTbGfSG7qAOspJ7OScBnGdDN/yBr0sguwnwf0= +github.com/ugorji/go/codec v1.1.5-pre/go.mod h1:tULtS6Gy1AE1yCENaw4Vb//HLH5njI2tfCQDUqRd8fI= +github.com/ugorji/go/codec v1.1.7/go.mod h1:Ax+UKWsSmolVDwsd+7N3ZtXu+yMGCf907BLYF3GoBXY= +github.com/unrolled/render v1.0.1/go.mod h1:gN9T0NhL4Bfbwu8ann7Ry/TGHYfosul+J0obPf6NBdM= +github.com/upyun/go-sdk/v3 v3.0.2 h1:Ke+iOipK5CT0xzMwsgJsi7faJV7ID4lAs+wrH1RH0dA= +github.com/upyun/go-sdk/v3 v3.0.2/go.mod h1:P/SnuuwhrIgAVRd/ZpzDWqCsBAf/oHg7UggbAxyZa0E= +github.com/urfave/cli v1.20.0/go.mod h1:70zkFmudgCuE/ngEzBv17Jvp/497gISqfk5gWijbERA= +github.com/urfave/cli v1.22.1 h1:+mkCCcOFKPnCmVYVcURKps1Xe+3zP90gSYGNfRkjoIY= +github.com/urfave/cli v1.22.1/go.mod h1:Gos4lmkARVdJ6EkW0WaNv/tZAAMe9V7XWyB60NtXRu0= +github.com/urfave/cli/v2 v2.1.1/go.mod h1:SE9GqnLQmjVa0iPEY0f1w3ygNIYcIJ0OKPMoW2caLfQ= +github.com/urfave/cli/v2 v2.3.0 h1:qph92Y649prgesehzOrQjdWyxFOp/QVM+6imKHad91M= +github.com/urfave/cli/v2 v2.3.0/go.mod h1:LJmUH05zAU44vOAcrfzZQKsZbVcdbOG8rtL3/XcUArI= +github.com/urfave/negroni v0.3.0/go.mod h1:Meg73S6kFm/4PpbYdq35yYWoCZ9mS/YSx+lKnmiohz4= +github.com/valyala/tcplisten v0.0.0-20161114210144-ceec8f93295a h1:0R4NLDRDZX6JcmhJgXi5E4b8Wg84ihbmUKp/GvSPEzc= +github.com/valyala/tcplisten 
v0.0.0-20161114210144-ceec8f93295a/go.mod h1:v3UYOV9WzVtRmSR+PDvWpU/qWl4Wa5LApYYX4ZtKbio= +github.com/vbauerster/mpb/v7 v7.0.3 h1:NfX0pHWhlDTev15M/C3qmSTM1EiIjcS+/d6qS6H4FnI= +github.com/vbauerster/mpb/v7 v7.0.3/go.mod h1:NXGsfPGx6G2JssqvEcULtDqUrxuuYs4llpv8W6ZUpzk= +github.com/viki-org/dnscache v0.0.0-20130720023526-c70c1f23c5d8 h1:EVObHAr8DqpoJCVv6KYTle8FEImKhtkfcZetNqxDoJQ= +github.com/viki-org/dnscache v0.0.0-20130720023526-c70c1f23c5d8/go.mod h1:dniwbG03GafCjFohMDmz6Zc6oCuiqgH6tGNyXTkHzXE= +github.com/vmihailenco/msgpack/v4 v4.3.11/go.mod h1:gborTTJjAo/GWTqqRjrLCn9pgNN+NXzzngzBKDPIqw4= +github.com/vmihailenco/msgpack/v5 v5.0.0-beta.1/go.mod h1:xlngVLeyQ/Qi05oQxhQ+oTuqa03RjMwMfk/7/TCs+QI= +github.com/vmihailenco/tagparser v0.1.1/go.mod h1:OeAg3pn3UbLjkWt+rN9oFYB6u/cQgqMEUPoW2WPyhdI= +github.com/willf/bitset v1.1.11 h1:N7Z7E9UvjW+sGsEl7k/SJrvY2reP1A07MrGuCjIOjRE= +github.com/willf/bitset v1.1.11/go.mod h1:83CECat5yLh5zVOf4P1ErAgKA5UDvKtgyUABdr3+MjI= +github.com/willf/bloom v2.0.3+incompatible h1:QDacWdqcAUI1MPOwIQZRy9kOR7yxfyEmxX8Wdm2/JPA= +github.com/willf/bloom v2.0.3+incompatible/go.mod h1:MmAltL9pDMNTrvUkxdg0k0q5I0suxmuwp3KbyrZLOZ8= +github.com/xdg/scram v0.0.0-20180814205039-7eeb5667e42c h1:u40Z8hqBAAQyv+vATcGgV0YCnDjqSL7/q/JyPhhJSPk= +github.com/xdg/scram v0.0.0-20180814205039-7eeb5667e42c/go.mod h1:lB8K/P019DLNhemzwFU4jHLhdvlE6uDZjXFejJXr49I= +github.com/xdg/stringprep v1.0.0 h1:d9X0esnoa3dFsV0FG35rAT0RIhYFlPq7MiP+DW89La0= +github.com/xdg/stringprep v1.0.0/go.mod h1:Jhud4/sHMO4oL310DaZAKk9ZaJ08SJfe+sJh0HrGL1Y= +github.com/xiang90/probing v0.0.0-20190116061207-43a291ad63a2 h1:eY9dn8+vbi4tKz5Qo6v2eYzo7kUS51QINcR5jNpbZS8= +github.com/xiang90/probing v0.0.0-20190116061207-43a291ad63a2/go.mod h1:UETIi67q53MR2AWcXfiuqkDkRtnGDLqkBTpCHuJHxtU= +github.com/xlab/treeprint v1.0.0/go.mod h1:IoImgRak9i3zJyuxOKUP1v4UZd1tMoKkq/Cimt1uhCg= +github.com/xordataexchange/crypt v0.0.3-0.20170626215501-b2862e3d0a77/go.mod h1:aYKd//L2LvnjZzWKhF00oedf4jCCReLcmhLdhm1A27Q= +github.com/yookoala/realpath v1.0.0/go.mod h1:gJJMA9wuX7AcqLy1+ffPatSCySA1FQ2S8Ya9AIoYBpE= +github.com/yuin/goldmark v1.1.27/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74= +github.com/yuin/goldmark v1.2.1/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74= +github.com/yuin/goldmark v1.3.5/go.mod h1:mwnBkeHKe2W/ZEtQ+71ViKU8L12m81fl3OWwC1Zlc8k= +github.com/ziutek/mymysql v1.5.4/go.mod h1:LMSpPZ6DbqWFxNCHW77HeMg9I646SAhApZ/wKdgO/C0= +go.etcd.io/bbolt v1.3.2/go.mod h1:IbVyRI1SCnLcuJnV2u8VeU0CEYM7e686BmAb1XKL+uU= +go.etcd.io/bbolt v1.3.3/go.mod h1:IbVyRI1SCnLcuJnV2u8VeU0CEYM7e686BmAb1XKL+uU= +go.etcd.io/bbolt v1.3.5 h1:XAzx9gjCb0Rxj7EoqcClPD1d5ZBxZJk0jbuoPHenBt0= +go.etcd.io/bbolt v1.3.5/go.mod h1:G5EMThwa9y8QZGBClrRx5EY+Yw9kAhnjy3bSjsnlVTQ= +go.etcd.io/etcd v0.0.0-20191023171146-3cf2f69b5738/go.mod h1:dnLIgRNXwCJa5e+c6mIZCrds/GIG4ncV9HhK5PX7jPg= +go.etcd.io/etcd v0.0.0-20201125193152-8a03d2e9614b/go.mod h1:yVHk9ub3CSBatqGNg7GRmsnfLWtoW60w4eDYfh7vHDg= +go.etcd.io/etcd v0.5.0-alpha.5.0.20191023171146-3cf2f69b5738/go.mod h1:dnLIgRNXwCJa5e+c6mIZCrds/GIG4ncV9HhK5PX7jPg= +go.etcd.io/etcd v0.5.0-alpha.5.0.20200824191128-ae9734ed278b h1:3kC4J3eQF6p1UEfQTkC67eEeb3rTk+shQqdX6tFyq9Q= +go.etcd.io/etcd v0.5.0-alpha.5.0.20200824191128-ae9734ed278b/go.mod h1:yVHk9ub3CSBatqGNg7GRmsnfLWtoW60w4eDYfh7vHDg= +go.opencensus.io v0.20.1/go.mod h1:6WKK9ahsWS3RSO+PY9ZHZUfv2irvY6gN279GOPZjmmk= +go.opencensus.io v0.20.2/go.mod h1:6WKK9ahsWS3RSO+PY9ZHZUfv2irvY6gN279GOPZjmmk= +go.opencensus.io v0.21.0/go.mod h1:mSImk1erAIZhrmZN+AvHh14ztQfjbGwt4TtuofqLduU= 
+go.opencensus.io v0.22.2/go.mod h1:yxeiOL68Rb0Xd1ddK5vPZ/oVn4vY4Ynel7k9FzqtOIw= +go.opencensus.io v0.22.5 h1:dntmOdLpSpHlVqbW5Eay97DelsZHe+55D+xC6i0dDS0= +go.opencensus.io v0.22.5/go.mod h1:5pWMHQbX5EPX2/62yrJeAkowc+lfs/XD7Uxpq3pI6kk= +go.opentelemetry.io/otel v0.14.0 h1:YFBEfjCk9MTjaytCNSUkp9Q8lF7QJezA06T71FbQxLQ= +go.opentelemetry.io/otel v0.14.0/go.mod h1:vH5xEuwy7Rts0GNtsCW3HYQoZDY+OmBJ6t1bFGGlxgw= +go.uber.org/atomic v1.3.2/go.mod h1:gD2HeocX3+yG+ygLZcrzQJaqmWj9AIm7n08wl/qW/PE= +go.uber.org/atomic v1.4.0/go.mod h1:gD2HeocX3+yG+ygLZcrzQJaqmWj9AIm7n08wl/qW/PE= +go.uber.org/atomic v1.5.0/go.mod h1:sABNBOSYdrvTF6hTgEIbc7YasKWGhgEQZyfxyTvoXHQ= +go.uber.org/atomic v1.6.0/go.mod h1:sABNBOSYdrvTF6hTgEIbc7YasKWGhgEQZyfxyTvoXHQ= +go.uber.org/atomic v1.7.0 h1:ADUqmZGgLDDfbSL9ZmPxKTybcoEYHgpYfELNoN+7hsw= +go.uber.org/atomic v1.7.0/go.mod h1:fEN4uk6kAWBTFdckzkM89CLk9XfWZrxpCo0nPH17wJc= +go.uber.org/dig v1.8.0/go.mod h1:X34SnWGr8Fyla9zQNO2GSO2D+TIuqB14OS8JhYocIyw= +go.uber.org/fx v1.10.0/go.mod h1:vLRicqpG/qQEzno4SYU86iCwfT95EZza+Eba0ItuxqY= +go.uber.org/goleak v0.10.0/go.mod h1:VCZuO8V8mFPlL0F5J5GK1rtHV3DrFcQ1R8ryq7FK0aI= +go.uber.org/goleak v1.1.10 h1:z+mqJhf6ss6BSfSM671tgKyZBFPTTJM+HLxnhPC3wu0= +go.uber.org/goleak v1.1.10/go.mod h1:8a7PlsEVH3e/a/GLqe5IIrQx6GzcnRmZEufDUTk4A7A= +go.uber.org/multierr v1.1.0/go.mod h1:wR5kodmAFQ0UK8QlbwjlSNy0Z68gJhDJUG5sjR94q/0= +go.uber.org/multierr v1.3.0/go.mod h1:VgVr7evmIr6uPjLBxg28wmKNXyqE9akIJ5XnfpiKl+4= +go.uber.org/multierr v1.4.0/go.mod h1:VgVr7evmIr6uPjLBxg28wmKNXyqE9akIJ5XnfpiKl+4= +go.uber.org/multierr v1.5.0/go.mod h1:FeouvMocqHpRaaGuG9EjoKcStLC43Zu/fmqdUMPcKYU= +go.uber.org/multierr v1.6.0/go.mod h1:cdWPpRnG4AhwMwsgIHip0KRBQjJy5kYEpYjJxpXp9iU= +go.uber.org/multierr v1.7.0 h1:zaiO/rmgFjbmCXdSYJWQcdvOCsthmdaHfr3Gm2Kx4Ec= +go.uber.org/multierr v1.7.0/go.mod h1:7EAYxJLBy9rStEaz58O2t4Uvip6FSURkq8/ppBp95ak= +go.uber.org/tools v0.0.0-20190618225709-2cfd321de3ee/go.mod h1:vJERXedbb3MVM5f9Ejo0C68/HhF8uaILCdgjnY+goOA= +go.uber.org/zap v1.9.1/go.mod h1:vwi/ZaCAaUcBkycHslxD9B2zi4UTXhF60s6SWpuDF0Q= +go.uber.org/zap v1.10.0/go.mod h1:vwi/ZaCAaUcBkycHslxD9B2zi4UTXhF60s6SWpuDF0Q= +go.uber.org/zap v1.12.0/go.mod h1:zwrFLgMcdUuIBviXEYEH1YKNaOBnKXsx2IPda5bBwHM= +go.uber.org/zap v1.13.0/go.mod h1:zwrFLgMcdUuIBviXEYEH1YKNaOBnKXsx2IPda5bBwHM= +go.uber.org/zap v1.15.0/go.mod h1:Mb2vm2krFEG5DV0W9qcHBYFtp/Wku1cvYaqPsS/WYfc= +go.uber.org/zap v1.16.0/go.mod h1:MA8QOfq0BHJwdXa996Y4dYkAqRKB8/1K1QMMZVaNZjQ= +go.uber.org/zap v1.19.0 h1:mZQZefskPPCMIBCSEH0v2/iUqqLrYtaeqwD6FUGUnFE= +go.uber.org/zap v1.19.0/go.mod h1:xg/QME4nWcxGxrpdeYfq7UvYrLh66cuVKdrbD1XF/NI= +golang.org/x/arch v0.0.0-20201008161808-52c3e6f60cff/go.mod h1:flIaEI6LNU6xOCD5PaJvn9wGP0agmIOqjrtsKGRguv4= +golang.org/x/crypto v0.0.0-20180904163835-0709b304e793/go.mod h1:6SG95UA2DQfeDnfUPMdvaQW0Q7yPrPDi9nlGo2tz2b4= +golang.org/x/crypto v0.0.0-20181029021203-45a5f77698d3/go.mod h1:6SG95UA2DQfeDnfUPMdvaQW0Q7yPrPDi9nlGo2tz2b4= +golang.org/x/crypto v0.0.0-20181203042331-505ab145d0a9/go.mod h1:6SG95UA2DQfeDnfUPMdvaQW0Q7yPrPDi9nlGo2tz2b4= +golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w= +golang.org/x/crypto v0.0.0-20190325154230-a5d413f7728c/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w= +golang.org/x/crypto v0.0.0-20190510104115-cbcb75029529/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI= +golang.org/x/crypto v0.0.0-20190513172903-22d7a77e9e5f/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI= +golang.org/x/crypto 
v0.0.0-20190611184440-5c40567a22f8/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI= +golang.org/x/crypto v0.0.0-20190701094942-4def268fd1a4/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI= +golang.org/x/crypto v0.0.0-20190923035154-9ee001bba392/go.mod h1:/lpIB1dKB+9EgE3H3cr1v9wB50oz8l4C4h62xy7jSTY= +golang.org/x/crypto v0.0.0-20191011191535-87dc89f01550/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI= +golang.org/x/crypto v0.0.0-20191206172530-e9b2fee46413/go.mod h1:LzIPMQfyMNhhGPhUkYOs5KpL4U8rLKemX1yGLhDgUto= +golang.org/x/crypto v0.0.0-20200117160349-530e935923ad/go.mod h1:LzIPMQfyMNhhGPhUkYOs5KpL4U8rLKemX1yGLhDgUto= +golang.org/x/crypto v0.0.0-20200204104054-c9f3fb736b72/go.mod h1:LzIPMQfyMNhhGPhUkYOs5KpL4U8rLKemX1yGLhDgUto= +golang.org/x/crypto v0.0.0-20200302210943-78000ba7a073/go.mod h1:LzIPMQfyMNhhGPhUkYOs5KpL4U8rLKemX1yGLhDgUto= +golang.org/x/crypto v0.0.0-20200323165209-0ec3e9974c59/go.mod h1:LzIPMQfyMNhhGPhUkYOs5KpL4U8rLKemX1yGLhDgUto= +golang.org/x/crypto v0.0.0-20200604202706-70a84ac30bf9/go.mod h1:LzIPMQfyMNhhGPhUkYOs5KpL4U8rLKemX1yGLhDgUto= +golang.org/x/crypto v0.0.0-20200622213623-75b288015ac9/go.mod h1:LzIPMQfyMNhhGPhUkYOs5KpL4U8rLKemX1yGLhDgUto= +golang.org/x/crypto v0.0.0-20200709230013-948cd5f35899/go.mod h1:LzIPMQfyMNhhGPhUkYOs5KpL4U8rLKemX1yGLhDgUto= +golang.org/x/crypto v0.0.0-20200820211705-5c72a883971a/go.mod h1:LzIPMQfyMNhhGPhUkYOs5KpL4U8rLKemX1yGLhDgUto= +golang.org/x/crypto v0.0.0-20201002170205-7f63de1d35b0/go.mod h1:LzIPMQfyMNhhGPhUkYOs5KpL4U8rLKemX1yGLhDgUto= +golang.org/x/crypto v0.0.0-20201016220609-9e8e0b390897/go.mod h1:LzIPMQfyMNhhGPhUkYOs5KpL4U8rLKemX1yGLhDgUto= +golang.org/x/crypto v0.0.0-20201112155050-0c6587e931a9/go.mod h1:LzIPMQfyMNhhGPhUkYOs5KpL4U8rLKemX1yGLhDgUto= +golang.org/x/crypto v0.0.0-20201124201722-c8d3bf9c5392/go.mod h1:jdWPYTVW3xRLrWPugEBEK3UY2ZEsg3UU495nc5E+M+I= +golang.org/x/crypto v0.0.0-20210314154223-e6e6c4f2bb5b/go.mod h1:T9bdIzuCu7OtxOm1hfPfRQxPLYneinmdGuTeoZ9dtd4= +golang.org/x/crypto v0.0.0-20210616213533-5ff15b29337e h1:gsTQYXdTw2Gq7RBsWvlQ91b+aEQ6bXFUngBGuR8sPpI= +golang.org/x/crypto v0.0.0-20210616213533-5ff15b29337e/go.mod h1:GvvjBRRGRdwPK5ydBHafDWAxML/pGHZbMvKqRZ5+Abc= +golang.org/x/exp v0.0.0-20180321215751-8460e604b9de/go.mod h1:CJ0aWSM057203Lf6IL+f9T1iT9GByDxfZKAQTCR3kQA= +golang.org/x/exp v0.0.0-20190121172915-509febef88a4/go.mod h1:CJ0aWSM057203Lf6IL+f9T1iT9GByDxfZKAQTCR3kQA= +golang.org/x/image v0.0.0-20200119044424-58c23975cae1/go.mod h1:FeLwcggjj3mMvU+oOTbSwawSJRM1uh48EjtB4UJZlP0= +golang.org/x/lint v0.0.0-20181026193005-c67002cb31c3/go.mod h1:UVdnD1Gm6xHRNCYTkRU2/jEulfH38KcIWyp/GAMgvoE= +golang.org/x/lint v0.0.0-20190227174305-5b3e6a55c961/go.mod h1:wehouNa3lNwaWXcvxsM5YxQ5yQlVC4a0KAMCusXpPoU= +golang.org/x/lint v0.0.0-20190301231843-5614ed5bae6f/go.mod h1:UVdnD1Gm6xHRNCYTkRU2/jEulfH38KcIWyp/GAMgvoE= +golang.org/x/lint v0.0.0-20190313153728-d0100b6bd8b3/go.mod h1:6SW0HCj/g11FgYtHlgUYUwCkIfeOF89ocIRzGO/8vkc= +golang.org/x/lint v0.0.0-20190930215403-16217165b5de/go.mod h1:6SW0HCj/g11FgYtHlgUYUwCkIfeOF89ocIRzGO/8vkc= +golang.org/x/lint v0.0.0-20201208152925-83fdc39ff7b5/go.mod h1:3xt1FjdF8hUf6vQPIChWIBhFzV8gjjsPE/fR3IyQdNY= +golang.org/x/lint v0.0.0-20210508222113-6edffad5e616 h1:VLliZ0d+/avPrXXH+OakdXhpJuEoBZuwh1m2j7U6Iug= +golang.org/x/lint v0.0.0-20210508222113-6edffad5e616/go.mod h1:3xt1FjdF8hUf6vQPIChWIBhFzV8gjjsPE/fR3IyQdNY= +golang.org/x/mod v0.0.0-20190513183733-4bf6d317e70e/go.mod h1:mXi4GBBbnImb6dmsKGUJ2LatrhH/nqhxcFungHvyanc= +golang.org/x/mod v0.1.1-0.20191105210325-c90efee705ee/go.mod 
h1:QqPTAvyqsEbceGzBzNggFXnrqF1CaUcvgkdR5Ot7KZg= +golang.org/x/mod v0.2.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA= +golang.org/x/mod v0.3.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA= +golang.org/x/mod v0.4.2 h1:Gz96sIWK3OalVv/I/qNygP42zyoKp3xptRVCWRFEBvo= +golang.org/x/mod v0.4.2/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA= +golang.org/x/net v0.0.0-20180218175443-cbe0f9307d01/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= +golang.org/x/net v0.0.0-20180724234803-3673e40ba225/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= +golang.org/x/net v0.0.0-20180826012351-8a410e7b638d/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= +golang.org/x/net v0.0.0-20180906233101-161cd47e91fd/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= +golang.org/x/net v0.0.0-20181005035420-146acd28ed58/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= +golang.org/x/net v0.0.0-20181023162649-9b4f9f5ad519/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= +golang.org/x/net v0.0.0-20181114220301-adae6a3d119a/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= +golang.org/x/net v0.0.0-20181201002055-351d144fa1fc/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= +golang.org/x/net v0.0.0-20181220203305-927f97764cc3/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= +golang.org/x/net v0.0.0-20190108225652-1e06a53dbb7e/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= +golang.org/x/net v0.0.0-20190125091013-d26f9f9a57f3/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= +golang.org/x/net v0.0.0-20190213061140-3a22650c66bd/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= +golang.org/x/net v0.0.0-20190311183353-d8887717615a/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg= +golang.org/x/net v0.0.0-20190404232315-eb5bcb51f2a3/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg= +golang.org/x/net v0.0.0-20190503192946-f4e77d36d62c/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg= +golang.org/x/net v0.0.0-20190522155817-f3200d17e092/go.mod h1:HSz+uSET+XFnRR8LxR5pz3Of3rY3CfYBVs4xY44aLks= +golang.org/x/net v0.0.0-20190603091049-60506f45cf65/go.mod h1:HSz+uSET+XFnRR8LxR5pz3Of3rY3CfYBVs4xY44aLks= +golang.org/x/net v0.0.0-20190611141213-3f473d35a33a/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= +golang.org/x/net v0.0.0-20190613194153-d28f0bde5980/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= +golang.org/x/net v0.0.0-20190620200207-3b0461eec859/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= +golang.org/x/net v0.0.0-20190813141303-74dc4d7220e7/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= +golang.org/x/net v0.0.0-20190827160401-ba9fcec4b297/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= +golang.org/x/net v0.0.0-20190923162816-aa69164e4478/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= +golang.org/x/net v0.0.0-20191002035440-2ec189313ef0/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= +golang.org/x/net v0.0.0-20191112182307-2180aed22343/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= +golang.org/x/net v0.0.0-20200114155413-6afb5195e5aa/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= +golang.org/x/net v0.0.0-20200202094626-16171245cfb2/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= +golang.org/x/net v0.0.0-20200226121028-0de0cce0169b/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= +golang.org/x/net v0.0.0-20200301022130-244492dfa37a/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= +golang.org/x/net v0.0.0-20200324143707-d3edc9973b7e/go.mod 
h1:qpuaurCH72eLCgpAm/N6yyVIVM9cpaDIP3A8BGJEC5A= +golang.org/x/net v0.0.0-20200425230154-ff2c4b7c35a0/go.mod h1:qpuaurCH72eLCgpAm/N6yyVIVM9cpaDIP3A8BGJEC5A= +golang.org/x/net v0.0.0-20200520004742-59133d7f0dd7/go.mod h1:qpuaurCH72eLCgpAm/N6yyVIVM9cpaDIP3A8BGJEC5A= +golang.org/x/net v0.0.0-20200625001655-4c5254603344/go.mod h1:/O7V0waA8r7cgGh81Ro3o1hOxt32SMVPicZroKQ2sZA= +golang.org/x/net v0.0.0-20200707034311-ab3426394381/go.mod h1:/O7V0waA8r7cgGh81Ro3o1hOxt32SMVPicZroKQ2sZA= +golang.org/x/net v0.0.0-20200904194848-62affa334b73/go.mod h1:/O7V0waA8r7cgGh81Ro3o1hOxt32SMVPicZroKQ2sZA= +golang.org/x/net v0.0.0-20201006153459-a7d1128ccaa0/go.mod h1:sp8m0HH+o8qH0wwXwYZr8TS3Oi6o0r6Gce1SSxlDquU= +golang.org/x/net v0.0.0-20201021035429-f5854403a974/go.mod h1:sp8m0HH+o8qH0wwXwYZr8TS3Oi6o0r6Gce1SSxlDquU= +golang.org/x/net v0.0.0-20201216054612-986b41b23924/go.mod h1:m0MpNAwzfU5UDzcl9v0D8zg8gWTRqZa9RBIspLL5mdg= +golang.org/x/net v0.0.0-20210226172049-e18ecbb05110/go.mod h1:m0MpNAwzfU5UDzcl9v0D8zg8gWTRqZa9RBIspLL5mdg= +golang.org/x/net v0.0.0-20210405180319-a5a99cb37ef4 h1:4nGaVu0QrbjT/AK2PRLuQfQuh6DJve+pELhqTdAj3x0= +golang.org/x/net v0.0.0-20210405180319-a5a99cb37ef4/go.mod h1:p54w0d4576C0XHj96bSt6lcn1PtDYWL6XObtHCRCNQM= +golang.org/x/oauth2 v0.0.0-20180821212333-d2e6202438be/go.mod h1:N/0e6XlmueqKjAGxoOufVs8QHGRruUQn6yWY3a++T0U= +golang.org/x/oauth2 v0.0.0-20190226205417-e64efc72b421/go.mod h1:gOpvHmFTYa4IltrdGE7lF6nIHvwfUNPOp7c8zoXwtLw= +golang.org/x/oauth2 v0.0.0-20190517181255-950ef44c6e07 h1:XC1K3wNjuz44KaI+cj85C9TW85w/46RH7J+DTXNH5Wk= +golang.org/x/oauth2 v0.0.0-20190517181255-950ef44c6e07/go.mod h1:gOpvHmFTYa4IltrdGE7lF6nIHvwfUNPOp7c8zoXwtLw= +golang.org/x/sync v0.0.0-20180314180146-1d60e4601c6f/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= +golang.org/x/sync v0.0.0-20181108010431-42b317875d0f/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= +golang.org/x/sync v0.0.0-20181221193216-37e7f081c4d4/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= +golang.org/x/sync v0.0.0-20190227155943-e225da77a7e6/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= +golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= +golang.org/x/sync v0.0.0-20190911185100-cd5d95a43a6e/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= +golang.org/x/sync v0.0.0-20201020160332-67f06af15bc9/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= +golang.org/x/sync v0.0.0-20201207232520-09787c993a3a/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= +golang.org/x/sync v0.0.0-20210220032951-036812b2e83c h1:5KslGYwFpkhGh+Q16bwMP3cOontH8FOep7tGV86Y7SQ= +golang.org/x/sync v0.0.0-20210220032951-036812b2e83c/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= +golang.org/x/sys v0.0.0-20180823144017-11551d06cbcc/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= +golang.org/x/sys v0.0.0-20180830151530-49385e6e1522/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= +golang.org/x/sys v0.0.0-20180905080454-ebe1bf3edb33/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= +golang.org/x/sys v0.0.0-20180909124046-d0be0721c37e/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= +golang.org/x/sys v0.0.0-20180926160741-c2ed4eda69e7/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= +golang.org/x/sys v0.0.0-20181026203630-95b1ffbd15a5/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= +golang.org/x/sys v0.0.0-20181107165924-66b7b1311ac8/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= +golang.org/x/sys 
v0.0.0-20181116152217-5ac8a444bdc5/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= +golang.org/x/sys v0.0.0-20181122145206-62eef0e2fa9b/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= +golang.org/x/sys v0.0.0-20181205085412-a5c9d58dba9a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= +golang.org/x/sys v0.0.0-20181228144115-9a3f9b0469bb/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= +golang.org/x/sys v0.0.0-20190129075346-302c3dd5f1cc/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= +golang.org/x/sys v0.0.0-20190130150945-aca44879d564/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= +golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= +golang.org/x/sys v0.0.0-20190222072716-a9d3bda3a223/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= +golang.org/x/sys v0.0.0-20190403152447-81d4e9dc473e/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20190412213103-97732733099d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20190422165155-953cdadca894/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20190502145724-3ef323f4f1fd/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20190523142557-0e01d883c5c5/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20190610200419-93c9922d18ae/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20190616124812-15dcb6c0061f/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20190726091711-fc99dfbffb4e/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20190801041406-cbf593c0f2f3/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20190813064441-fde4db37ae7a/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20190826190057-c7b8b68b1456/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20190904154756-749cb33beabd/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20190922100055-0a153f010e69/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20190924154521-2837fb4f24fe/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20191005200804-aed5e4c7ecf9/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20191008105621-543471e840be/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20191026070338-33540a1f6037/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20191112214154-59a1497f0cea/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20191120155948-bd437916bb0e/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20191204072324-ce4227a45e2e/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20191220142924-d4481acd189f/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20200106162015-b016eb3dc98e/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20200116001909-b77594299b42/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20200122134326-e047566fdf82/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20200124204421-9fbb57f87de9/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= 
+golang.org/x/sys v0.0.0-20200202164722-d101bd2416d5/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20200223170610-d5e6a3e2c0ae/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20200302150141-5c8b2ff67527/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20200323222414-85ca7c5b95cd/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20200501145240-bc7a7d42d5c3/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20200515095857-1151b9dac4a9/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20200519105757-fe76b779f299/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20200615200032-f1bc736245b1/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20200625212154-ddb9806d33ae/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20200930185726-fdedc70b468f/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20201015000850-e3ed0017c211/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20201101102859-da207088b7d1/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20201119102817-f84b799fce68/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20201214210602-f9fddec55a1e/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20210119212857-b64e53b001e4/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20210124154548-22da62e12c0c/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20210217105451-b926d437f341/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20210316164454-77fc1eacc6aa/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20210330210617-4fbd30eecc44/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20210510120138-977fb7262007/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.0.0-20210615035016-665e8c7367d1/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.0.0-20210616094352-59db8d763f22/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.0.0-20210831042530-f4d43177bf5e h1:XMgFehsDnnLGtjvjOfqWSUzt0alpTR1RSEuznObga2c= +golang.org/x/sys v0.0.0-20210831042530-f4d43177bf5e/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/term v0.0.0-20201117132131-f5c789dd3221/go.mod h1:Nr5EML6q2oocZ2LXRh80K7BxOlk5/8JxuGnuhpl+muw= +golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1 h1:v+OssWQX+hTHEmOBgwxdZxK4zHq3yOs8F9J7mk0PY8E= +golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo= +golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= +golang.org/x/text v0.3.1-0.20180807135948-17ff2d5776d2/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= +golang.org/x/text v0.3.1-0.20181227161524-e6919f6577db/go.mod h1:bEr9sfX3Q8Zfm5fL9x+3itogRgK3+ptLWKqgva+5dAk= +golang.org/x/text v0.3.2/go.mod h1:bEr9sfX3Q8Zfm5fL9x+3itogRgK3+ptLWKqgva+5dAk= +golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= +golang.org/x/text v0.3.6 h1:aRYxNxv6iGQlyVaZmk6ZgYEDa+Jg18DxebPSrd6bg1M= +golang.org/x/text v0.3.6/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= +golang.org/x/time 
v0.0.0-20180412165947-fbb02b2291d2/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ= +golang.org/x/time v0.0.0-20181108054448-85acf8d2951c/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ= +golang.org/x/time v0.0.0-20190308202827-9d24e82272b4/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ= +golang.org/x/time v0.0.0-20191024005414-555d28b269f0/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ= +golang.org/x/time v0.0.0-20200416051211-89c76fbcd5d1 h1:NusfzzA6yGQ+ua51ck7E3omNUX/JuqbFSaRGqU8CcLI= +golang.org/x/time v0.0.0-20200416051211-89c76fbcd5d1/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ= +golang.org/x/tools v0.0.0-20180221164845-07fd8470d635/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= +golang.org/x/tools v0.0.0-20180525024113-a5b4c53f6e8b/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= +golang.org/x/tools v0.0.0-20180828015842-6cd1fcedba52/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= +golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= +golang.org/x/tools v0.0.0-20181030221726-6c7e314b6563/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= +golang.org/x/tools v0.0.0-20190114222345-bf090417da8b/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= +golang.org/x/tools v0.0.0-20190226205152-f727befe758c/go.mod h1:9Yl7xja0Znq3iFh3HoIrodX9oNMXvdceNzlUR8zjMvY= +golang.org/x/tools v0.0.0-20190311212946-11955173bddd/go.mod h1:LCzVGOaR6xXOjkQ3onu1FJEFr0SW1gC7cKk1uF8kGRs= +golang.org/x/tools v0.0.0-20190312170243-e65039ee4138/go.mod h1:LCzVGOaR6xXOjkQ3onu1FJEFr0SW1gC7cKk1uF8kGRs= +golang.org/x/tools v0.0.0-20190328211700-ab21143f2384/go.mod h1:LCzVGOaR6xXOjkQ3onu1FJEFr0SW1gC7cKk1uF8kGRs= +golang.org/x/tools v0.0.0-20190424220101-1e8e1cfdf96b/go.mod h1:RgjU9mgBXZiqYHBnxXauZ1Gv1EHHAz9KjViQ78xBX0Q= +golang.org/x/tools v0.0.0-20190524140312-2c0ae7006135/go.mod h1:RgjU9mgBXZiqYHBnxXauZ1Gv1EHHAz9KjViQ78xBX0Q= +golang.org/x/tools v0.0.0-20190524210228-3d17549cdc6b/go.mod h1:RgjU9mgBXZiqYHBnxXauZ1Gv1EHHAz9KjViQ78xBX0Q= +golang.org/x/tools v0.0.0-20190606050223-4d9ae51c2468/go.mod h1:/rFqwRUd4F7ZHNgwSSTFct+R/Kf4OFW1sUzUTQQTgfc= +golang.org/x/tools v0.0.0-20190611222205-d73e1c7e250b/go.mod h1:/rFqwRUd4F7ZHNgwSSTFct+R/Kf4OFW1sUzUTQQTgfc= +golang.org/x/tools v0.0.0-20190614205625-5aca471b1d59/go.mod h1:/rFqwRUd4F7ZHNgwSSTFct+R/Kf4OFW1sUzUTQQTgfc= +golang.org/x/tools v0.0.0-20190621195816-6e04913cbbac/go.mod h1:/rFqwRUd4F7ZHNgwSSTFct+R/Kf4OFW1sUzUTQQTgfc= +golang.org/x/tools v0.0.0-20190907020128-2ca718005c18/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo= +golang.org/x/tools v0.0.0-20191029041327-9cc4af7d6b2c/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo= +golang.org/x/tools v0.0.0-20191029190741-b9c20aec41a5/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo= +golang.org/x/tools v0.0.0-20191030062658-86caa796c7ab/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo= +golang.org/x/tools v0.0.0-20191107010934-f79515f33823/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo= +golang.org/x/tools v0.0.0-20191108193012-7d206e10da11/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo= +golang.org/x/tools v0.0.0-20191114200427-caa0b0f7d508/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo= +golang.org/x/tools v0.0.0-20191119224855-298f0cb1881e/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo= +golang.org/x/tools v0.0.0-20191216052735-49a3e744a425/go.mod h1:TB2adYChydJhpapKDTa4BR/hXlZSLoq2Wpct/0txZ28= +golang.org/x/tools v0.0.0-20200103221440-774c71fcf114/go.mod 
h1:TB2adYChydJhpapKDTa4BR/hXlZSLoq2Wpct/0txZ28= +golang.org/x/tools v0.0.0-20200130002326-2f3ba24bd6e7/go.mod h1:TB2adYChydJhpapKDTa4BR/hXlZSLoq2Wpct/0txZ28= +golang.org/x/tools v0.0.0-20200225230052-807dcd883420/go.mod h1:TB2adYChydJhpapKDTa4BR/hXlZSLoq2Wpct/0txZ28= +golang.org/x/tools v0.0.0-20200619180055-7c47624df98f/go.mod h1:EkVYQZoAsY45+roYkvgYkIh4xh/qjgUK9TdY2XT94GE= +golang.org/x/tools v0.0.0-20201105001634-bc3cf281b174/go.mod h1:emZCQorbCU4vsT4fOWvOPXz4eW1wZW4PmDk9uLelYpA= +golang.org/x/tools v0.0.0-20210106214847-113979e3529a/go.mod h1:emZCQorbCU4vsT4fOWvOPXz4eW1wZW4PmDk9uLelYpA= +golang.org/x/tools v0.0.0-20210112230658-8b4aab62c064/go.mod h1:emZCQorbCU4vsT4fOWvOPXz4eW1wZW4PmDk9uLelYpA= +golang.org/x/tools v0.1.0/go.mod h1:xkSsbof2nBLbhDlRMhhhyNLN/zl3eTqcnHD5viDpcZ0= +golang.org/x/tools v0.1.4 h1:cVngSRcfgyZCzys3KYOpCFa+4dqX/Oub9tAq00ttGVs= +golang.org/x/tools v0.1.4/go.mod h1:o0xws9oXOQQZyjljx8fwUC0k7L1pTE6eaCbjGeHmOkk= +golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= +golang.org/x/xerrors v0.0.0-20191011141410-1b5146add898/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= +golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= +golang.org/x/xerrors v0.0.0-20200804184101-5ec99f83aff1 h1:go1bK/D/BFZV2I8cIQd1NKEZ+0owSTG1fDTci4IqFcE= +golang.org/x/xerrors v0.0.0-20200804184101-5ec99f83aff1/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= +gonum.org/v1/gonum v0.0.0-20181121035319-3f7ecaa7e8ca/go.mod h1:Y+Yx5eoAFn32cQvJDxZx5Dpnq+c3wtXuadVZAcxbbBo= +gonum.org/v1/netlib v0.0.0-20181029234149-ec6d1f5cefe6/go.mod h1:wa6Ws7BG/ESfp6dHfk7C6KdzKA7wR7u/rKwOGE66zvw= +google.golang.org/api v0.3.1/go.mod h1:6wY9I6uQWHQ8EM57III9mq/AjF+i8G65rmVagqKMtkk= +google.golang.org/api v0.5.0 h1:lj9SyhMzyoa38fgFF0oO2T6pjs5IzkLPKfVtxpyCRMM= +google.golang.org/api v0.5.0/go.mod h1:8k5glujaEP+g9n7WNsDg8QP6cUVNI86fCNMcbazEtwE= +google.golang.org/appengine v1.1.0/go.mod h1:EbEs0AVv82hx2wNQdGPgUI5lhzA/G0D9YwlJXL52JkM= +google.golang.org/appengine v1.2.0/go.mod h1:xpcJRLb0r/rnEns0DIKYYv+WjYCduHsrkT7/EB5XEv4= +google.golang.org/appengine v1.4.0/go.mod h1:xpcJRLb0r/rnEns0DIKYYv+WjYCduHsrkT7/EB5XEv4= +google.golang.org/appengine v1.6.5 h1:tycE03LOZYQNhDpS27tcQdAzLCVMaj7QT2SXxebnpCM= +google.golang.org/appengine v1.6.5/go.mod h1:8WjMMxjGQR8xUklV/ARdw2HLXBOI7O7uCIDZVag1xfc= +google.golang.org/genproto v0.0.0-20180817151627-c66870c02cf8/go.mod h1:JiN7NxoALGmiZfu7CAH4rXhgtRTLTxftemlI0sWmxmc= +google.golang.org/genproto v0.0.0-20181004005441-af9cb2a35e7f/go.mod h1:JiN7NxoALGmiZfu7CAH4rXhgtRTLTxftemlI0sWmxmc= +google.golang.org/genproto v0.0.0-20190307195333-5fe7a883aa19/go.mod h1:VzzqZJRnGkLBvHegQrXjBqPurQTc5/KpmUdxsrq26oE= +google.golang.org/genproto v0.0.0-20190404172233-64821d5d2107/go.mod h1:VzzqZJRnGkLBvHegQrXjBqPurQTc5/KpmUdxsrq26oE= +google.golang.org/genproto v0.0.0-20190425155659-357c62f0e4bb/go.mod h1:VzzqZJRnGkLBvHegQrXjBqPurQTc5/KpmUdxsrq26oE= +google.golang.org/genproto v0.0.0-20190508193815-b515fa19cec8/go.mod h1:VzzqZJRnGkLBvHegQrXjBqPurQTc5/KpmUdxsrq26oE= +google.golang.org/genproto v0.0.0-20190530194941-fb225487d101/go.mod h1:z3L6/3dTEVtUr6QSP8miRzeRqwQOioJ9I66odjN4I7s= +google.golang.org/genproto v0.0.0-20190819201941-24fa4b261c55/go.mod h1:DMBHOl98Agz4BDEuKkezgsaosCRResVns1a3J2ZsMNc= +google.golang.org/genproto v0.0.0-20190927181202-20e1ac93f88c/go.mod h1:IbNlFCBrqXvoKpeg0TB2l7cyZUmoaFKYIwrEpbDKLA8= +google.golang.org/genproto v0.0.0-20200224152610-e50cd9704f63 
h1:YzfoEYWbODU5Fbt37+h7X16BWQbad7Q4S6gclTKFXM8= +google.golang.org/genproto v0.0.0-20200224152610-e50cd9704f63/go.mod h1:55QSHmfGQM9UVYDPBsyGGes0y52j32PQ3BqQfXhyH3c= +google.golang.org/grpc v0.0.0-20180607172857-7a6a684ca69e/go.mod h1:yo6s7OP7yaDglbqo1J04qKzAhqBH6lvTonzMVmEdcZw= +google.golang.org/grpc v1.14.0/go.mod h1:yo6s7OP7yaDglbqo1J04qKzAhqBH6lvTonzMVmEdcZw= +google.golang.org/grpc v1.17.0/go.mod h1:6QZJwpn2B+Zp71q/5VxRsJ6NXXVCE5NRUHRo+f3cWCs= +google.golang.org/grpc v1.19.0/go.mod h1:mqu4LbDTu4XGKhr4mRzUsmM4RtVoemTSY81AxZiDr8c= +google.golang.org/grpc v1.20.0/go.mod h1:chYK+tFQF0nDUGJgXMSgLCQk3phJEuONr2DCgLDdAQM= +google.golang.org/grpc v1.20.1/go.mod h1:10oTOabMzJvdu6/UiuZezV6QK5dSlG84ov/aaiqXj38= +google.golang.org/grpc v1.21.0/go.mod h1:oYelfM1adQP15Ek0mdvEgi9Df8B9CZIaU1084ijfRaM= +google.golang.org/grpc v1.22.0/go.mod h1:Y5yQAOtifL1yxbo5wqy6BxZv8vAUGQwXBOALyacEbxg= +google.golang.org/grpc v1.22.1/go.mod h1:Y5yQAOtifL1yxbo5wqy6BxZv8vAUGQwXBOALyacEbxg= +google.golang.org/grpc v1.23.0/go.mod h1:Y5yQAOtifL1yxbo5wqy6BxZv8vAUGQwXBOALyacEbxg= +google.golang.org/grpc v1.23.1/go.mod h1:Y5yQAOtifL1yxbo5wqy6BxZv8vAUGQwXBOALyacEbxg= +google.golang.org/grpc v1.24.0/go.mod h1:XDChyiUovWa60DnaeDeZmSW86xtLtjtZbwvSiRnRtcA= +google.golang.org/grpc v1.25.1/go.mod h1:c3i+UQWmh7LiEpx4sFZnkU36qjEYZ0imhYfXVyQciAY= +google.golang.org/grpc v1.26.0/go.mod h1:qbnxyOmOxrQa7FizSgH+ReBfzJrCY1pSN7KXBS8abTk= +google.golang.org/grpc v1.27.0/go.mod h1:qbnxyOmOxrQa7FizSgH+ReBfzJrCY1pSN7KXBS8abTk= +google.golang.org/grpc v1.27.1 h1:zvIju4sqAGvwKspUQOhwnpcqSbzi7/H6QomNNjTL4sk= +google.golang.org/grpc v1.27.1/go.mod h1:qbnxyOmOxrQa7FizSgH+ReBfzJrCY1pSN7KXBS8abTk= +google.golang.org/protobuf v0.0.0-20200109180630-ec00e32a8dfd/go.mod h1:DFci5gLYBciE7Vtevhsrf46CRTquxDuWsQurQQe4oz8= +google.golang.org/protobuf v0.0.0-20200221191635-4d8936d0db64/go.mod h1:kwYJMbMJ01Woi6D6+Kah6886xMZcty6N08ah7+eCXa0= +google.golang.org/protobuf v0.0.0-20200228230310-ab0ca4ff8a60/go.mod h1:cfTl7dwQJ+fmap5saPgwCLgHXTUD7jkjRqWcaiX5VyM= +google.golang.org/protobuf v1.20.1-0.20200309200217-e05f789c0967/go.mod h1:A+miEFZTKqfCUM6K7xSMQL9OKL/b6hQv+e19PK+JZNE= +google.golang.org/protobuf v1.21.0/go.mod h1:47Nbq4nVaFHyn7ilMalzfO3qCViNmqZ2kzikPIcrTAo= +google.golang.org/protobuf v1.22.0/go.mod h1:EGpADcykh3NcUnDUJcl1+ZksZNG86OlYog2l/sGQquU= +google.golang.org/protobuf v1.23.0 h1:4MY060fB1DLGMB/7MBTLnwQUY6+F09GEiz6SsrNqyzM= +google.golang.org/protobuf v1.23.0/go.mod h1:EGpADcykh3NcUnDUJcl1+ZksZNG86OlYog2l/sGQquU= +gopkg.in/alecthomas/gometalinter.v2 v2.0.12/go.mod h1:NDRytsqEZyolNuAgTzJkZMkSQM7FIKyzVzGhjB/qfYo= +gopkg.in/alecthomas/kingpin.v2 v2.2.6/go.mod h1:FMv+mEhP44yOT+4EoQTLFTRgOQ1FBLkstjWtayDeSgw= +gopkg.in/alecthomas/kingpin.v3-unstable v3.0.0-20180810215634-df19058c872c/go.mod h1:3HH7i1SgMqlzxCcBmUHW657sD4Kvv9sC3HpL3YukzwA= +gopkg.in/asn1-ber.v1 v1.0.0-20181015200546-f715ec2f112d/go.mod h1:cuepJuh7vyXfUyUwEgHQXw849cJrilpS5NeIjOWESAw= +gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= +gopkg.in/check.v1 v1.0.0-20180628173108-788fd7840127/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= +gopkg.in/check.v1 v1.0.0-20190902080502-41f04d3bba15/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= +gopkg.in/check.v1 v1.0.0-20200902074654-038fdea0a05b h1:QRR6H1YWRnHb4Y/HeNFCTJLFVxaq6wH4YuVdsUOr75U= +gopkg.in/check.v1 v1.0.0-20200902074654-038fdea0a05b/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= +gopkg.in/cheggaaa/pb.v1 v1.0.25/go.mod 
h1:V/YB90LKu/1FcN3WVnfiiE5oMCibMjukxqG/qStrOgw= +gopkg.in/errgo.v2 v2.1.0/go.mod h1:hNsd1EY+bozCKY1Ytp96fpM3vjJbqLJn88ws8XvfDNI= +gopkg.in/fsnotify.v1 v1.4.7/go.mod h1:Tz8NjZHkW78fSQdbUxIjBTcgA1z1m8ZHf0WmKUhAMys= +gopkg.in/gcfg.v1 v1.2.3/go.mod h1:yesOnuUOFQAhST5vPY4nbZsb/huCgGGXlipJsBn0b3o= +gopkg.in/go-playground/assert.v1 v1.2.1/go.mod h1:9RXL0bg/zibRAgZUYszZSwO/z8Y/a8bDuhia5mkpMnE= +gopkg.in/go-playground/validator.v8 v8.18.2/go.mod h1:RX2a/7Ha8BgOhfk7j780h4/u/RRjR0eouCJSH80/M2Y= +gopkg.in/go-playground/validator.v9 v9.29.1/go.mod h1:+c9/zcJMFNgbLvly1L1V+PpxWdVbfP1avr/N00E2vyQ= +gopkg.in/ini.v1 v1.57.0 h1:9unxIsFcTt4I55uWluz+UmL95q4kdJ0buvQ1ZIqVQww= +gopkg.in/ini.v1 v1.57.0/go.mod h1:pNLf8WUiyNEtQjuu5G5vTm06TEv9tsIgeAvK8hOrP4k= +gopkg.in/jcmturner/aescts.v1 v1.0.1/go.mod h1:nsR8qBOg+OucoIW+WMhB3GspUQXq9XorLnQb9XtvcOo= +gopkg.in/jcmturner/dnsutils.v1 v1.0.1/go.mod h1:m3v+5svpVOhtFAP/wSz+yzh4Mc0Fg7eRhxkJMWSIz9Q= +gopkg.in/jcmturner/goidentity.v3 v3.0.0/go.mod h1:oG2kH0IvSYNIu80dVAyu/yoefjq1mNfM5bm88whjWx4= +gopkg.in/jcmturner/gokrb5.v7 v7.5.0/go.mod h1:l8VISx+WGYp+Fp7KRbsiUuXTTOnxIc3Tuvyavf11/WM= +gopkg.in/jcmturner/rpc.v1 v1.1.0/go.mod h1:YIdkC4XfD6GXbzje11McwsDuOlZQSb9W4vfLvuNnlv8= +gopkg.in/kothar/go-backblaze.v0 v0.0.0-20210124194846-35409b867216 h1:2TSTkQ8PMvGOD5eeqqRVv6Z9+BYI+bowK97RCr3W+9M= +gopkg.in/kothar/go-backblaze.v0 v0.0.0-20210124194846-35409b867216/go.mod h1:zJ2QpyDCYo1KvLXlmdnFlQAyF/Qfth0fB8239Qg7BIE= +gopkg.in/natefinch/lumberjack.v2 v2.0.0 h1:1Lc07Kr7qY4U2YPouBjpCLxpiyxIVoxqXgkXLknAOE8= +gopkg.in/natefinch/lumberjack.v2 v2.0.0/go.mod h1:l0ndWWf7gzL7RNwBG7wST/UCcT4T24xpD6X8LsfU/+k= +gopkg.in/resty.v1 v1.12.0/go.mod h1:mDo4pnntr5jdWRML875a/NmxYqAlA73dVijT2AXvQQo= +gopkg.in/square/go-jose.v2 v2.3.1 h1:SK5KegNXmKmqE342YYN2qPHEnUYeoMiXXl1poUlI+o4= +gopkg.in/square/go-jose.v2 v2.3.1/go.mod h1:M9dMgbHiYLoDGQrXy7OpJDJWiKiU//h+vD76mk0e1AI= +gopkg.in/tomb.v1 v1.0.0-20141024135613-dd632973f1e7 h1:uRGJdciOHaEIrze2W8Q3AKkepLTh2hOroT7a+7czfdQ= +gopkg.in/tomb.v1 v1.0.0-20141024135613-dd632973f1e7/go.mod h1:dt/ZhP58zS4L8KSrWDmTeBkI65Dw0HsyUHuEVlX15mw= +gopkg.in/urfave/cli.v1 v1.20.0/go.mod h1:vuBzUtMdQeixQj8LVd+/98pzhxNGQoyuPBlsXHOQNO0= +gopkg.in/warnings.v0 v0.1.2/go.mod h1:jksf8JmL6Qr/oQM2OXTHunEvvTAsrWBLb6OOjuVWRNI= +gopkg.in/yaml.v2 v2.0.0-20170812160011-eb3733d160e7/go.mod h1:JAlM8MvJe8wmxCU4Bli9HhUf9+ttbYbLASfIpnQbh74= +gopkg.in/yaml.v2 v2.2.1/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= +gopkg.in/yaml.v2 v2.2.2/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= +gopkg.in/yaml.v2 v2.2.3/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= +gopkg.in/yaml.v2 v2.2.4/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= +gopkg.in/yaml.v2 v2.2.5/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= +gopkg.in/yaml.v2 v2.2.7/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= +gopkg.in/yaml.v2 v2.2.8/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= +gopkg.in/yaml.v2 v2.3.0/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= +gopkg.in/yaml.v2 v2.4.0 h1:D8xgwECY7CYvx+Y2n4sBz93Jn9JRvxdiyyo8CTfuKaY= +gopkg.in/yaml.v2 v2.4.0/go.mod h1:RDklbk79AGWmwhnvt/jBztapEOGDOx6ZbXqjP6csGnQ= +gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= +gopkg.in/yaml.v3 v3.0.0-20200615113413-eeeca48fe776/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= +gopkg.in/yaml.v3 v3.0.0-20210107192922-496545a6307b h1:h8qDotaEPuJATrMmW04NCwg7v22aHH28wwpauUhK9Oo= +gopkg.in/yaml.v3 v3.0.0-20210107192922-496545a6307b/go.mod 
h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= +gorm.io/driver/mysql v1.0.6/go.mod h1:KdrTanmfLPPyAOeYGyG+UpDys7/7eeWT1zCq+oekYnU= +gorm.io/driver/sqlite v1.1.4/go.mod h1:mJCeTFr7+crvS+TRnWc5Z3UvwxUN1BGBLMrf5LA9DYw= +gorm.io/gorm v1.20.7/go.mod h1:0HFTzE/SqkGTzK6TlDPPQbAYCluiVvhzoA1+aVyzenw= +gorm.io/gorm v1.21.9/go.mod h1:F+OptMscr0P2F2qU97WT1WimdH9GaQPoDW7AYd5i2Y0= +honnef.co/go/tools v0.0.0-20180728063816-88497007e858/go.mod h1:rf3lG4BRIbNafJWhAfAdb/ePZxsR/4RtNHQocxwk9r4= +honnef.co/go/tools v0.0.0-20190102054323-c2f93a96b099/go.mod h1:rf3lG4BRIbNafJWhAfAdb/ePZxsR/4RtNHQocxwk9r4= +honnef.co/go/tools v0.0.0-20190106161140-3f1c8253044a/go.mod h1:rf3lG4BRIbNafJWhAfAdb/ePZxsR/4RtNHQocxwk9r4= +honnef.co/go/tools v0.0.0-20190523083050-ea95bdfd59fc/go.mod h1:rf3lG4BRIbNafJWhAfAdb/ePZxsR/4RtNHQocxwk9r4= +honnef.co/go/tools v0.0.1-2019.2.3/go.mod h1:a3bituU0lyd329TUQxRnasdCoJDkEUEAqEt0JzvZhAg= +honnef.co/go/tools v0.2.0/go.mod h1:lPVVZ2BS5TfnjLyizF7o7hv7j9/L+8cZY2hLyjP9cGY= +k8s.io/klog v1.0.0/go.mod h1:4Bi6QPql/J/LkTDqv7R/cd3hPo4k2DG6Ptcz060Ez5I= +moul.io/zapgorm2 v1.1.0/go.mod h1:emRfKjNqSzVj5lcgasBdovIXY1jSOwFz2GQZn1Rddks= +rsc.io/goversion v1.2.0/go.mod h1:Eih9y/uIBS3ulggl7KNJ09xGSLcuNaLgmvvqa07sgfo= +rsc.io/pdf v0.1.1/go.mod h1:n8OzWcQ6Sp37PL01nO98y4iUCRdTGarVfzxY20ICaU4= +sigs.k8s.io/yaml v1.1.0 h1:4A07+ZFc2wgJwo8YNlQpr1rVlgUDlxXHhPJciaPY5gs= +sigs.k8s.io/yaml v1.1.0/go.mod h1:UJmg0vDUVViEyp3mgSv9WPwZCDxu4rQW1olrI1uml+o= +sourcegraph.com/sourcegraph/appdash v0.0.0-20190731080439-ebfcffb1b5c0/go.mod h1:hI742Nqp5OhwiqlzhgfbWU4mW4yO10fP+LoT9WOswdU= +xorm.io/builder v0.3.7 h1:2pETdKRK+2QG4mLX4oODHEhn5Z8j1m8sXa7jfu+/SZI= +xorm.io/builder v0.3.7/go.mod h1:aUW0S9eb9VCaPohFCH3j7czOx1PMW3i1HrSzbLYGBSE= +xorm.io/xorm v1.0.7 h1:26yBTDVI+CfQpVz2Y88fISh+aiJXIPP4eNoTJlwzsC4= +xorm.io/xorm v1.0.7/go.mod h1:uF9EtbhODq5kNWxMbnBEj8hRRZnlcNSz2t2N7HW/+A4= diff --git a/hack/autocomplete/bash_autocomplete b/hack/autocomplete/bash_autocomplete new file mode 100644 index 0000000..e2ad43e --- /dev/null +++ b/hack/autocomplete/bash_autocomplete @@ -0,0 +1,18 @@ +#! 
/bin/bash + +_cli_bash_autocomplete() { + if [[ "${COMP_WORDS[0]}" != "source" ]]; then + local cur opts base + COMPREPLY=() + cur="${COMP_WORDS[COMP_CWORD]}" + if [[ "$cur" == "-"* ]]; then + opts=$( ${COMP_WORDS[@]:0:$COMP_CWORD} ${cur} --generate-bash-completion ) + else + opts=$( ${COMP_WORDS[@]:0:$COMP_CWORD} --generate-bash-completion ) + fi + COMPREPLY=( $(compgen -W "${opts}" -- ${cur}) ) + return 0 + fi +} + +complete -o bashdefault -o default -o nospace -F _cli_bash_autocomplete juicefs diff --git a/hack/autocomplete/zsh_autocomplete b/hack/autocomplete/zsh_autocomplete new file mode 100644 index 0000000..7cdb3aa --- /dev/null +++ b/hack/autocomplete/zsh_autocomplete @@ -0,0 +1,22 @@ +#compdef juicefs + +_cli_zsh_autocomplete() { + local -a opts + local cur + cur=${words[-1]} + if [[ "$cur" == "-"* ]]; then + opts=("${(@f)$(_CLI_ZSH_AUTOCOMPLETE_HACK=1 ${words[@]:0:#words[@]-1} ${cur} --generate-bash-completion)}") + else + opts=("${(@f)$(_CLI_ZSH_AUTOCOMPLETE_HACK=1 ${words[@]:0:#words[@]-1} --generate-bash-completion)}") + fi + + if [[ "${opts[1]}" != "" ]]; then + _describe 'values' opts + else + _files + fi + + return +} + +compdef _cli_zsh_autocomplete juicefs diff --git a/hack/builder/Dockerfile b/hack/builder/Dockerfile new file mode 100644 index 0000000..57ed5d7 --- /dev/null +++ b/hack/builder/Dockerfile @@ -0,0 +1,8 @@ +FROM goreng/golang-cross:v1.15.7 + +ARG APT_MIRROR +RUN sed -i -e "s/deb.debian.org/${APT_MIRROR:-deb.debian.org}/g" \ + -e "s/security.debian.org/${APT_MIRROR:-security.debian.org}/g" \ + /etc/apt/sources.list && \ + apt-get update && apt-get install -y musl-tools && apt -y autoremove && \ + apt-get clean && rm -rf /var/cache/apt/* /var/lib/apt/lists/* /tmp/* /var/tmp/* diff --git a/hack/builder/sdk.Dockerfile b/hack/builder/sdk.Dockerfile new file mode 100644 index 0000000..201af4c --- /dev/null +++ b/hack/builder/sdk.Dockerfile @@ -0,0 +1,5 @@ +FROM centos:7 + +RUN yum install -y java-1.8.0-openjdk maven git gcc make \ + && ln -s /go/bin/go /usr/local/bin/go \ + && rm -rf /var/cache/yum diff --git a/hack/winfsp_headers/fuse.h b/hack/winfsp_headers/fuse.h new file mode 100644 index 0000000..ed5238f --- /dev/null +++ b/hack/winfsp_headers/fuse.h @@ -0,0 +1,261 @@ +/** + * @file fuse/fuse.h + * WinFsp FUSE compatible API. + * + * This file is derived from libfuse/include/fuse.h: + * FUSE: Filesystem in Userspace + * Copyright 2001-2007 Miklos Szeredi + * + * @copyright 2015-2020 Bill Zissimopoulos + */ +/* + * This file is part of WinFsp. + * + * You can redistribute it and/or modify it under the terms of the GNU + * General Public License version 3 as published by the Free Software + * Foundation. + * + * Licensees holding a valid commercial license may use this software + * in accordance with the commercial license agreement provided in + * conjunction with the software. The terms and conditions of any such + * commercial license agreement shall govern, supersede, and render + * ineffective any application of the GPLv3 license to this software, + * notwithstanding of any reference thereto in the software or + * associated repository. 
+ */ + +#ifndef FUSE_H_ +#define FUSE_H_ + +#include "fuse_common.h" + +#ifdef __cplusplus +extern "C" { +#endif + +struct fuse; + +typedef int (*fuse_fill_dir_t)(void *buf, const char *name, + const struct fuse_stat *stbuf, fuse_off_t off); +typedef struct fuse_dirhandle *fuse_dirh_t; +typedef int (*fuse_dirfil_t)(fuse_dirh_t h, const char *name, + int type, fuse_ino_t ino); + +struct fuse_operations +{ + /* S - supported by WinFsp */ + /* S */ int (*getattr)(const char *path, struct fuse_stat *stbuf); + /* S */ int (*getdir)(const char *path, fuse_dirh_t h, fuse_dirfil_t filler); + /* S */ int (*readlink)(const char *path, char *buf, size_t size); + /* S */ int (*mknod)(const char *path, fuse_mode_t mode, fuse_dev_t dev); + /* S */ int (*mkdir)(const char *path, fuse_mode_t mode); + /* S */ int (*unlink)(const char *path); + /* S */ int (*rmdir)(const char *path); + /* S */ int (*symlink)(const char *dstpath, const char *srcpath); + /* S */ int (*rename)(const char *oldpath, const char *newpath); + /* _ */ int (*link)(const char *srcpath, const char *dstpath); + /* S */ int (*chmod)(const char *path, fuse_mode_t mode); + /* S */ int (*chown)(const char *path, fuse_uid_t uid, fuse_gid_t gid); + /* S */ int (*truncate)(const char *path, fuse_off_t size); + /* S */ int (*utime)(const char *path, struct fuse_utimbuf *timbuf); + /* S */ int (*open)(const char *path, struct fuse_file_info *fi); + /* S */ int (*read)(const char *path, char *buf, size_t size, fuse_off_t off, + struct fuse_file_info *fi); + /* S */ int (*write)(const char *path, const char *buf, size_t size, fuse_off_t off, + struct fuse_file_info *fi); + /* S */ int (*statfs)(const char *path, struct fuse_statvfs *stbuf); + /* S */ int (*flush)(const char *path, struct fuse_file_info *fi); + /* S */ int (*release)(const char *path, struct fuse_file_info *fi); + /* S */ int (*fsync)(const char *path, int datasync, struct fuse_file_info *fi); + /* S */ int (*setxattr)(const char *path, const char *name, const char *value, size_t size, + int flags); + /* S */ int (*getxattr)(const char *path, const char *name, char *value, size_t size); + /* S */ int (*listxattr)(const char *path, char *namebuf, size_t size); + /* S */ int (*removexattr)(const char *path, const char *name); + /* S */ int (*opendir)(const char *path, struct fuse_file_info *fi); + /* S */ int (*readdir)(const char *path, void *buf, fuse_fill_dir_t filler, fuse_off_t off, + struct fuse_file_info *fi); + /* S */ int (*releasedir)(const char *path, struct fuse_file_info *fi); + /* S */ int (*fsyncdir)(const char *path, int datasync, struct fuse_file_info *fi); + /* S */ void *(*init)(struct fuse_conn_info *conn); + /* S */ void (*destroy)(void *data); + /* _ */ int (*access)(const char *path, int mask); + /* S */ int (*create)(const char *path, fuse_mode_t mode, struct fuse_file_info *fi); + /* S */ int (*ftruncate)(const char *path, fuse_off_t off, struct fuse_file_info *fi); + /* S */ int (*fgetattr)(const char *path, struct fuse_stat *stbuf, struct fuse_file_info *fi); + /* _ */ int (*lock)(const char *path, + struct fuse_file_info *fi, int cmd, struct fuse_flock *lock); + /* S */ int (*utimens)(const char *path, const struct fuse_timespec tv[2]); + /* _ */ int (*bmap)(const char *path, size_t blocksize, uint64_t *idx); + /* _ */ unsigned int flag_nullpath_ok:1; + /* _ */ unsigned int flag_nopath:1; + /* _ */ unsigned int flag_utime_omit_ok:1; + /* _ */ unsigned int flag_reserved:29; + /* S */ int (*ioctl)(const char *path, int cmd, void *arg, struct fuse_file_info 
*fi, + unsigned int flags, void *data); + /* _ */ int (*poll)(const char *path, struct fuse_file_info *fi, + struct fuse_pollhandle *ph, unsigned *reventsp); + /* FUSE 2.9 */ + /* _ */ int (*write_buf)(const char *path, + struct fuse_bufvec *buf, fuse_off_t off, struct fuse_file_info *fi); + /* _ */ int (*read_buf)(const char *path, + struct fuse_bufvec **bufp, size_t size, fuse_off_t off, struct fuse_file_info *fi); + /* _ */ int (*flock)(const char *path, struct fuse_file_info *, int op); + /* _ */ int (*fallocate)(const char *path, int mode, fuse_off_t off, fuse_off_t len, + struct fuse_file_info *fi); + /* OSXFUSE */ + /* _ */ int (*reserved00)(); + /* _ */ int (*reserved01)(); + /* _ */ int (*reserved02)(); + /* _ */ int (*statfs_x)(const char *path, struct fuse_statfs *stbuf); + /* _ */ int (*setvolname)(const char *volname); + /* _ */ int (*exchange)(const char *oldpath, const char *newpath, unsigned long flags); + /* _ */ int (*getxtimes)(const char *path, + struct fuse_timespec *bkuptime, struct fuse_timespec *crtime); + /* _ */ int (*setbkuptime)(const char *path, const struct fuse_timespec *tv); + /* S */ int (*setchgtime)(const char *path, const struct fuse_timespec *tv); + /* S */ int (*setcrtime)(const char *path, const struct fuse_timespec *tv); + /* S */ int (*chflags)(const char *path, uint32_t flags); + /* _ */ int (*setattr_x)(const char *path, struct fuse_setattr_x *attr); + /* _ */ int (*fsetattr_x)(const char *path, struct fuse_setattr_x *attr, + struct fuse_file_info *fi); +}; + +struct fuse_context +{ + struct fuse *fuse; + fuse_uid_t uid; + fuse_gid_t gid; + fuse_pid_t pid; + void *private_data; + fuse_mode_t umask; +}; + +#define fuse_main(argc, argv, ops, data)\ + fuse_main_real(argc, argv, ops, sizeof *(ops), data) + +FSP_FUSE_API int FSP_FUSE_API_NAME(fsp_fuse_main_real)(struct fsp_fuse_env *env, + int argc, char *argv[], + const struct fuse_operations *ops, size_t opsize, void *data); +FSP_FUSE_API int FSP_FUSE_API_NAME(fsp_fuse_is_lib_option)(struct fsp_fuse_env *env, + const char *opt); +FSP_FUSE_API struct fuse *FSP_FUSE_API_NAME(fsp_fuse_new)(struct fsp_fuse_env *env, + struct fuse_chan *ch, struct fuse_args *args, + const struct fuse_operations *ops, size_t opsize, void *data); +FSP_FUSE_API void FSP_FUSE_API_NAME(fsp_fuse_destroy)(struct fsp_fuse_env *env, + struct fuse *f); +FSP_FUSE_API int FSP_FUSE_API_NAME(fsp_fuse_loop)(struct fsp_fuse_env *env, + struct fuse *f); +FSP_FUSE_API int FSP_FUSE_API_NAME(fsp_fuse_loop_mt)(struct fsp_fuse_env *env, + struct fuse *f); +FSP_FUSE_API void FSP_FUSE_API_NAME(fsp_fuse_exit)(struct fsp_fuse_env *env, + struct fuse *f); +FSP_FUSE_API int FSP_FUSE_API_NAME(fsp_fuse_exited)(struct fsp_fuse_env *env, + struct fuse *f); +FSP_FUSE_API struct fuse_context *FSP_FUSE_API_NAME(fsp_fuse_get_context)(struct fsp_fuse_env *env); + +FSP_FUSE_SYM( +int fuse_main_real(int argc, char *argv[], + const struct fuse_operations *ops, size_t opsize, void *data), +{ + return FSP_FUSE_API_CALL(fsp_fuse_main_real) + (fsp_fuse_env(), argc, argv, ops, opsize, data); +}) + +FSP_FUSE_SYM( +int fuse_is_lib_option(const char *opt), +{ + return FSP_FUSE_API_CALL(fsp_fuse_is_lib_option) + (fsp_fuse_env(), opt); +}) + +FSP_FUSE_SYM( +struct fuse *fuse_new(struct fuse_chan *ch, struct fuse_args *args, + const struct fuse_operations *ops, size_t opsize, void *data), +{ + return FSP_FUSE_API_CALL(fsp_fuse_new) + (fsp_fuse_env(), ch, args, ops, opsize, data); +}) + +FSP_FUSE_SYM( +void fuse_destroy(struct fuse *f), +{ + 
FSP_FUSE_API_CALL(fsp_fuse_destroy) + (fsp_fuse_env(), f); +}) + +FSP_FUSE_SYM( +int fuse_loop(struct fuse *f), +{ + return FSP_FUSE_API_CALL(fsp_fuse_loop) + (fsp_fuse_env(), f); +}) + +FSP_FUSE_SYM( +int fuse_loop_mt(struct fuse *f), +{ + return FSP_FUSE_API_CALL(fsp_fuse_loop_mt) + (fsp_fuse_env(), f); +}) + +FSP_FUSE_SYM( +void fuse_exit(struct fuse *f), +{ + FSP_FUSE_API_CALL(fsp_fuse_exit) + (fsp_fuse_env(), f); +}) + +FSP_FUSE_SYM( +int fuse_exited(struct fuse *f), +{ + return FSP_FUSE_API_CALL(fsp_fuse_exited) + (fsp_fuse_env(), f); +}) + +FSP_FUSE_SYM( +struct fuse_context *fuse_get_context(void), +{ + return FSP_FUSE_API_CALL(fsp_fuse_get_context) + (fsp_fuse_env()); +}) + +FSP_FUSE_SYM( +int fuse_getgroups(int size, fuse_gid_t list[]), +{ + (void)size; + (void)list; + return -ENOSYS; +}) + +FSP_FUSE_SYM( +int fuse_interrupted(void), +{ + return 0; +}) + +FSP_FUSE_SYM( +int fuse_invalidate(struct fuse *f, const char *path), +{ + (void)f; + (void)path; + return -EINVAL; +}) + +FSP_FUSE_SYM( +int fuse_notify_poll(struct fuse_pollhandle *ph), +{ + (void)ph; + return 0; +}) + +FSP_FUSE_SYM( +struct fuse_session *fuse_get_session(struct fuse *f), +{ + return (struct fuse_session *)f; +}) + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/hack/winfsp_headers/fuse_common.h b/hack/winfsp_headers/fuse_common.h new file mode 100644 index 0000000..f2b8628 --- /dev/null +++ b/hack/winfsp_headers/fuse_common.h @@ -0,0 +1,184 @@ +/** + * @file fuse/fuse_common.h + * WinFsp FUSE compatible API. + * + * This file is derived from libfuse/include/fuse_common.h: + * FUSE: Filesystem in Userspace + * Copyright 2001-2007 Miklos Szeredi + * + * @copyright 2015-2020 Bill Zissimopoulos + */ +/* + * This file is part of WinFsp. + * + * You can redistribute it and/or modify it under the terms of the GNU + * General Public License version 3 as published by the Free Software + * Foundation. + * + * Licensees holding a valid commercial license may use this software + * in accordance with the commercial license agreement provided in + * conjunction with the software. The terms and conditions of any such + * commercial license agreement shall govern, supersede, and render + * ineffective any application of the GPLv3 license to this software, + * notwithstanding of any reference thereto in the software or + * associated repository. 
+ */ + +#ifndef FUSE_COMMON_H_ +#define FUSE_COMMON_H_ + +#include "winfsp_fuse.h" +#include "fuse_opt.h" + +#ifdef __cplusplus +extern "C" { +#endif + +#define FUSE_MAJOR_VERSION 2 +#define FUSE_MINOR_VERSION 8 +#define FUSE_MAKE_VERSION(maj, min) ((maj) * 10 + (min)) +#define FUSE_VERSION FUSE_MAKE_VERSION(FUSE_MAJOR_VERSION, FUSE_MINOR_VERSION) + +#define FUSE_CAP_ASYNC_READ (1 << 0) +#define FUSE_CAP_POSIX_LOCKS (1 << 1) +#define FUSE_CAP_ATOMIC_O_TRUNC (1 << 3) +#define FUSE_CAP_EXPORT_SUPPORT (1 << 4) +#define FUSE_CAP_BIG_WRITES (1 << 5) +#define FUSE_CAP_DONT_MASK (1 << 6) +#define FUSE_CAP_ALLOCATE (1 << 27) /* reserved (OSXFUSE) */ +#define FUSE_CAP_EXCHANGE_DATA (1 << 28) /* reserved (OSXFUSE) */ +#define FUSE_CAP_CASE_INSENSITIVE (1 << 29) /* file system is case insensitive */ +#define FUSE_CAP_VOL_RENAME (1 << 30) /* reserved (OSXFUSE) */ +#define FUSE_CAP_XTIMES (1 << 31) /* reserved (OSXFUSE) */ + +#define FSP_FUSE_CAP_READDIR_PLUS (1 << 21) /* file system supports enhanced readdir */ +#define FSP_FUSE_CAP_READ_ONLY (1 << 22) /* file system is marked read-only */ +#define FSP_FUSE_CAP_STAT_EX (1 << 23) /* file system supports fuse_stat_ex */ +#define FSP_FUSE_CAP_CASE_INSENSITIVE FUSE_CAP_CASE_INSENSITIVE + +#define FUSE_IOCTL_COMPAT (1 << 0) +#define FUSE_IOCTL_UNRESTRICTED (1 << 1) +#define FUSE_IOCTL_RETRY (1 << 2) +#define FUSE_IOCTL_MAX_IOV 256 + +/* from FreeBSD */ +#define FSP_FUSE_UF_HIDDEN 0x00008000 +#define FSP_FUSE_UF_READONLY 0x00001000 +#define FSP_FUSE_UF_SYSTEM 0x00000080 +#define FSP_FUSE_UF_ARCHIVE 0x00000800 +#if !defined(UF_HIDDEN) +#define UF_HIDDEN FSP_FUSE_UF_HIDDEN +#endif +#if !defined(UF_READONLY) +#define UF_READONLY FSP_FUSE_UF_READONLY +#endif +#if !defined(UF_SYSTEM) +#define UF_SYSTEM FSP_FUSE_UF_SYSTEM +#endif +#if !defined(UF_ARCHIVE) +#define UF_ARCHIVE FSP_FUSE_UF_ARCHIVE +#endif + +struct fuse_file_info +{ + int flags; + unsigned int fh_old; + int writepage; + unsigned int direct_io:1; + unsigned int keep_cache:1; + unsigned int flush:1; + unsigned int nonseekable:1; + unsigned int padding:28; + uint64_t fh; + uint64_t lock_owner; +}; + +struct fuse_conn_info +{ + unsigned proto_major; + unsigned proto_minor; + unsigned async_read; + unsigned max_write; + unsigned max_readahead; + unsigned capable; + unsigned want; + unsigned reserved[25]; +}; + +struct fuse_session; +struct fuse_chan; +struct fuse_pollhandle; +struct fuse_bufvec; +struct fuse_statfs; +struct fuse_setattr_x; + +FSP_FUSE_API int FSP_FUSE_API_NAME(fsp_fuse_version)(struct fsp_fuse_env *env); +FSP_FUSE_API struct fuse_chan *FSP_FUSE_API_NAME(fsp_fuse_mount)(struct fsp_fuse_env *env, + const char *mountpoint, struct fuse_args *args); +FSP_FUSE_API void FSP_FUSE_API_NAME(fsp_fuse_unmount)(struct fsp_fuse_env *env, + const char *mountpoint, struct fuse_chan *ch); +FSP_FUSE_API int FSP_FUSE_API_NAME(fsp_fuse_parse_cmdline)(struct fsp_fuse_env *env, + struct fuse_args *args, + char **mountpoint, int *multithreaded, int *foreground); +FSP_FUSE_API int32_t FSP_FUSE_API_NAME(fsp_fuse_ntstatus_from_errno)(struct fsp_fuse_env *env, + int err); + +FSP_FUSE_SYM( +int fuse_version(void), +{ + return FSP_FUSE_API_CALL(fsp_fuse_version) + (fsp_fuse_env()); +}) + +FSP_FUSE_SYM( +struct fuse_chan *fuse_mount(const char *mountpoint, struct fuse_args *args), +{ + return FSP_FUSE_API_CALL(fsp_fuse_mount) + (fsp_fuse_env(), mountpoint, args); +}) + +FSP_FUSE_SYM( +void fuse_unmount(const char *mountpoint, struct fuse_chan *ch), +{ + FSP_FUSE_API_CALL(fsp_fuse_unmount) + (fsp_fuse_env(), 
mountpoint, ch); +}) + +FSP_FUSE_SYM( +int fuse_parse_cmdline(struct fuse_args *args, + char **mountpoint, int *multithreaded, int *foreground), +{ + return FSP_FUSE_API_CALL(fsp_fuse_parse_cmdline) + (fsp_fuse_env(), args, mountpoint, multithreaded, foreground); +}) + +FSP_FUSE_SYM( +void fuse_pollhandle_destroy(struct fuse_pollhandle *ph), +{ + (void)ph; +}) + +FSP_FUSE_SYM( +int fuse_daemonize(int foreground), +{ + return fsp_fuse_daemonize(foreground); +}) + +FSP_FUSE_SYM( +int fuse_set_signal_handlers(struct fuse_session *se), +{ + return fsp_fuse_set_signal_handlers(se); +}) + +FSP_FUSE_SYM( +void fuse_remove_signal_handlers(struct fuse_session *se), +{ + (void)se; + fsp_fuse_set_signal_handlers(0); +}) + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/hack/winfsp_headers/fuse_opt.h b/hack/winfsp_headers/fuse_opt.h new file mode 100644 index 0000000..9d184da --- /dev/null +++ b/hack/winfsp_headers/fuse_opt.h @@ -0,0 +1,133 @@ +/** + * @file fuse/fuse_opt.h + * WinFsp FUSE compatible API. + * + * This file is derived from libfuse/include/fuse_opt.h: + * FUSE: Filesystem in Userspace + * Copyright 2001-2007 Miklos Szeredi + * + * @copyright 2015-2020 Bill Zissimopoulos + */ +/* + * This file is part of WinFsp. + * + * You can redistribute it and/or modify it under the terms of the GNU + * General Public License version 3 as published by the Free Software + * Foundation. + * + * Licensees holding a valid commercial license may use this software + * in accordance with the commercial license agreement provided in + * conjunction with the software. The terms and conditions of any such + * commercial license agreement shall govern, supersede, and render + * ineffective any application of the GPLv3 license to this software, + * notwithstanding of any reference thereto in the software or + * associated repository. 
+ */ + +#ifndef FUSE_OPT_H_ +#define FUSE_OPT_H_ + +#include "winfsp_fuse.h" + +#ifdef __cplusplus +extern "C" { +#endif + +#define FUSE_OPT_KEY(templ, key) { templ, -1, key } +#define FUSE_OPT_END { NULL, 0, 0 } + +#define FUSE_OPT_KEY_OPT -1 +#define FUSE_OPT_KEY_NONOPT -2 +#define FUSE_OPT_KEY_KEEP -3 +#define FUSE_OPT_KEY_DISCARD -4 + +#define FUSE_ARGS_INIT(argc, argv) { argc, argv, 0 } + +struct fuse_opt +{ + const char *templ; + unsigned int offset; + int value; +}; + +struct fuse_args +{ + int argc; + char **argv; + int allocated; +}; + +typedef int (*fuse_opt_proc_t)(void *data, const char *arg, int key, + struct fuse_args *outargs); + +FSP_FUSE_API int FSP_FUSE_API_NAME(fsp_fuse_opt_parse)(struct fsp_fuse_env *env, + struct fuse_args *args, void *data, + const struct fuse_opt opts[], fuse_opt_proc_t proc); +FSP_FUSE_API int FSP_FUSE_API_NAME(fsp_fuse_opt_add_arg)(struct fsp_fuse_env *env, + struct fuse_args *args, const char *arg); +FSP_FUSE_API int FSP_FUSE_API_NAME(fsp_fuse_opt_insert_arg)(struct fsp_fuse_env *env, + struct fuse_args *args, int pos, const char *arg); +FSP_FUSE_API void FSP_FUSE_API_NAME(fsp_fuse_opt_free_args)(struct fsp_fuse_env *env, + struct fuse_args *args); +FSP_FUSE_API int FSP_FUSE_API_NAME(fsp_fuse_opt_add_opt)(struct fsp_fuse_env *env, + char **opts, const char *opt); +FSP_FUSE_API int FSP_FUSE_API_NAME(fsp_fuse_opt_add_opt_escaped)(struct fsp_fuse_env *env, + char **opts, const char *opt); +FSP_FUSE_API int FSP_FUSE_API_NAME(fsp_fuse_opt_match)(struct fsp_fuse_env *env, + const struct fuse_opt opts[], const char *opt); + +FSP_FUSE_SYM( +int fuse_opt_parse(struct fuse_args *args, void *data, + const struct fuse_opt opts[], fuse_opt_proc_t proc), +{ + return FSP_FUSE_API_CALL(fsp_fuse_opt_parse) + (fsp_fuse_env(), args, data, opts, proc); +}) + +FSP_FUSE_SYM( +int fuse_opt_add_arg(struct fuse_args *args, const char *arg), +{ + return FSP_FUSE_API_CALL(fsp_fuse_opt_add_arg) + (fsp_fuse_env(), args, arg); +}) + +FSP_FUSE_SYM( +int fuse_opt_insert_arg(struct fuse_args *args, int pos, const char *arg), +{ + return FSP_FUSE_API_CALL(fsp_fuse_opt_insert_arg) + (fsp_fuse_env(), args, pos, arg); +}) + +FSP_FUSE_SYM( +void fuse_opt_free_args(struct fuse_args *args), +{ + FSP_FUSE_API_CALL(fsp_fuse_opt_free_args) + (fsp_fuse_env(), args); +}) + +FSP_FUSE_SYM( +int fuse_opt_add_opt(char **opts, const char *opt), +{ + return FSP_FUSE_API_CALL(fsp_fuse_opt_add_opt) + (fsp_fuse_env(), opts, opt); +}) + +FSP_FUSE_SYM( +int fuse_opt_add_opt_escaped(char **opts, const char *opt), +{ + return FSP_FUSE_API_CALL(fsp_fuse_opt_add_opt_escaped) + (fsp_fuse_env(), opts, opt); +}) + +FSP_FUSE_SYM( +int fuse_opt_match(const struct fuse_opt opts[], const char *opt), +{ + return FSP_FUSE_API_CALL(fsp_fuse_opt_match) + (fsp_fuse_env(), opts, opt); +}) + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/hack/winfsp_headers/winfsp_fuse.h b/hack/winfsp_headers/winfsp_fuse.h new file mode 100644 index 0000000..241f49e --- /dev/null +++ b/hack/winfsp_headers/winfsp_fuse.h @@ -0,0 +1,434 @@ +/** + * @file fuse/winfsp_fuse.h + * WinFsp FUSE compatible API. + * + * @copyright 2015-2020 Bill Zissimopoulos + */ +/* + * This file is part of WinFsp. + * + * You can redistribute it and/or modify it under the terms of the GNU + * General Public License version 3 as published by the Free Software + * Foundation. + * + * Licensees holding a valid commercial license may use this software + * in accordance with the commercial license agreement provided in + * conjunction with the software. 
The terms and conditions of any such + * commercial license agreement shall govern, supersede, and render + * ineffective any application of the GPLv3 license to this software, + * notwithstanding of any reference thereto in the software or + * associated repository. + */ + +#ifndef FUSE_WINFSP_FUSE_H_INCLUDED +#define FUSE_WINFSP_FUSE_H_INCLUDED + +#include +#include +#if !defined(WINFSP_DLL_INTERNAL) +#include +#endif + +#ifdef __cplusplus +extern "C" { +#endif + +#if !defined(FSP_FUSE_API) +#if defined(WINFSP_DLL_INTERNAL) +#define FSP_FUSE_API __declspec(dllexport) +#else +#define FSP_FUSE_API __declspec(dllimport) +#endif +#endif + +#if !defined(FSP_FUSE_API_NAME) +#define FSP_FUSE_API_NAME(n) (n) +#endif + +#if !defined(FSP_FUSE_API_CALL) +#define FSP_FUSE_API_CALL(n) (n) +#endif + +#if !defined(FSP_FUSE_SYM) +#if !defined(CYGFUSE) +#define FSP_FUSE_SYM(proto, ...) static inline proto { __VA_ARGS__ } +#else +#define FSP_FUSE_SYM(proto, ...) proto; +#endif +#endif + +#define FSP_FUSE_DEVICE_TYPE (0x8000 | 'W' | 'F' * 0x100) /* DeviceIoControl -> ioctl */ +#define FSP_FUSE_CTLCODE_FROM_IOCTL(cmd)\ + (FSP_FUSE_DEVICE_TYPE << 16) | (((cmd) & 0x0fff) << 2) +#define FSP_FUSE_IOCTL(cmd, isiz, osiz) \ + ( \ + (((osiz) != 0) << 31) | \ + (((isiz) != 0) << 30) | \ + (((isiz) | (osiz)) << 16) | \ + (cmd) \ + ) + +/* + * FUSE uses a number of types (notably: struct stat) that are OS specific. + * Furthermore there are sometimes multiple definitions of the same type even + * within the same OS. This is certainly true on Windows, where these types + * are not even native. + * + * For this reason we will define our own fuse_* types which represent the + * types as the WinFsp DLL expects to see them. We will define these types + * to be compatible with the equivalent Cygwin types as we want WinFsp-FUSE + * to be usable from Cygwin. 
+ */ + +#define FSP_FUSE_STAT_FIELD_DEFN \ + fuse_dev_t st_dev; \ + fuse_ino_t st_ino; \ + fuse_mode_t st_mode; \ + fuse_nlink_t st_nlink; \ + fuse_uid_t st_uid; \ + fuse_gid_t st_gid; \ + fuse_dev_t st_rdev; \ + fuse_off_t st_size; \ + struct fuse_timespec st_atim; \ + struct fuse_timespec st_mtim; \ + struct fuse_timespec st_ctim; \ + fuse_blksize_t st_blksize; \ + fuse_blkcnt_t st_blocks; \ + struct fuse_timespec st_birthtim; +#define FSP_FUSE_STAT_EX_FIELD_DEFN \ + FSP_FUSE_STAT_FIELD_DEFN \ + uint32_t st_flags; \ + uint32_t st_reserved32[3]; \ + uint64_t st_reserved64[2]; + +#if defined(_WIN64) || defined(_WIN32) + +typedef uint32_t fuse_uid_t; +typedef uint32_t fuse_gid_t; +typedef int32_t fuse_pid_t; + +typedef uint32_t fuse_dev_t; +typedef uint64_t fuse_ino_t; +typedef uint32_t fuse_mode_t; +typedef uint16_t fuse_nlink_t; +typedef int64_t fuse_off_t; + +#if defined(_WIN64) +typedef uint64_t fuse_fsblkcnt_t; +typedef uint64_t fuse_fsfilcnt_t; +#else +typedef uint32_t fuse_fsblkcnt_t; +typedef uint32_t fuse_fsfilcnt_t; +#endif +typedef int32_t fuse_blksize_t; +typedef int64_t fuse_blkcnt_t; + +#if defined(_WIN64) +struct fuse_utimbuf +{ + int64_t actime; + int64_t modtime; +}; +struct fuse_timespec +{ + int64_t tv_sec; + int64_t tv_nsec; +}; +#else +struct fuse_utimbuf +{ + int32_t actime; + int32_t modtime; +}; +struct fuse_timespec +{ + int32_t tv_sec; + int32_t tv_nsec; +}; +#endif + +#if !defined(FSP_FUSE_USE_STAT_EX) +struct fuse_stat +{ + FSP_FUSE_STAT_FIELD_DEFN +}; +#else +struct fuse_stat +{ + FSP_FUSE_STAT_EX_FIELD_DEFN +}; +#endif + +#if defined(_WIN64) +struct fuse_statvfs +{ + uint64_t f_bsize; + uint64_t f_frsize; + fuse_fsblkcnt_t f_blocks; + fuse_fsblkcnt_t f_bfree; + fuse_fsblkcnt_t f_bavail; + fuse_fsfilcnt_t f_files; + fuse_fsfilcnt_t f_ffree; + fuse_fsfilcnt_t f_favail; + uint64_t f_fsid; + uint64_t f_flag; + uint64_t f_namemax; +}; +#else +struct fuse_statvfs +{ + uint32_t f_bsize; + uint32_t f_frsize; + fuse_fsblkcnt_t f_blocks; + fuse_fsblkcnt_t f_bfree; + fuse_fsblkcnt_t f_bavail; + fuse_fsfilcnt_t f_files; + fuse_fsfilcnt_t f_ffree; + fuse_fsfilcnt_t f_favail; + uint32_t f_fsid; + uint32_t f_flag; + uint32_t f_namemax; +}; +#endif + +struct fuse_flock +{ + int16_t l_type; + int16_t l_whence; + fuse_off_t l_start; + fuse_off_t l_len; + fuse_pid_t l_pid; +}; + +#if defined(WINFSP_DLL_INTERNAL) +#define FSP_FUSE_ENV_INIT \ + { \ + 'W', \ + MemAlloc, MemFree, \ + fsp_fuse_daemonize, \ + fsp_fuse_set_signal_handlers, \ + 0/*conv_to_win_path*/, \ + 0/*winpid_to_pid*/, \ + { 0 }, \ + } +#else +#define FSP_FUSE_ENV_INIT \ + { \ + 'W', \ + malloc, free, \ + fsp_fuse_daemonize, \ + fsp_fuse_set_signal_handlers, \ + 0/*conv_to_win_path*/, \ + 0/*winpid_to_pid*/, \ + { 0 }, \ + } +#endif + +#elif defined(__CYGWIN__) + +#include +#include +#include +#include +#include +#include +#include + +#define fuse_uid_t uid_t +#define fuse_gid_t gid_t +#define fuse_pid_t pid_t + +#define fuse_dev_t dev_t +#define fuse_ino_t ino_t +#define fuse_mode_t mode_t +#define fuse_nlink_t nlink_t +#define fuse_off_t off_t + +#define fuse_fsblkcnt_t fsblkcnt_t +#define fuse_fsfilcnt_t fsfilcnt_t +#define fuse_blksize_t blksize_t +#define fuse_blkcnt_t blkcnt_t + +#define fuse_utimbuf utimbuf +#define fuse_timespec timespec + +#if !defined(FSP_FUSE_USE_STAT_EX) +#define fuse_stat stat +#else +struct fuse_stat +{ + FSP_FUSE_STAT_EX_FIELD_DEFN +}; +#endif +#define fuse_statvfs statvfs +#define fuse_flock flock + +#define FSP_FUSE_ENV_INIT \ + { \ + 'C', \ + malloc, free, \ + fsp_fuse_daemonize, \ 
+ fsp_fuse_set_signal_handlers, \ + fsp_fuse_conv_to_win_path, \ + fsp_fuse_winpid_to_pid, \ + { 0 }, \ + } + +/* + * Note that long is 8 bytes long in Cygwin64 and 4 bytes long in Win64. + * For this reason we avoid using long anywhere in these headers. + */ + +#else +#error unsupported environment +#endif + +struct fuse_stat_ex +{ + FSP_FUSE_STAT_EX_FIELD_DEFN +}; + +struct fsp_fuse_env +{ + unsigned environment; + void *(*memalloc)(size_t); + void (*memfree)(void *); + int (*daemonize)(int); + int (*set_signal_handlers)(void *); + char *(*conv_to_win_path)(const char *); + fuse_pid_t (*winpid_to_pid)(uint32_t); + void (*reserved[2])(); +}; + +FSP_FUSE_API void FSP_FUSE_API_NAME(fsp_fuse_signal_handler)(int sig); + +#if defined(_WIN64) || defined(_WIN32) + +static inline int fsp_fuse_daemonize(int foreground) +{ + (void)foreground; + return 0; +} + +static inline int fsp_fuse_set_signal_handlers(void *se) +{ + (void)se; + return 0; +} + +#elif defined(__CYGWIN__) + +static inline int fsp_fuse_daemonize(int foreground) +{ + int daemon(int nochdir, int noclose); + int chdir(const char *path); + + if (!foreground) + { + if (-1 == daemon(0, 0)) + return -1; + } + else + chdir("/"); + + return 0; +} + +static inline void *fsp_fuse_signal_thread(void *psigmask) +{ + int sig; + + if (0 == sigwait((sigset_t *)psigmask, &sig)) + FSP_FUSE_API_CALL(fsp_fuse_signal_handler)(sig); + + return 0; +} + +static inline int fsp_fuse_set_signal_handlers(void *se) +{ +#define FSP_FUSE_SET_SIGNAL_HANDLER(sig, newha)\ + if (-1 != sigaction((sig), 0, &oldsa) &&\ + oldsa.sa_handler == (se ? SIG_DFL : (newha)))\ + {\ + newsa.sa_handler = se ? (newha) : SIG_DFL;\ + sigaction((sig), &newsa, 0);\ + } +#define FSP_FUSE_SIGADDSET(sig)\ + if (-1 != sigaction((sig), 0, &oldsa) &&\ + oldsa.sa_handler == SIG_DFL)\ + sigaddset(&sigmask, (sig)); + + static sigset_t sigmask; + static pthread_t sigthr; + struct sigaction oldsa, newsa; + + // memset instead of initializer to avoid GCC -Wmissing-field-initializers warning + memset(&newsa, 0, sizeof newsa); + + if (0 != se) + { + if (0 == sigthr) + { + FSP_FUSE_SET_SIGNAL_HANDLER(SIGPIPE, SIG_IGN); + + sigemptyset(&sigmask); + FSP_FUSE_SIGADDSET(SIGHUP); + FSP_FUSE_SIGADDSET(SIGINT); + FSP_FUSE_SIGADDSET(SIGTERM); + if (0 != pthread_sigmask(SIG_BLOCK, &sigmask, 0)) + return -1; + + if (0 != pthread_create(&sigthr, 0, fsp_fuse_signal_thread, &sigmask)) + return -1; + } + } + else + { + if (0 != sigthr) + { + pthread_cancel(sigthr); + pthread_join(sigthr, 0); + sigthr = 0; + + if (0 != pthread_sigmask(SIG_UNBLOCK, &sigmask, 0)) + return -1; + sigemptyset(&sigmask); + + FSP_FUSE_SET_SIGNAL_HANDLER(SIGPIPE, SIG_IGN); + } + } + + return 0; + +#undef FSP_FUSE_SIGADDSET +#undef FSP_FUSE_SET_SIGNAL_HANDLER +} + +static inline char *fsp_fuse_conv_to_win_path(const char *path) +{ + void *cygwin_create_path(unsigned, const void *); + return (char *)cygwin_create_path( + 0/*CCP_POSIX_TO_WIN_A*/ | 0x100/*CCP_RELATIVE*/, + path); +} + +static inline fuse_pid_t fsp_fuse_winpid_to_pid(uint32_t winpid) +{ + pid_t cygwin_winpid_to_pid(int winpid); + pid_t pid = cygwin_winpid_to_pid(winpid); + return -1 != pid ? 
pid : (fuse_pid_t)winpid; +} +#endif + + +static inline struct fsp_fuse_env *fsp_fuse_env(void) +{ + static struct fsp_fuse_env env = FSP_FUSE_ENV_INIT; + return &env; +} + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/integration/Makefile b/integration/Makefile new file mode 100644 index 0000000..d7ee84e --- /dev/null +++ b/integration/Makefile @@ -0,0 +1,27 @@ +DURATION ?= 10 + +all: xattrs fsx s3test + + +xattrs: + touch /tmp/jfs/test_xattrs + setfattr -n user.k -v value /tmp/jfs/test_xattrs + getfattr -n user.k /tmp/jfs/test_xattrs | grep -q user.k= + +fsracer: secfs.test/tools/bin/fsracer + secfs.test/tools/bin/fsracer $(DURATION) /tmp/jfs > fsracer.log + +fsx: secfs.test/tools/bin/fsx + secfs.test/tools/bin/fsx -d $(DURATION) -p 10000 -F 100000 /tmp/jfs/fsx.out + +secfs.test/tools/bin/fsx: secfs.test + +secfs.test/tools/bin/fsracer: secfs.test + +secfs.test: + git clone https://github.com/billziss-gh/secfs.test.git + make -C secfs.test >secfs.test-build-integration.log 2>&1 + +s3test: + pip install awscli --upgrade + bash s3gateway_test.sh diff --git a/integration/integration_test.go b/integration/integration_test.go new file mode 100644 index 0000000..a5acff6 --- /dev/null +++ b/integration/integration_test.go @@ -0,0 +1,387 @@ +/* + * JuiceFS, Copyright 2020 Juicedata, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package integration + +import ( + "fmt" + "log" + "os" + "os/exec" + "path" + "path/filepath" + "runtime" + "strings" + "syscall" + "testing" + "time" + + jfsgateway "github.com/juicedata/juicefs/pkg/gateway" + "github.com/juicedata/juicefs/pkg/version" + mcli "github.com/minio/cli" + minio "github.com/minio/minio/cmd" + "github.com/minio/minio/pkg/auth" + + "github.com/google/uuid" + "github.com/juicedata/juicefs/pkg/chunk" + "github.com/juicedata/juicefs/pkg/fuse" + "github.com/juicedata/juicefs/pkg/meta" + "github.com/juicedata/juicefs/pkg/object" + "github.com/juicedata/juicefs/pkg/vfs" +) + +func getDefaultBucketName() string { + var defaultBucket string + switch runtime.GOOS { + case "darwin": + homeDir, err := os.UserHomeDir() + if err != nil { + log.Fatalf("%v", err) + } + defaultBucket = path.Join(homeDir, ".juicefs", "local") + case "windows": + defaultBucket = path.Join("C:/jfs/local") + default: + defaultBucket = "/var/jfs" + } + return defaultBucket +} + +func createSimpleStorage(format *meta.Format) (object.ObjectStorage, error) { + object.UserAgent = "JuiceFS" + var blob object.ObjectStorage + var err error + blob, err = object.CreateStorage(strings.ToLower(format.Storage), format.Bucket, format.AccessKey, format.SecretKey) + if err != nil { + return nil, err + } + blob = object.WithPrefix(blob, format.Name+"/") + return blob, nil +} + +func formatSimpleMethod(url, name string) { + m := meta.NewClient(url, &meta.Config{Retries: 2}) + format := meta.Format{ + Name: name, + UUID: uuid.New().String(), + Storage: "file", + Bucket: getDefaultBucketName() + "/", + AccessKey: "", + SecretKey: "", + Shards: 0, + BlockSize: 4096, + } + err := m.Init(format, true) + if err != nil { + log.Fatalf("format: %s", err) + } + log.Printf("Volume is formatted as %+v", format) +} + +func mountSimpleMethod(url, mp string) { + + fi, err := os.Stat(mp) + if !strings.Contains(mp, ":") && err != nil { + if err := os.MkdirAll(mp, 0777); err != nil { + if os.IsExist(err) { + // a broken mount point, umount it + if err = doSimpleUmount(mp, true); err != nil { + log.Fatalf("umount %s: %s", mp, err) + } + } else { + log.Fatalf("create %s: %s", mp, err) + } + } + } else if err == nil && fi.Size() == 0 { + // a broken mount point, umount it + if err = doSimpleUmount(mp, true); err != nil { + log.Fatalf("umount %s: %s", mp, err) + } + } + + metaConf := &meta.Config{ + Retries: 10, + Strict: true, + MountPoint: mp, + } + m := meta.NewClient(url, metaConf) + format, err := m.Load() + if err != nil { + log.Fatalf("load setting: %s", err) + } + + chunkConf := chunk.Config{ + BlockSize: format.BlockSize * 1024, + Compress: format.Compression, + MaxUpload: 20, + BufferSize: 300 << 20, + CacheSize: 1024, + CacheDir: "memory", + } + + blob, err := createSimpleStorage(format) + if err != nil { + log.Fatalf("object storage: %s", err) + } + log.Printf("Data use %s", blob) + store := chunk.NewCachedStore(blob, chunkConf) + + m.OnMsg(meta.CompactChunk, func(args ...interface{}) error { + slices := args[0].([]meta.Slice) + chunkid := args[1].(uint64) + return vfs.Compact(chunkConf, store, slices, chunkid) + }) + + conf := &vfs.Config{ + Meta: metaConf, + Format: format, + Version: "Juicefs", + Mountpoint: mp, + Chunk: &chunkConf, + } + + go checkMountpointInTenSeconds(mp, nil) + + err = m.NewSession() + if err != nil { + log.Fatalf("new session: %s", err) + } + + conf.AttrTimeout = time.Second + conf.EntryTimeout = time.Second + conf.DirEntryTimeout = time.Second + v := vfs.NewVFS(conf, m, store) + serverErr := 
fuse.Serve(v, "", true) + if serverErr != nil { + log.Fatalf("fuse server err: %s\n", serverErr) + } + closeErr := m.CloseSession() + if closeErr != nil { + log.Fatalf("close session err: %s\n", closeErr) + } +} + +func doSimpleUmount(mp string, force bool) error { + var cmd *exec.Cmd + switch runtime.GOOS { + case "darwin": + if force { + cmd = exec.Command("diskutil", "umount", "force", mp) + } else { + cmd = exec.Command("diskutil", "umount", mp) + } + case "linux": + if _, err := exec.LookPath("fusermount"); err == nil { + if force { + cmd = exec.Command("fusermount", "-uz", mp) + } else { + cmd = exec.Command("fusermount", "-u", mp) + } + } else { + if force { + cmd = exec.Command("umount", "-l", mp) + } else { + cmd = exec.Command("umount", mp) + } + } + case "windows": + if !force { + _ = os.Mkdir(filepath.Join(mp, ".UMOUNTIT"), 0755) + return nil + } else { + cmd = exec.Command("taskkill", "/IM", "juicefs.exe", "/F") + } + default: + return fmt.Errorf("OS %s is not supported", runtime.GOOS) + } + out, err := cmd.CombinedOutput() + if err != nil { + log.Print(string(out)) + } + return err +} + +func checkMountpointInTenSeconds(mp string, ch chan int) { + for i := 0; i < 20; i++ { + time.Sleep(time.Millisecond * 500) + st, err := os.Stat(mp) + if err == nil { + if sys, ok := st.Sys().(*syscall.Stat_t); ok && sys.Ino == 1 { + //0 is success + if ch != nil { + ch <- 0 + } + log.Printf("\033[92mOK\033[0m, %s is ready ", mp) + return + } + } + os.Stdout.WriteString(".") + os.Stdout.Sync() + } + //1 is failure + if ch != nil { + ch <- 1 + } + os.Stdout.WriteString("\n") + log.Printf("fail to mount after 10 seconds, please mount in foreground") +} + +func setUp(metaUrl, name, mp string) int { + ch := make(chan int) + formatSimpleMethod(metaUrl, name) + go checkMountpointInTenSeconds(mp, ch) + go mountSimpleMethod(metaUrl, mp) + chInt := <-ch + return chInt +} + +func TestMain(m *testing.M) { + metaUrl := "sqlite3://tmpsql" + mp := "/tmp/jfs" + result := setUp(metaUrl, "pics", mp) + if result != 0 { + log.Fatalln("mount is not completed in ten seconds") + return + } + go func() { + err := setUpGateway() + if err != nil { + log.Fatalf("set up gateway failed: %v", err) + } + }() + + code := m.Run() + umountErr := doSimpleUmount(mp, true) + if umountErr != nil { + log.Fatalf("umount err: %s\n", umountErr) + } + os.Exit(code) +} + +func TestIntegration(t *testing.T) { + makeCmd := exec.Command("make") + out, err := makeCmd.CombinedOutput() + if err != nil { + t.Logf("std out:\n%s\n", string(out)) + t.Fatalf("std err failed with %s\n", err) + } else { + t.Logf("std out:\n%s\n", string(out)) + } +} + +var gw *GateWay +var metaUrl = "redis://localhost:6379/11" + +func setUpGateway() error { + formatSimpleMethod(metaUrl, "gateway-test") + address := "0.0.0.0:9008" + gw = &GateWay{} + args := []string{"gateway", "--address", address, "--anonymous"} + app := &mcli.App{ + Action: gateway2, + Flags: []mcli.Flag{ + mcli.StringFlag{ + Name: "address", + Value: ":9000", + Usage: "bind to a specific ADDRESS:PORT, ADDRESS can be an IP or hostname", + }, + mcli.BoolFlag{ + Name: "anonymous", + Usage: "hide sensitive information from logging", + }, + mcli.BoolFlag{ + Name: "json", + Usage: "output server logs and startup information in json format", + }, + mcli.BoolFlag{ + Name: "quiet", + Usage: "disable MinIO startup information", + }, + }, + } + return app.Run(args) +} + +func gateway2(ctx *mcli.Context) error { + minio.StartGateway(ctx, gw) + return nil +} + +type GateWay struct{} + +func (g *GateWay) 
Name() string { + return "JuiceFS" +} + +func (g *GateWay) Production() bool { + return true +} + +func (g *GateWay) NewGatewayLayer(creds auth.Credentials) (minio.ObjectLayer, error) { + + m := meta.NewClient(metaUrl, &meta.Config{ + Retries: 10, + Strict: true, + }) + + format, err := m.Load() + if err != nil { + log.Fatalf("load setting: %s", err) + } + + chunkConf := chunk.Config{ + BlockSize: format.BlockSize * 1024, + Compress: format.Compression, + MaxUpload: 20, + BufferSize: 300 << 20, + CacheSize: 1024, + CacheDir: "memory", + } + + blob, err := createSimpleStorage(format) + if err != nil { + log.Fatalf("object storage: %s", err) + } + + store := chunk.NewCachedStore(blob, chunkConf) + m.OnMsg(meta.DeleteChunk, func(args ...interface{}) error { + chunkid := args[0].(uint64) + length := args[1].(uint32) + return store.Remove(chunkid, int(length)) + }) + m.OnMsg(meta.CompactChunk, func(args ...interface{}) error { + slices := args[0].([]meta.Slice) + chunkid := args[1].(uint64) + return vfs.Compact(chunkConf, store, slices, chunkid) + }) + err = m.NewSession() + if err != nil { + log.Fatalf("new session: %s", err) + } + + conf := &vfs.Config{ + Meta: &meta.Config{ + Retries: 10, + }, + Format: format, + Version: version.Version(), + AttrTimeout: time.Second, + EntryTimeout: time.Second, + DirEntryTimeout: time.Second, + Chunk: &chunkConf, + } + return jfsgateway.NewJFSGateway(conf, m, store, true, true) +} diff --git a/integration/s3gateway_test.sh b/integration/s3gateway_test.sh new file mode 100755 index 0000000..82b6069 --- /dev/null +++ b/integration/s3gateway_test.sh @@ -0,0 +1,1732 @@ +#!/bin/bash + +# Mint (C) 2017-2020 Minio, Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# environment + +os="linux" +errno=$errno +if [[ `uname -a` =~ "Darwin" ]];then + os="mac" + errno=254 +fi +echo "os=$os" + +set -x +os="linux" +errno=$errno +if [[ `uname -a` =~ "Darwin" ]];then + os="mac" + errno=254 +fi +echo "os=$os" + + +MINT_DATA_DIR=testdata +MINT_MODE=core +SERVER_ENDPOINT="127.0.0.1:9008" +ACCESS_KEY="testUser" +SECRET_KEY="testUserPassword" +ENABLE_HTTPS=0 +SERVER_REGION=us-east-1 +ENABLE_VIRTUAL_STYLE=0 + + + + +# create testdata +declare -A data_file_map +data_file_map["datafile-0-b"]="0" +data_file_map["datafile-1-b"]="1" +data_file_map["datafile-1-kB"]="1K" +data_file_map["datafile-10-kB"]="10K" +data_file_map["datafile-33-kB"]="33K" +data_file_map["datafile-100-kB"]="100K" +data_file_map["datafile-1.03-MB"]="1056K" +data_file_map["datafile-1-MB"]="1M" +data_file_map["datafile-5-MB"]="5M" +data_file_map["datafile-5243880-b"]="5243880" +data_file_map["datafile-6-MB"]="6M" +data_file_map["datafile-10-MB"]="10M" +data_file_map["datafile-11-MB"]="11M" +data_file_map["datafile-65-MB"]="65M" +data_file_map["datafile-129-MB"]="129M" + +mkdir -p "$MINT_DATA_DIR" + + +if [ ! "$(ls $MINT_DATA_DIR)" ]; then + for filename in "${!data_file_map[@]}"; do + echo "creating $MINT_DATA_DIR/$filename" + if ! 
shred -n 1 -s "${data_file_map[$filename]}" - 1>"$MINT_DATA_DIR/$filename" 2>/dev/null; then + echo "unable to create data file $MINT_DATA_DIR/$filename" + exit 1 + fi + done +fi + +# configuration +aws configure set aws_access_key_id "$ACCESS_KEY" +aws configure set aws_secret_access_key "$SECRET_KEY" +aws configure set default.region "$SERVER_REGION" + +# run tests for virtual style if provided +if [ "$ENABLE_VIRTUAL_STYLE" -eq 1 ]; then + # Setup endpoint scheme + endpoint="http://$DOMAIN:$SERVER_PORT" + if [ "$ENABLE_HTTPS" -eq 1 ]; then + endpoint="https://$DOMAIN:$SERVER_PORT" + fi + dnsmasq --address="/$DOMAIN/$SERVER_IP" --user=root + echo -e "nameserver 127.0.0.1\n$(cat /etc/resolv.conf)" > /etc/resolv.conf + aws configure set default.s3.addressing_style virtual +# ./test.sh "$endpoint" 1>>"$output_log_file" 2>"$error_log_file" + ./test.sh "$endpoint" + aws configure set default.s3.addressing_style path +fi + +endpoint="http://$SERVER_ENDPOINT" +if [ "$ENABLE_HTTPS" -eq 1 ]; then + endpoint="https://$SERVER_ENDPOINT" +fi +# run path style tests +# ./test.sh "$endpoint" 1>>"$output_log_file" 2>"$error_log_file" + + +# test +function get_md5() { + if [ $os == "mac" ]; then + md5rt=$(md5 "$1" | awk '{print $4}') + else + md5rt=$(md5sum "$1" | awk '{print $1}') + fi +} + +get_md5 "${MINT_DATA_DIR}/datafile-1-kB" +HASH_1_KB=$md5rt + +get_md5 "${MINT_DATA_DIR}/datafile-65-MB" +HASH_65_MB=$md5rt + +_init() { + AWS="aws --endpoint-url $1" +} + + +function get_time() { + date +%s%N +} + +function get_duration() { + start_time=$1 + end_time=$(get_time) + + echo $(( (end_time - start_time) / 1000000 )) +} + +function log_success() { + function=$(python -c 'import sys,json; print(json.dumps(sys.stdin.read()))' <<<"$2") + printf '{"name": "awscli", "duration": %d, "function": %s, "status": "PASS"}\n' "$1" "$function" +} + +function log_failure() { + function=$(python -c 'import sys,json; print(json.dumps(sys.stdin.read()))' <<<"$2") + err=$(echo "$3" | tr -d '\n') + printf '{"name": "awscli", "duration": %d, "function": %s, "status": "FAIL", "error": "%s"}\n' "$1" "$function" "$err" +} + +function log_alert() { + function=$(python -c 'import sys,json; print(json.dumps(sys.stdin.read()))' <<<"$2") + err=$(echo "$4" | tr -d '\n') + printf '{"name": "awscli", "duration": %d, "function": %s, "status": "FAIL", "alert": "%s", "error": "%s"}\n' "$1" "$function" "$3" "$err" +} + +function make_bucket() { + # Make bucket + bucket_name="awscli-mint-test-bucket-$RANDOM" + function="${AWS} s3api create-bucket --bucket ${bucket_name}" + + # execute the test + out=$($function 2>&1) + rv=$? + + # if command is successful print bucket_name or print error + if [ $rv -eq 0 ]; then + echo "${bucket_name}" + else + echo "${out}" + fi + + return $rv +} + +function delete_bucket() { + # Delete bucket + function="${AWS} s3 rb s3://${1} --force" + out=$($function 2>&1) + rv=$? + + # echo the output + echo "${out}" + + return $rv +} + +# Tests creating, stat and delete on a bucket. +function test_create_bucket() { + # log start time + start_time=$(get_time) + + function="make_bucket" + bucket_name=$(make_bucket) + rv=$? + # save the ref to function being tested, so it can be logged + test_function=${function} + + # if make_bucket is successful stat the bucket + if [ $rv -eq 0 ]; then + function="${AWS} s3api head-bucket --bucket ${bucket_name}" + out=$($function 2>&1) + rv=$? 
+ else + # if make bucket failes, $bucket_name has the error output + out="${bucket_name}" + fi + + # if stat bucket is successful remove the bucket + if [ $rv -eq 0 ]; then + function="delete_bucket" + out=$(delete_bucket "${bucket_name}") + rv=$? + else + # if make bucket failes, $bucket_name has the error output + out="${bucket_name}" + fi + + if [ $rv -eq 0 ]; then + log_success "$(get_duration "$start_time")" "${test_function}" + else + # clean up and log error + ${AWS} s3 rb s3://"${bucket_name}" --force > /dev/null 2>&1 + log_failure "$(get_duration "$start_time")" "${function}" "${out}" + fi + + return $rv +} + +# Tests creating and deleting an object. +function test_upload_object() { + # log start time + start_time=$(get_time) + + function="make_bucket" + bucket_name=$(make_bucket) + rv=$? + + # if make bucket succeeds upload a file + if [ $rv -eq 0 ]; then + function="${AWS} s3api put-object --body ${MINT_DATA_DIR}/datafile-1-kB --bucket ${bucket_name} --key datafile-1-kB" + out=$($function 2>&1) + rv=$? + else + # if make bucket fails, $bucket_name has the error output + out="${bucket_name}" + fi + + # if upload succeeds download the file + if [ $rv -eq 0 ]; then + function="${AWS} s3api get-object --bucket ${bucket_name} --key datafile-1-kB /tmp/datafile-1-kB" + # save the ref to function being tested, so it can be logged + test_function=${function} + out=$($function 2>&1) + rv=$? + # calculate the md5 hash of downloaded file + get_md5 "/tmp/datafile-1-kB" + hash2=$md5rt + fi + + # if download succeeds, verify downloaded file + if [ $rv -eq 0 ]; then + if [ "$HASH_1_KB" == "$hash2" ]; then + function="delete_bucket" + out=$(delete_bucket "$bucket_name") + rv=$? + # remove download file + rm -f /tmp/datafile-1-kB + else + rv=1 + out="Checksum verification failed for uploaded object" + fi + fi + + if [ $rv -eq 0 ]; then + log_success "$(get_duration "$start_time")" "${test_function}" + else + # clean up and log error + ${AWS} s3 rb s3://"${bucket_name}" --force > /dev/null 2>&1 + log_failure "$(get_duration "$start_time")" "${function}" "${out}" + fi + + return $rv +} + +# Test lookup a directory prefix. +function test_lookup_object_prefix() { + # log start time + start_time=$(get_time) + + function="make_bucket" + bucket_name=$(make_bucket) + rv=$? + + # if make bucket succeeds create a directory. + if [ $rv -eq 0 ]; then + function="${AWS} s3api put-object --bucket ${bucket_name} --key prefix/directory/" + # save the ref to function being tested, so it can be logged + test_function=${function} + + out=$($function 2>&1) + + rv=$? + else + # if make_bucket fails, $bucket_name has the error output + out="${bucket_name}" + fi + + if [ $rv -eq 0 ]; then + ## Attempt an overwrite of the prefix again and should succeed as well. + function="${AWS} s3api put-object --bucket ${bucket_name} --key prefix/directory/" + # save the ref to function being tested, so it can be logged + test_function=${function} + out=$($function 2>&1) + rv=$? + fi + + # if upload succeeds lookup for the prefix. + if [ $rv -eq 0 ]; then + function="${AWS} s3api head-object --bucket ${bucket_name} --key prefix/directory/" + # save the ref to function being tested, so it can be logged + test_function=${function} + out=$($function 2>&1) + rv=$? + fi + + # if directory create succeeds, upload the object. 
+ if [ $rv -eq 0 ]; then + function="${AWS} s3api put-object --body ${MINT_DATA_DIR}/datafile-1-kB --bucket ${bucket_name} --key prefix/directory/datafile-1-kB" + # save the ref to function being tested, so it can be logged + test_function=${function} + out=$($function 2>&1) + rv=$? + fi + + # Attempt a delete on prefix shouldn't delete the directory since we have an object inside it. + if [ $rv -eq 0 ]; then + function="${AWS} s3api delete-object --bucket ${bucket_name} --key prefix/directory/" + # save the ref to function being tested, so it can be logged + test_function=${function} + out=$($function 2>&1) + rv=$? + fi + + # if upload succeeds lookup for the object should succeed. + if [ $rv -eq 0 ]; then + function="${AWS} s3api head-object --bucket ${bucket_name} --key prefix/directory/datafile-1-kB" + # save the ref to function being tested, so it can be logged + test_function=${function} + out=$($function 2>&1) + rv=$? + fi + + # delete bucket + if [ $rv -eq 0 ]; then + function="delete_bucket" + out=$(delete_bucket "$bucket_name") + rv=$? + fi + + if [ $rv -ne 0 ]; then + # clean up and log error + ${AWS} s3 rb s3://"${bucket_name}" --force > /dev/null 2>&1 + log_failure "$(get_duration "$start_time")" "${function}" "${out}" + else + log_success "$(get_duration "$start_time")" "${test_function}" + fi + + return $rv +} + +# Tests listing objects for both v1 and v2 API. +function test_list_objects() { + # log start time + start_time=$(get_time) + + function="make_bucket" + bucket_name=$(make_bucket) + rv=$? + + # if make bucket succeeds upload a file + if [ $rv -eq 0 ]; then + function="${AWS} s3api put-object --body ${MINT_DATA_DIR}/datafile-1-kB --bucket ${bucket_name} --key datafile-1-kB" + out=$($function 2>&1) + rv=$? + else + # if make bucket fails, $bucket_name has the error output + out="${bucket_name}" + fi + + # if upload objects succeeds, list objects with existing prefix + if [ $rv -eq 0 ]; then + function="${AWS} s3api list-objects --bucket ${bucket_name} --prefix datafile-1-kB" + test_function=${function} + out=$($function) + rv=$? + key_name=$(echo "$out" | jq -r .Contents[].Key) + if [ $rv -eq 0 ] && [ "$key_name" != "datafile-1-kB" ]; then + rv=1 + # since rv is 0, command passed, but didn't return expected value. In this case set the output + out="list-objects with existing prefix failed" + fi + fi + + # if upload objects succeeds, list objects without existing prefix + if [ $rv -eq 0 ]; then + function="${AWS} s3api list-objects --bucket ${bucket_name} --prefix linux" + out=$($function) + rv=$? + key_name=$(echo "$out" | jq -r .Contents[].Key) + if [ $rv -eq 0 ] && [ "$key_name" != "" ]; then + rv=1 + out="list-objects without existing prefix failed" + fi + fi + + # if upload objects succeeds, list objectsv2 with existing prefix + if [ $rv -eq 0 ]; then + function="${AWS} s3api list-objects-v2 --bucket ${bucket_name} --prefix datafile-1-kB" + out=$($function) + rv=$? + key_name=$(echo "$out" | jq -r .Contents[].Key) + if [ $rv -eq 0 ] && [ "$key_name" != "datafile-1-kB" ]; then + rv=1 + out="list-objects-v2 with existing prefix failed" + fi + fi + + # if upload objects succeeds, list objectsv2 without existing prefix + if [ $rv -eq 0 ]; then + function="${AWS} s3api list-objects-v2 --bucket ${bucket_name} --prefix linux" + out=$($function) + rv=$? 
+ key_name=$(echo "$out" | jq -r .Contents[].Key) + if [ $rv -eq 0 ] && [ "$key_name" != "" ]; then + rv=1 + out="list-objects-v2 without existing prefix failed" + fi + fi + + if [ $rv -eq 0 ]; then + function="delete_bucket" + out=$(delete_bucket "$bucket_name") + rv=$? + # remove download file + rm -f /tmp/datafile-1-kB + fi + + if [ $rv -eq 0 ]; then + log_success "$(get_duration "$start_time")" "${test_function}" + else + # clean up and log error + ${AWS} s3 rb s3://"${bucket_name}" --force > /dev/null 2>&1 + rm -f /tmp/datafile-1-kB + log_failure "$(get_duration "$start_time")" "${function}" "${out}" + fi + + return $rv +} + +# Tests multipart API with 0 byte part. +function test_multipart_upload_0byte() { + # log start time + start_time=$(get_time) + + function="make_bucket" + bucket_name=$(make_bucket) + object_name=${bucket_name}"-object" + rv=$? + + # if make bucket succeeds upload a file + if [ $rv -eq 0 ]; then + function="${AWS} s3api put-object --body ${MINT_DATA_DIR}/datafile-0-b --bucket ${bucket_name} --key datafile-0-b" + out=$($function 2>&1) + rv=$? + else + # if make bucket fails, $bucket_name has the error output + out="${bucket_name}" + fi + + if [ $rv -eq 0 ]; then + # create multipart + function="${AWS} s3api create-multipart-upload --bucket ${bucket_name} --key ${object_name}" + test_function=${function} + out=$($function) + rv=$? + upload_id=$(echo "$out" | jq -r .UploadId) + fi + + if [ $rv -eq 0 ]; then + # Capture etag for part-number 1 + function="${AWS} s3api upload-part --bucket ${bucket_name} --key ${object_name} --body ${MINT_DATA_DIR}/datafile-0-b --upload-id ${upload_id} --part-number 1" + out=$($function) + rv=$? + etag1=$(echo "$out" | jq -r .ETag) + fi + + if [ $rv -eq 0 ]; then + # Create a multipart struct file for completing multipart transaction + echo "{ + \"Parts\": [ + { + \"ETag\": ${etag1}, + \"PartNumber\": 1 + } + ] + }" >> /tmp/multipart + fi + + if [ $rv -eq 0 ]; then + # Use saved etags to complete the multipart transaction + function="${AWS} s3api complete-multipart-upload --multipart-upload file:///tmp/multipart --bucket ${bucket_name} --key ${object_name} --upload-id ${upload_id}" + out=$($function) + rv=$? + etag=$(echo "$out" | jq -r .ETag | sed -e 's/^"//' -e 's/"$//') + if [ "${etag}" == "" ]; then + rv=1 + out="complete-multipart-upload failed" + fi + fi + + if [ $rv -eq 0 ]; then + function="${AWS} s3api get-object --bucket ${bucket_name} --key ${object_name} /tmp/datafile-0-b" + test_function=${function} + out=$($function 2>&1) + rv=$? + fi + + if [ $rv -eq 0 ]; then + ret_etag=$(echo "$out" | jq -r .ETag | sed -e 's/^"//' -e 's/"$//') + # match etag + if [ "$etag" != "$ret_etag" ]; then + rv=1 + out="Etag mismatch for multipart 0 byte object" + fi + rm -f /tmp/datafile-0-b + fi + + if [ $rv -eq 0 ]; then + function="delete_bucket" + out=$(delete_bucket "$bucket_name") + rv=$? + # remove temp file + rm -f /tmp/multipart + fi + + if [ $rv -eq 0 ]; then + log_success "$(get_duration "$start_time")" "${test_function}" + else + # clean up and log error + ${AWS} s3 rb s3://"${bucket_name}" --force > /dev/null 2>&1 + rm -f /tmp/multipart + log_failure "$(get_duration "$start_time")" "${function}" "${out}" + fi + + return $rv +} + +# Tests multipart API by making each individual calls. +function test_multipart_upload() { + # log start time + start_time=$(get_time) + + function="make_bucket" + bucket_name=$(make_bucket) + object_name=${bucket_name}"-object" + rv=$? 
+ + # if make bucket succeeds upload a file + if [ $rv -eq 0 ]; then + function="${AWS} s3api put-object --body ${MINT_DATA_DIR}/datafile-1-kB --bucket ${bucket_name} --key datafile-1-kB" + out=$($function 2>&1) + rv=$? + else + # if make bucket fails, $bucket_name has the error output + out="${bucket_name}" + fi + + if [ $rv -eq 0 ]; then + # create multipart + function="${AWS} s3api create-multipart-upload --bucket ${bucket_name} --key ${object_name}" + test_function=${function} + out=$($function) + rv=$? + upload_id=$(echo "$out" | jq -r .UploadId) + fi + + if [ $rv -eq 0 ]; then + # Capture etag for part-number 1 + function="${AWS} s3api upload-part --bucket ${bucket_name} --key ${object_name} --body ${MINT_DATA_DIR}/datafile-5-MB --upload-id ${upload_id} --part-number 1" + out=$($function) + rv=$? + etag1=$(echo "$out" | jq -r .ETag) + fi + + if [ $rv -eq 0 ]; then + # Capture etag for part-number 2 + function="${AWS} s3api upload-part --bucket ${bucket_name} --key ${object_name} --body ${MINT_DATA_DIR}/datafile-1-kB --upload-id ${upload_id} --part-number 2" + out=$($function) + rv=$? + etag2=$(echo "$out" | jq -r .ETag) + # Create a multipart struct file for completing multipart transaction + echo "{ + \"Parts\": [ + { + \"ETag\": ${etag1}, + \"PartNumber\": 1 + }, + { + \"ETag\": ${etag2}, + \"PartNumber\": 2 + } + ] + }" >> /tmp/multipart + fi + + if [ $rv -eq 0 ]; then + # Use saved etags to complete the multipart transaction + function="${AWS} s3api complete-multipart-upload --multipart-upload file:///tmp/multipart --bucket ${bucket_name} --key ${object_name} --upload-id ${upload_id}" + out=$($function) + rv=$? + finalETag=$(echo "$out" | jq -r .ETag | sed -e 's/^"//' -e 's/"$//') + if [ "${finalETag}" == "" ]; then + rv=1 + out="complete-multipart-upload failed" + fi + fi + + if [ $rv -eq 0 ]; then + function="delete_bucket" + out=$(delete_bucket "$bucket_name") + rv=$? + # remove temp file + rm -f /tmp/multipart + fi + + if [ $rv -eq 0 ]; then + log_success "$(get_duration "$start_time")" "${test_function}" + else + # clean up and log error + ${AWS} s3 rb s3://"${bucket_name}" --force > /dev/null 2>&1 + rm -f /tmp/multipart + log_failure "$(get_duration "$start_time")" "${function}" "${out}" + fi + + return $rv +} + +# List number of objects based on the maxKey +# value set. +function test_max_key_list() { + # log start time + start_time=$(get_time) + + function="make_bucket" + bucket_name=$(make_bucket) + rv=$? + + # if make bucket succeeds upload a file + if [ $rv -eq 0 ]; then + function="${AWS} s3api put-object --body ${MINT_DATA_DIR}/datafile-1-b --bucket ${bucket_name} --key datafile-1-b" + out=$($function 2>&1) + rv=$? + else + # if make bucket fails, $bucket_name has the error output + out="${bucket_name}" + fi + + # copy object server side + if [ $rv -eq 0 ]; then + function="${AWS} s3api copy-object --bucket ${bucket_name} --key datafile-1-b-copy --copy-source ${bucket_name}/datafile-1-b" + out=$($function) + rv=$? + fi + + if [ $rv -eq 0 ]; then + function="${AWS} s3api list-objects-v2 --bucket ${bucket_name} --max-keys 1" + test_function=${function} + out=$($function 2>&1) + rv=$? + if [ $rv -eq 0 ]; then + out=$(echo "$out" | jq '.KeyCount') + rv=$? + fi + fi + + if [ $rv -eq 0 ]; then + function="delete_bucket" + out=$(delete_bucket "$bucket_name") + rv=$? 
+ # The command passed, but the delete_bucket failed + out="delete_bucket for test_max_key_list failed" + fi + + if [ $rv -eq 0 ]; then + log_success "$(get_duration "$start_time")" "${test_function}" + else + # clean up and log error + ${AWS} s3 rb s3://"${bucket_name}" --force > /dev/null 2>&1 + log_failure "$(get_duration "$start_time")" "${function}" "${out}" + fi + + return $rv +} + +# Copy object tests for server side copy +# of the object, validates returned md5sum. +function test_copy_object() { + # log start time + start_time=$(get_time) + + function="make_bucket" + bucket_name=$(make_bucket) + rv=$? + + # if make bucket succeeds upload a file + if [ $rv -eq 0 ]; then + function="${AWS} s3api put-object --body ${MINT_DATA_DIR}/datafile-1-kB --bucket ${bucket_name} --key datafile-1-kB" + out=$($function 2>&1) + rv=$? + else + # if make bucket fails, $bucket_name has the error output + out="${bucket_name}" + fi + + # copy object server side + if [ $rv -eq 0 ]; then + function="${AWS} s3api copy-object --bucket ${bucket_name} --key datafile-1-kB-copy --copy-source ${bucket_name}/datafile-1-kB" + test_function=${function} + out=$($function) + rv=$? + hash2=$(echo "$out" | jq -r .CopyObjectResult.ETag | sed -e 's/^"//' -e 's/"$//') + if [ $rv -eq 0 ] && [ "$HASH_1_KB" != "$hash2" ]; then + # Verification failed + rv=1 + out="Hash mismatch expected $HASH_1_KB, got $hash2" + fi + fi + + ${AWS} s3 rb s3://"${bucket_name}" --force > /dev/null 2>&1 + if [ $rv -eq 0 ]; then + log_success "$(get_duration "$start_time")" "${test_function}" + else + log_failure "$(get_duration "$start_time")" "${function}" "${out}" + fi + + return $rv +} + +# Copy object tests for server side copy +# of the object, validates returned md5sum. +# validates change in storage class as well +function test_copy_object_storage_class() { + # log start time + start_time=$(get_time) + + function="make_bucket" + bucket_name=$(make_bucket) + rv=$? + + # if make bucket succeeds upload a file + if [ $rv -eq 0 ]; then + function="${AWS} s3api put-object --body ${MINT_DATA_DIR}/datafile-1-kB --bucket ${bucket_name} --key datafile-1-kB" + out=$($function 2>&1) + rv=$? + else + # if make bucket fails, $bucket_name has the error output + out="${bucket_name}" + fi + + # copy object server side + if [ $rv -eq 0 ]; then + function="${AWS} s3api copy-object --bucket ${bucket_name} --storage-class REDUCED_REDUNDANCY --key datafile-1-kB-copy --copy-source ${bucket_name}/datafile-1-kB" + test_function=${function} + out=$($function 2>&1) + rv=$? + # if this functionality is not implemented return right away. + if [ $rv -ne 0 ]; then + if echo "$out" | grep -q "NotImplemented"; then + ${AWS} s3 rb s3://"${bucket_name}" --force > /dev/null 2>&1 + return 0 + fi + fi + hash2=$(echo "$out" | jq -r .CopyObjectResult.ETag | sed -e 's/^"//' -e 's/"$//') + if [ $rv -eq 0 ] && [ "$HASH_1_KB" != "$hash2" ]; then + # Verification failed + rv=1 + out="Hash mismatch expected $HASH_1_KB, got $hash2" + fi + fi + + ${AWS} s3 rb s3://"${bucket_name}" --force > /dev/null 2>&1 + if [ $rv -eq 0 ]; then + log_success "$(get_duration "$start_time")" "${test_function}" + else + log_failure "$(get_duration "$start_time")" "${function}" "${out}" + fi + + return $rv +} + +# Copy object tests for server side copy +# to itself by changing storage class +function test_copy_object_storage_class_same() { + # log start time + start_time=$(get_time) + + function="make_bucket" + bucket_name=$(make_bucket) + rv=$? 
+ + # if make bucket succeeds upload a file + if [ $rv -eq 0 ]; then + function="${AWS} s3api put-object --body ${MINT_DATA_DIR}/datafile-1-kB --bucket ${bucket_name} --key datafile-1-kB" + out=$($function 2>&1) + rv=$? + else + # if make bucket fails, $bucket_name has the error output + out="${bucket_name}" + fi + + # copy object server side + if [ $rv -eq 0 ]; then + function="${AWS} s3api copy-object --bucket ${bucket_name} --storage-class REDUCED_REDUNDANCY --key datafile-1-kB --copy-source ${bucket_name}/datafile-1-kB" + test_function=${function} + out=$($function 2>&1) + rv=$? + # if this functionality is not implemented return right away. + if [ $rv -ne 0 ]; then + if echo "$out" | grep -q "NotImplemented"; then + ${AWS} s3 rb s3://"${bucket_name}" --force > /dev/null 2>&1 + return 0 + fi + fi + hash2=$(echo "$out" | jq -r .CopyObjectResult.ETag | sed -e 's/^"//' -e 's/"$//') + if [ $rv -eq 0 ] && [ "$HASH_1_KB" != "$hash2" ]; then + # Verification failed + rv=1 + out="Hash mismatch expected $HASH_1_KB, got $hash2" + fi + fi + + ${AWS} s3 rb s3://"${bucket_name}" --force > /dev/null 2>&1 + if [ $rv -eq 0 ]; then + log_success "$(get_duration "$start_time")" "${test_function}" + else + log_failure "$(get_duration "$start_time")" "${function}" "${out}" + fi + + return $rv +} + +# Tests for presigned URL success case, presigned URL +# is correct and accessible - we calculate md5sum of +# the object and validate it against a local files md5sum. +function test_presigned_object() { + # log start time + start_time=$(get_time) + + function="make_bucket" + bucket_name=$(make_bucket) + rv=$? + + # if make bucket succeeds upload a file + if [ $rv -eq 0 ]; then + function="${AWS} s3api put-object --body ${MINT_DATA_DIR}/datafile-1-kB --bucket ${bucket_name} --key datafile-1-kB" + out=$($function 2>&1) + rv=$? + else + # if make bucket fails, $bucket_name has the error output + out="${bucket_name}" + fi + + if [ $rv -eq 0 ]; then + function="${AWS} s3 presign s3://${bucket_name}/datafile-1-kB" + test_function=${function} + url=$($function) + rv=$? + curl -sS -X GET "${url}" > /tmp/datafile-1-kB + get_md5 /tmp/datafile-1-kB + hash2=$md5rt + if [ "$HASH_1_KB" == "$hash2" ]; then + function="delete_bucket" + out=$(delete_bucket "$bucket_name") + rv=$? + # remove download file + rm -f /tmp/datafile-1-kB + else + rv=1 + out="Checksum verification failed for downloaded object" + fi + fi + + if [ $rv -eq 0 ]; then + log_success "$(get_duration "$start_time")" "${test_function}" + else + # clean up and log error + ${AWS} s3 rb s3://"${bucket_name}" --force > /dev/null 2>&1 + log_failure "$(get_duration "$start_time")" "${function}" "${out}" + fi + + return $rv +} + +# Tests creating and deleting an object - 10MiB +function test_upload_object_10() { + # log start time + start_time=$(get_time) + + function="make_bucket" + bucket_name=$(make_bucket) + rv=$? + + # if make bucket succeeds upload a file + if [ $rv -eq 0 ]; then + function="${AWS} s3api put-object --body ${MINT_DATA_DIR}/datafile-10-MB --bucket ${bucket_name} --key datafile-10-MB" + out=$($function 2>&1) + rv=$? + else + # if make bucket fails, $bucket_name has the error output + out="${bucket_name}" + fi + + if [ $rv -eq 0 ]; then + function="delete_bucket" + out=$(delete_bucket "$bucket_name") + rv=$? 
+ fi + + if [ $rv -eq 0 ]; then + log_success "$(get_duration "$start_time")" "${test_function}" + else + # clean up and log error + ${AWS} s3 rb s3://"${bucket_name}" --force > /dev/null 2>&1 + log_failure "$(get_duration "$start_time")" "${function}" "${out}" + fi + + return $rv +} + +# Tests multipart API by making each individual calls with 10MiB part size. +function test_multipart_upload_10() { + # log start time + start_time=$(get_time) + + function="make_bucket" + bucket_name=$(make_bucket) + object_name=${bucket_name}"-object" + rv=$? + + if [ $rv -eq 0 ]; then + # create multipart + function="${AWS} s3api create-multipart-upload --bucket ${bucket_name} --key ${object_name}" + test_function=${function} + out=$($function) + rv=$? + upload_id=$(echo "$out" | jq -r .UploadId) + fi + + if [ $rv -eq 0 ]; then + # Capture etag for part-number 1 + function="${AWS} s3api upload-part --bucket ${bucket_name} --key ${object_name} --body ${MINT_DATA_DIR}/datafile-10-MB --upload-id ${upload_id} --part-number 1" + out=$($function) + rv=$? + etag1=$(echo "$out" | jq -r .ETag) + fi + + if [ $rv -eq 0 ]; then + # Capture etag for part-number 2 + function="${AWS} s3api upload-part --bucket ${bucket_name} --key ${object_name} --body ${MINT_DATA_DIR}/datafile-10-MB --upload-id ${upload_id} --part-number 2" + out=$($function) + rv=$? + etag2=$(echo "$out" | jq -r .ETag) + # Create a multipart struct file for completing multipart transaction + echo "{ + \"Parts\": [ + { + \"ETag\": ${etag1}, + \"PartNumber\": 1 + }, + { + \"ETag\": ${etag2}, + \"PartNumber\": 2 + } + ] + }" >> /tmp/multipart + fi + + if [ $rv -eq 0 ]; then + # Use saved etags to complete the multipart transaction + function="${AWS} s3api complete-multipart-upload --multipart-upload file:///tmp/multipart --bucket ${bucket_name} --key ${object_name} --upload-id ${upload_id}" + out=$($function) + rv=$? + finalETag=$(echo "$out" | jq -r .ETag | sed -e 's/^"//' -e 's/"$//') + if [ "${finalETag}" == "" ]; then + rv=1 + out="complete-multipart-upload failed" + fi + fi + + if [ $rv -eq 0 ]; then + function="delete_bucket" + out=$(delete_bucket "$bucket_name") + rv=$? + # remove temp file + rm -f /tmp/multipart + fi + + if [ $rv -eq 0 ]; then + log_success "$(get_duration "$start_time")" "${test_function}" + else + # clean up and log error + ${AWS} s3 rb s3://"${bucket_name}" --force > /dev/null 2>&1 + rm -f /tmp/multipart + log_failure "$(get_duration "$start_time")" "${function}" "${out}" + fi + + return $rv +} + +# Tests lifecycle of a bucket. +function test_bucket_lifecycle() { + # log start time + start_time=$(get_time) + + echo "{ \"Rules\": [ { \"Expiration\": { \"Days\": 365 },\"ID\": \"Bucketlifecycle test\", \"Filter\": { \"Prefix\": \"\" }, \"Status\": \"Enabled\" } ] }" >> /tmp/lifecycle.json + + function="make_bucket" + bucket_name=$(make_bucket) + rv=$? + + # if make bucket succeeds put bucket lifecycle + if [ $rv -eq 0 ]; then + function="${AWS} s3api put-bucket-lifecycle-configuration --bucket ${bucket_name} --lifecycle-configuration file:///tmp/lifecycle.json" + out=$($function 2>&1) + rv=$? + else + # if make bucket fails, $bucket_name has the error output + out="${bucket_name}" + fi + + if [ $rv -ne 0 ]; then + # if this functionality is not implemented return right away. 
+ if echo "$out" | grep -q "NotImplemented"; then + ${AWS} s3 rb s3://"${bucket_name}" --force > /dev/null 2>&1 + return 0 + fi + fi + + # if put bucket lifecycle succeeds get bucket lifecycle + if [ $rv -eq 0 ]; then + function="${AWS} s3api get-bucket-lifecycle-configuration --bucket ${bucket_name}" + out=$($function 2>&1) + rv=$? + fi + + # if get bucket lifecycle succeeds delete bucket lifecycle + if [ $rv -eq 0 ]; then + function="${AWS} s3api delete-bucket-lifecycle --bucket ${bucket_name}" + out=$($function 2>&1) + rv=$? + fi + + # delete lifecycle.json + rm -f /tmp/lifecycle.json + + # delete bucket + if [ $rv -eq 0 ]; then + function="delete_bucket" + out=$(delete_bucket "$bucket_name") + rv=$? + fi + + if [ $rv -eq 0 ]; then + log_success "$(get_duration "$start_time")" "test_bucket_lifecycle" + else + # clean up and log error + ${AWS} s3 rb s3://"${bucket_name}" --force > /dev/null 2>&1 + log_failure "$(get_duration "$start_time")" "${function}" "${out}" + fi + + return $rv +} + +# Tests `aws s3 cp` by uploading a local file. +function test_aws_s3_cp() { + file_name="${MINT_DATA_DIR}/datafile-65-MB" + + # log start time + start_time=$(get_time) + + function="make_bucket" + bucket_name=$(make_bucket) + rv=$? + + # if make bucket succeeds upload a file using cp + if [ $rv -eq 0 ]; then + function="${AWS} s3 cp $file_name s3://${bucket_name}/$(basename "$file_name")" + test_function=${function} + out=$($function 2>&1) + rv=$? + else + # if make bucket fails, $bucket_name has the error output + out="${bucket_name}" + fi + + if [ $rv -eq 0 ]; then + function="${AWS} s3 rm s3://${bucket_name}/$(basename "$file_name")" + out=$($function 2>&1) + rv=$? + fi + + if [ $rv -eq 0 ]; then + function="${AWS} s3 rb s3://${bucket_name}/" + out=$($function 2>&1) + rv=$? + fi + + if [ $rv -eq 0 ]; then + log_success "$(get_duration "$start_time")" "${test_function}" + else + # clean up and log error + ${AWS} s3 rb s3://"${bucket_name}" --force > /dev/null 2>&1 + log_failure "$(get_duration "$start_time")" "${function}" "${out}" + fi + + return $rv +} + +# Tests `aws s3 sync` by mirroring all the +# local content to remove bucket. +function test_aws_s3_sync() { + # log start time + start_time=$(get_time) + + function="make_bucket" + bucket_name=$(make_bucket) + rv=$? + + # if make bucket succeeds sync all the files in a directory + if [ $rv -eq 0 ]; then + function="${AWS} s3 sync --no-progress $MINT_DATA_DIR s3://${bucket_name}/" + test_function=${function} + out=$($function 2>&1) + rv=$? + else + # if make bucket fails, $bucket_name has the error output + out="${bucket_name}" + fi + + # remove files recusively + if [ $rv -eq 0 ]; then + function="${AWS} s3 rm --recursive s3://${bucket_name}/" + out=$($function 2>&1) + rv=$? + fi + + # delete bucket + if [ $rv -eq 0 ]; then + function="delete_bucket" + out=$(delete_bucket "$bucket_name") + rv=$? + fi + + if [ $rv -eq 0 ]; then + log_success "$(get_duration "$start_time")" "${test_function}" + else + # clean up and log error + ${AWS} s3 rb s3://"${bucket_name}" --force > /dev/null 2>&1 + log_failure "$(get_duration "$start_time")" "${function}" "${out}" + fi + + return $rv +} + +# list objects negative test - tests for following conditions. +# v1 API with max-keys=-1 and max-keys=0 +# v2 API with max-keys=-1 and max-keys=0 +function test_list_objects_error() { + # log start time + start_time=$(get_time) + + function="make_bucket" + bucket_name=$(make_bucket) + rv=$? 
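+ # list-objects with max-keys=-1 must be rejected by the server for both the
+ # v1 and v2 APIs, while max-keys=0 must succeed and return no keys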
+ + # if make bucket succeeds upload a file + if [ $rv -eq 0 ]; then + function="${AWS} s3api put-object --body ${MINT_DATA_DIR}/datafile-1-kB --bucket ${bucket_name} --key datafile-1-kB" + out=$($function 2>&1) + rv=$? + else + # if make bucket fails, $bucket_name has the error output + out="${bucket_name}" + fi + + if [ $rv -eq 0 ]; then + # Server replies an error for v1 with max-key=-1 + function="${AWS} s3api list-objects --bucket ${bucket_name} --prefix datafile-1-kB --max-keys=-1" + test_function=${function} + out=$($function 2>&1) + rv=$? + if [ $rv -ne $errno ]; then + rv=1 + else + rv=0 + fi + fi + + if [ $rv -eq 0 ]; then + # Server replies an error for v2 with max-keys=-1 + function="${AWS} s3api list-objects-v2 --bucket ${bucket_name} --prefix datafile-1-kB --max-keys=-1" + test_function=${function} + out=$($function 2>&1) + rv=$? + if [ $rv -ne $errno ]; then + rv=1 + else + rv=0 + fi + fi + + if [ $rv -eq 0 ]; then + # Server returns success with no keys when max-keys=0 + function="${AWS} s3api list-objects-v2 --bucket ${bucket_name} --prefix datafile-1-kB --max-keys=0" + out=$($function 2>&1) + rv=$? + if [ $rv -eq 0 ]; then + function="delete_bucket" + out=$(delete_bucket "$bucket_name") + rv=$? + fi + fi + + if [ $rv -eq 0 ]; then + log_success "$(get_duration "$start_time")" "${test_function}" + else + # clean up and log error + ${AWS} s3 rb s3://"${bucket_name}" --force > /dev/null 2>&1 + log_failure "$(get_duration "$start_time")" "${function}" "${out}" + fi + + return $rv +} + +# put object negative test - tests for following conditions. +# - invalid object name. +# - invalid Content-Md5 +# - invalid Content-Length +function test_put_object_error() { + # log start time + start_time=$(get_time) + + function="make_bucket" + bucket_name=$(make_bucket) + rv=$? + + # if make bucket succeeds upload an object without content-md5. + if [ $rv -eq 0 ]; then + function="${AWS} s3api put-object --body ${MINT_DATA_DIR}/datafile-1-kB --bucket ${bucket_name} --key datafile-1-kB --content-md5 invalid" + test_function=${function} + out=$($function 2>&1) + rv=$? + if [ $rv -ne $errno ]; then + rv=1 + else + rv=0 + fi + fi + + # upload an object without content-length. + if [ $rv -eq 0 ]; then + function="${AWS} s3api put-object --body ${MINT_DATA_DIR}/datafile-1-kB --bucket ${bucket_name} --key datafile-1-kB --content-length -1" + test_function=${function} + out=$($function 2>&1) + rv=$? + if [ $rv -ne $errno ]; then + rv=1 + else + rv=0 + fi + fi + + if [ $rv -eq 0 ]; then + function="delete_bucket" + out=$(delete_bucket "$bucket_name") + rv=$? + fi + + if [ $rv -eq 0 ]; then + log_success "$(get_duration "$start_time")" "${test_function}" + else + # clean up and log error + ${AWS} s3 rb s3://"${bucket_name}" --force > /dev/null 2>&1 + log_failure "$(get_duration "$start_time")" "${function}" "${out}" + fi + + return $rv +} +# tests server side encryption headers for get and put calls +function test_serverside_encryption() { + #skip server side encryption tests if HTTPS disabled. + if [ "$ENABLE_HTTPS" != "1" ]; then + return 0 + fi + # log start time + start_time=$(get_time) + + function="make_bucket" + bucket_name=$(make_bucket) + rv=$? 
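+ # The SSE-C requests below pass a base64-encoded 32-byte customer key
+ # (--sse-customer-key) together with its MD5 digest (--sse-customer-key-md5)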
+ + # put object with server side encryption headers + if [ $rv -eq 0 ]; then + function="${AWS} s3api put-object --body ${MINT_DATA_DIR}/datafile-1-kB --bucket ${bucket_name} --key datafile-1-kB --sse-customer-algorithm AES256 --sse-customer-key MzJieXRlc2xvbmdzZWNyZXRrZXltdXN0cHJvdmlkZWQ= --sse-customer-key-md5 7PpPLAK26ONlVUGOWlusfg==" + test_function=${function} + out=$($function 2>&1) + rv=$? + fi + # now get encrypted object from server + if [ $rv -eq 0 ]; then + etag1=$(echo "$out" | jq -r .ETag) + sse_customer_key1=$(echo "$out" | jq -r .SSECustomerKeyMD5) + sse_customer_algo1=$(echo "$out" | jq -r .SSECustomerAlgorithm) + + function="${AWS} s3api get-object --bucket ${bucket_name} --key datafile-1-kB --sse-customer-algorithm AES256 --sse-customer-key MzJieXRlc2xvbmdzZWNyZXRrZXltdXN0cHJvdmlkZWQ= --sse-customer-key-md5 7PpPLAK26ONlVUGOWlusfg== /tmp/datafile-1-kB" + test_function=${function} + out=$($function 2>&1) + rv=$? + fi + if [ $rv -eq 0 ]; then + etag2=$(echo "$out" | jq -r .ETag) + sse_customer_key2=$(echo "$out" | jq -r .SSECustomerKeyMD5) + sse_customer_algo2=$(echo "$out" | jq -r .SSECustomerAlgorithm) + get_md5 "/tmp/datafile-1-kB" + hash2=$md5rt + # match downloaded object's hash to original + if [ "$HASH_1_KB" == "$hash2" ]; then + function="delete_bucket" + out=$(delete_bucket "$bucket_name") + rv=$? + # remove download file + rm -f /tmp/datafile-1-kB + else + rv=1 + out="Checksum verification failed for downloaded object" + fi + # match etag and SSE headers + if [ "$etag1" != "$etag2" ]; then + rv=1 + out="Etag mismatch for object encrypted with server side encryption" + fi + if [ "$sse_customer_algo1" != "$sse_customer_algo2" ]; then + rv=1 + out="sse customer algorithm mismatch" + fi + if [ "$sse_customer_key1" != "$sse_customer_key2" ]; then + rv=1 + out="sse customer key mismatch" + fi + fi + + if [ $rv -eq 0 ]; then + log_success "$(get_duration "$start_time")" "${test_function}" + else + # clean up and log error + ${AWS} s3 rb s3://"${bucket_name}" --force > /dev/null 2>&1 + log_failure "$(get_duration "$start_time")" "${function}" "${out}" + fi + + return $rv +} + +# tests server side encryption headers for multipart put +function test_serverside_encryption_multipart() { + #skip server side encryption tests if HTTPS disabled. + if [ "$ENABLE_HTTPS" != "1" ]; then + return 0 + fi + # log start time + start_time=$(get_time) + + function="make_bucket" + bucket_name=$(make_bucket) + rv=$? + + # put object with server side encryption headers + if [ $rv -eq 0 ]; then + function="${AWS} s3api put-object --body ${MINT_DATA_DIR}/datafile-65-MB --bucket ${bucket_name} --key datafile-65-MB --sse-customer-algorithm AES256 --sse-customer-key MzJieXRlc2xvbmdzZWNyZXRrZXltdXN0cHJvdmlkZWQ= --sse-customer-key-md5 7PpPLAK26ONlVUGOWlusfg==" + test_function=${function} + out=$($function 2>&1) + rv=$? + fi + # now get encrypted object from server + if [ $rv -eq 0 ]; then + etag1=$(echo "$out" | jq -r .ETag) + sse_customer_key1=$(echo "$out" | jq -r .SSECustomerKeyMD5) + sse_customer_algo1=$(echo "$out" | jq -r .SSECustomerAlgorithm) + + function="${AWS} s3api get-object --bucket ${bucket_name} --key datafile-65-MB --sse-customer-algorithm AES256 --sse-customer-key MzJieXRlc2xvbmdzZWNyZXRrZXltdXN0cHJvdmlkZWQ= --sse-customer-key-md5 7PpPLAK26ONlVUGOWlusfg== /tmp/datafile-65-MB" + test_function=${function} + out=$($function 2>&1) + rv=$? 
+ fi + if [ $rv -eq 0 ]; then + etag2=$(echo "$out" | jq -r .ETag) + sse_customer_key2=$(echo "$out" | jq -r .SSECustomerKeyMD5) + sse_customer_algo2=$(echo "$out" | jq -r .SSECustomerAlgorithm) + get_md5 "${MINT_DATA_DIR}/datafile-65-MB" + hash2=$md5rt + # match downloaded object's hash to original + if [ "$HASH_65_MB" == "$hash2" ]; then + function="delete_bucket" + out=$(delete_bucket "$bucket_name") + rv=$? + # remove download file + rm -f /tmp/datafile-65-MB + else + rv=1 + out="Checksum verification failed for downloaded object" + fi + # match etag and SSE headers + if [ "$etag1" != "$etag2" ]; then + rv=1 + out="Etag mismatch for object encrypted with server side encryption" + fi + if [ "$sse_customer_algo1" != "$sse_customer_algo2" ]; then + rv=1 + out="sse customer algorithm mismatch" + fi + if [ "$sse_customer_key1" != "$sse_customer_key2" ]; then + rv=1 + out="sse customer key mismatch" + fi + fi + + if [ $rv -eq 0 ]; then + log_success "$(get_duration "$start_time")" "${test_function}" + else + # clean up and log error + ${AWS} s3 rb s3://"${bucket_name}" --force > /dev/null 2>&1 + log_failure "$(get_duration "$start_time")" "${function}" "${out}" + fi + + return $rv +} + +# tests encrypted copy from multipart encrypted object to +# single part encrypted object. This test in particular checks if copy +# succeeds for the case where encryption overhead for individually +# encrypted parts vs encryption overhead for the original datastream +# differs. +function test_serverside_encryption_multipart_copy() { + #skip server side encryption tests if HTTPS disabled. + if [ "$ENABLE_HTTPS" != "1" ]; then + return 0 + fi + # log start time + start_time=$(get_time) + + function="make_bucket" + bucket_name=$(make_bucket) + object_name=${bucket_name}"-object" + rv=$? + + if [ $rv -eq 0 ]; then + # create multipart + function="${AWS} s3api create-multipart-upload --bucket ${bucket_name} --key ${object_name} --sse-customer-algorithm AES256 --sse-customer-key MzJieXRlc2xvbmdzZWNyZXRrZXltdXN0cHJvdmlkZWQ= --sse-customer-key-md5 7PpPLAK26ONlVUGOWlusfg==" + out=$($function) + rv=$? + upload_id=$(echo "$out" | jq -r .UploadId) + fi + + if [ $rv -eq 0 ]; then + # Capture etag for part-number 1 + function="${AWS} s3api upload-part --bucket ${bucket_name} --key ${object_name} --body ${MINT_DATA_DIR}/datafile-5243880-b --upload-id ${upload_id} --part-number 1 --sse-customer-algorithm AES256 --sse-customer-key MzJieXRlc2xvbmdzZWNyZXRrZXltdXN0cHJvdmlkZWQ= --sse-customer-key-md5 7PpPLAK26ONlVUGOWlusfg==" + out=$($function) + rv=$? + etag1=$(echo "$out" | jq -r .ETag) + fi + + if [ $rv -eq 0 ]; then + # Capture etag for part-number 2 + function="${AWS} s3api upload-part --bucket ${bucket_name} --key ${object_name} --body ${MINT_DATA_DIR}/datafile-5243880-b --upload-id ${upload_id} --part-number 2 --sse-customer-algorithm AES256 --sse-customer-key MzJieXRlc2xvbmdzZWNyZXRrZXltdXN0cHJvdmlkZWQ= --sse-customer-key-md5 7PpPLAK26ONlVUGOWlusfg==" + out=$($function) + rv=$? + etag2=$(echo "$out" | jq -r .ETag) + # Create a multipart struct file for completing multipart transaction + echo "{ + \"Parts\": [ + { + \"ETag\": ${etag1}, + \"PartNumber\": 1 + }, + { + \"ETag\": ${etag2}, + \"PartNumber\": 2 + } + ] + }" >> /tmp/multipart + fi + + if [ $rv -eq 0 ]; then + # Use saved etags to complete the multipart transaction + function="${AWS} s3api complete-multipart-upload --multipart-upload file:///tmp/multipart --bucket ${bucket_name} --key ${object_name} --upload-id ${upload_id}" + out=$($function) + rv=$? 
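+ # an empty ETag in the response means the multipart object was not assembled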
+ finalETag=$(echo "$out" | jq -r .ETag | sed -e 's/^"//' -e 's/"$//') + if [ "${finalETag}" == "" ]; then + rv=1 + out="complete-multipart-upload failed" + fi + fi + + # copy object server side + if [ $rv -eq 0 ]; then + function="${AWS} s3api copy-object --bucket ${bucket_name} --key ${object_name}-copy --copy-source ${bucket_name}/${object_name} --copy-source-sse-customer-algorithm AES256 --copy-source-sse-customer-key MzJieXRlc2xvbmdzZWNyZXRrZXltdXN0cHJvdmlkZWQ= --copy-source-sse-customer-key-md5 7PpPLAK26ONlVUGOWlusfg== --sse-customer-algorithm AES256 --sse-customer-key MzJieXRlc2xvbmdzZWNyZXRrZXltdXN0cHJvdmlkZWQ= --sse-customer-key-md5 7PpPLAK26ONlVUGOWlusfg==" + test_function=${function} + out=$($function) + rv=$? + if [ $rv -ne $errno ]; then + rv=1 + else + rv=0 + fi + fi + + if [ $rv -eq 0 ]; then + log_success "$(get_duration "$start_time")" "${test_function}" + else + # clean up and log error + ${AWS} s3 rb s3://"${bucket_name}" --force > /dev/null 2>&1 + rm -f /tmp/multipart + log_failure "$(get_duration "$start_time")" "${function}" "${out}" + fi + + return $rv +} +# tests server side encryption headers for range get calls +function test_serverside_encryption_get_range() { + #skip server side encryption tests if HTTPS disabled. + if [ "$ENABLE_HTTPS" != "1" ]; then + return 0 + fi + # log start time + start_time=$(get_time) + + function="make_bucket" + bucket_name=$(make_bucket) + rv=$? + # put object with server side encryption headers + if [ $rv -eq 0 ]; then + function="${AWS} s3api put-object --body ${MINT_DATA_DIR}/datafile-10-kB --bucket ${bucket_name} --key datafile-10-kB --sse-customer-algorithm AES256 --sse-customer-key MzJieXRlc2xvbmdzZWNyZXRrZXltdXN0cHJvdmlkZWQ= --sse-customer-key-md5 7PpPLAK26ONlVUGOWlusfg==" + test_function=${function} + out=$($function 2>&1) + rv=$? + fi + # now get encrypted object from server for range 500-999 + if [ $rv -eq 0 ]; then + etag1=$(echo "$out" | jq -r .ETag) + sse_customer_key1=$(echo "$out" | jq -r .SSECustomerKeyMD5) + sse_customer_algo1=$(echo "$out" | jq -r .SSECustomerAlgorithm) + function="${AWS} s3api get-object --bucket ${bucket_name} --key datafile-10-kB --range bytes=500-999 --sse-customer-algorithm AES256 --sse-customer-key MzJieXRlc2xvbmdzZWNyZXRrZXltdXN0cHJvdmlkZWQ= --sse-customer-key-md5 7PpPLAK26ONlVUGOWlusfg== /tmp/datafile-10-kB" + test_function=${function} + out=$($function 2>&1) + rv=$? + fi + if [ $rv -eq 0 ]; then + cnt=$(stat -c%s /tmp/datafile-10-kB) + if [ "$cnt" -ne 500 ]; then + rv=1 + fi + fi + if [ $rv -eq 0 ]; then + log_success "$(get_duration "$start_time")" "${test_function}" + else + # clean up and log error + ${AWS} s3 rb s3://"${bucket_name}" --force > /dev/null 2>&1 + log_failure "$(get_duration "$start_time")" "${function}" "${out}" + fi + return $rv +} + +# tests server side encryption error for get and put calls +function test_serverside_encryption_error() { + #skip server side encryption tests if HTTPS disabled. + if [ "$ENABLE_HTTPS" != "1" ]; then + return 0 + fi + # log start time + start_time=$(get_time) + + function="make_bucket" + bucket_name=$(make_bucket) + rv=$? 
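+ # each invalid request below is expected to fail with exit code $errno;
+ # rv is reset to 0 only when the expected error is returned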
+ + # put object with server side encryption headers with MD5Sum mismatch for sse-customer-key-md5 header + if [ $rv -eq 0 ]; then + function="${AWS} s3api put-object --body ${MINT_DATA_DIR}/datafile-1-kB --bucket ${bucket_name} --key datafile-1-kB --sse-customer-algorithm AES256 --sse-customer-key MzJieXRlc2xvbmdzZWNyZXRrZXltdXN0cHJvdmlkZWQ= --sse-customer-key-md5 7PpPLAK26ONlVUGOWlusfg" + test_function=${function} + out=$($function 2>&1) + rv=$? + fi + + if [ $rv -ne $errno ]; then + rv=1 + else + rv=0 + fi + # put object with missing server side encryption header sse-customer-algorithm + if [ $rv -eq 0 ]; then + function="${AWS} s3api put-object --body ${MINT_DATA_DIR}/datafile-1-kB --bucket ${bucket_name} --key datafile-1-kB --sse-customer-key MzJieXRlc2xvbmdzZWNyZXRrZXltdXN0cHJvdmlkZWQ= --sse-customer-key-md5 7PpPLAK26ONlVUGOWlusfg==" + test_function=${function} + out=$($function 2>&1) + rv=$? + fi + + if [ $rv -ne $errno ]; then + rv=1 + else + rv=0 + fi + + # put object with server side encryption headers successfully + if [ $rv -eq 0 ]; then + function="${AWS} s3api put-object --body ${MINT_DATA_DIR}/datafile-1-kB --bucket ${bucket_name} --key datafile-1-kB --sse-customer-algorithm AES256 --sse-customer-key MzJieXRlc2xvbmdzZWNyZXRrZXltdXN0cHJvdmlkZWQ= --sse-customer-key-md5 7PpPLAK26ONlVUGOWlusfg==" + test_function=${function} + out=$($function 2>&1) + rv=$? + fi + + # now test get on encrypted object with nonmatching sse-customer-key and sse-customer-md5 headers + if [ $rv -eq 0 ]; then + function="${AWS} s3api get-object --bucket ${bucket_name} --key datafile-1-kB --sse-customer-algorithm AES256 --sse-customer-key MzJieXRlc --sse-customer-key-md5 7PpPLAK26ONlVUGOWlusfg== /tmp/datafile-1-kB" + test_function=${function} + out=$($function 2>&1) + rv=$? + fi + if [ $rv -ne $errno ]; then + rv=1 + else + rv=0 + fi + # delete bucket + if [ $rv -eq 0 ]; then + function="delete_bucket" + out=$(delete_bucket "$bucket_name") + rv=$? + fi + if [ $rv -eq 0 ]; then + log_success "$(get_duration "$start_time")" "${test_function}" + else + # clean up and log error + ${AWS} s3 rb s3://"${bucket_name}" --force > /dev/null 2>&1 + log_failure "$(get_duration "$start_time")" "${function}" "${out}" + fi + + return $rv +} + + +# test GetObjectInfo http code is 404 +function test_get_object_error(){ + # log start time + start_time=$(get_time) + function="make_bucket" + bucket_name=$(make_bucket) + rv=$? + + # if make bucket succeeds upload a file + if [ $rv -eq 0 ]; then + function="${AWS} s3api put-object --body ${MINT_DATA_DIR}/datafile-1-kB --bucket ${bucket_name} --key datafile-1-kB" + out=$($function 2>&1) + rv=$? + else + # if make bucket fails, $bucket_name has the error output + out="${bucket_name}" + fi + + # if upload succeeds download the file + if [ $rv -eq 0 ]; then + function="${AWS} s3api get-object --bucket ${bucket_name} --key datafile-1-kB/ /tmp/datafile-1-kB" + # save the ref to function being tested, so it can be logged + test_function=${function} + out=$($function 2>&1) + if [ $? -eq $errno ];then + rv=0 + fi + if ! [[ "$out" =~ "The specified key does not exist" ]];then + log_failure "$(get_duration "$start_time")" "${function}" "${out}" + rv=1 + fi + fi + return $rv +} + + +# main handler for all the tests. 
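+# Success tests run first, then CLI operations, then error tests. Tests are
+# chained with '&&'; commented-out entries are skipped.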
+main() { + # Success tests + test_create_bucket && \ + test_upload_object && \ + test_lookup_object_prefix && \ + test_list_objects && \ + test_multipart_upload_0byte && \ + test_multipart_upload && \ + test_max_key_list && \ + test_copy_object && \ + test_copy_object_storage_class && \ + test_copy_object_storage_class_same && \ + test_presigned_object && \ + test_upload_object_10 && \ + test_multipart_upload_10 && \ +# test_bucket_lifecycle && \ + test_serverside_encryption && \ + test_serverside_encryption_get_range && \ + test_serverside_encryption_multipart && \ + test_serverside_encryption_multipart_copy && \ + # Success cli ops. + test_aws_s3_cp && \ + test_aws_s3_sync && \ + # Error tests + test_list_objects_error && \ + test_put_object_error && \ + test_serverside_encryption_error && \ + # test_worm_bucket && \ + # test_legal_hold + test_get_object_error + + return $? +} + +_init "$endpoint" && main diff --git a/pkg/chunk/cached_store.go b/pkg/chunk/cached_store.go new file mode 100644 index 0000000..95a9cbb --- /dev/null +++ b/pkg/chunk/cached_store.go @@ -0,0 +1,965 @@ +/* + * JuiceFS, Copyright 2020 Juicedata, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package chunk + +import ( + "bytes" + "context" + "errors" + "fmt" + "io" + "os" + "strconv" + "strings" + "sync" + "time" + + "github.com/juicedata/juicefs/pkg/compress" + "github.com/juicedata/juicefs/pkg/object" + "github.com/juicedata/juicefs/pkg/utils" + "github.com/juju/ratelimit" + "github.com/prometheus/client_golang/prometheus" +) + +const chunkSize = 1 << 26 // 64M +const pageSize = 1 << 16 // 64K +const SlowRequest = time.Second * time.Duration(10) + +var ( + logger = utils.GetLogger("juicefs") + + cacheHits = prometheus.NewCounter(prometheus.CounterOpts{ + Name: "blockcache_hits", + Help: "read from cached block", + }) + cacheMiss = prometheus.NewCounter(prometheus.CounterOpts{ + Name: "blockcache_miss", + Help: "missed read from cached block", + }) + cacheWrites = prometheus.NewCounter(prometheus.CounterOpts{ + Name: "blockcache_writes", + Help: "written cached block", + }) + cacheDrops = prometheus.NewCounter(prometheus.CounterOpts{ + Name: "blockcache_drops", + Help: "dropped block", + }) + cacheEvicts = prometheus.NewCounter(prometheus.CounterOpts{ + Name: "blockcache_evicts", + Help: "evicted cache blocks", + }) + cacheHitBytes = prometheus.NewCounter(prometheus.CounterOpts{ + Name: "blockcache_hit_bytes", + Help: "read bytes from cached block", + }) + cacheMissBytes = prometheus.NewCounter(prometheus.CounterOpts{ + Name: "blockcache_miss_bytes", + Help: "missed bytes from cached block", + }) + cacheWriteBytes = prometheus.NewCounter(prometheus.CounterOpts{ + Name: "blockcache_write_bytes", + Help: "write bytes of cached block", + }) + cacheReadHist = prometheus.NewHistogram(prometheus.HistogramOpts{ + Name: "blockcache_read_hist_seconds", + Help: "read cached block latency distribution", + Buckets: prometheus.ExponentialBuckets(0.00001, 2, 20), + }) + cacheWriteHist = 
prometheus.NewHistogram(prometheus.HistogramOpts{ + Name: "blockcache_write_hist_seconds", + Help: "write cached block latency distribution", + Buckets: prometheus.ExponentialBuckets(0.00001, 2, 20), + }) + + objectReqsHistogram = prometheus.NewHistogramVec(prometheus.HistogramOpts{ + Name: "object_request_durations_histogram_seconds", + Help: "Object requests latency distributions.", + Buckets: prometheus.ExponentialBuckets(0.01, 1.5, 25), + }, []string{"method"}) + objectReqErrors = prometheus.NewCounter(prometheus.CounterOpts{ + Name: "object_request_errors", + Help: "failed requests to object store", + }) + objectDataBytes = prometheus.NewCounterVec(prometheus.CounterOpts{ + Name: "object_request_data_bytes", + Help: "Object requests size in bytes.", + }, []string{"method"}) + + stageBlocks = prometheus.NewGauge(prometheus.GaugeOpts{ + Name: "staging_blocks", + Help: "Number of blocks in the staging path.", + }) + stageBlockBytes = prometheus.NewGauge(prometheus.GaugeOpts{ + Name: "staging_block_bytes", + Help: "Total bytes of blocks in the staging path.", + }) +) + +// chunk for read only +type rChunk struct { + id uint64 + length int + store *cachedStore +} + +func chunkForRead(id uint64, length int, store *cachedStore) *rChunk { + return &rChunk{id, length, store} +} + +func (c *rChunk) blockSize(indx int) int { + bsize := c.length - indx*c.store.conf.BlockSize + if bsize > c.store.conf.BlockSize { + bsize = c.store.conf.BlockSize + } + return bsize +} + +func (c *rChunk) key(indx int) string { + if c.store.conf.Partitions > 1 { + return fmt.Sprintf("chunks/%02X/%v/%v_%v_%v", c.id%256, c.id/1000/1000, c.id, indx, c.blockSize(indx)) + } + return fmt.Sprintf("chunks/%v/%v/%v_%v_%v", c.id/1000/1000, c.id/1000, c.id, indx, c.blockSize(indx)) +} + +func (c *rChunk) index(off int) int { + return off / c.store.conf.BlockSize +} + +func (c *rChunk) keys() []string { + if c.length <= 0 { + return nil + } + lastIndx := (c.length - 1) / c.store.conf.BlockSize + keys := make([]string, lastIndx+1) + for i := 0; i <= lastIndx; i++ { + keys[i] = c.key(i) + } + return keys +} + +func (c *rChunk) ReadAt(ctx context.Context, page *Page, off int) (n int, err error) { + p := page.Data + if len(p) == 0 { + return 0, nil + } + if off >= c.length { + return 0, io.EOF + } + + indx := c.index(off) + boff := off % c.store.conf.BlockSize + blockSize := c.blockSize(indx) + if boff+len(p) > blockSize { + // read beyond currend page + var got int + for got < len(p) { + // aligned to current page + l := utils.Min(len(p)-got, c.blockSize(c.index(off))-off%c.store.conf.BlockSize) + pp := page.Slice(got, l) + n, err = c.ReadAt(ctx, pp, off) + pp.Release() + if err != nil { + return got + n, err + } + if n == 0 { + return got, io.EOF + } + got += n + off += n + } + return got, nil + } + + key := c.key(indx) + if c.store.conf.CacheSize > 0 { + start := time.Now() + r, err := c.store.bcache.load(key) + if err == nil { + n, err = r.ReadAt(p, int64(boff)) + _ = r.Close() + if err == nil { + cacheHits.Add(1) + cacheHitBytes.Add(float64(n)) + cacheReadHist.Observe(time.Since(start).Seconds()) + return n, nil + } + if f, ok := r.(*os.File); ok { + logger.Warnf("remove partial cached block %s: %d %s", f.Name(), n, err) + _ = os.Remove(f.Name()) + } + } + } + + cacheMiss.Add(1) + cacheMissBytes.Add(float64(len(p))) + + if c.store.seekable && boff > 0 && len(p) <= blockSize/4 { + if c.store.downLimit != nil { + c.store.downLimit.Wait(int64(len(p))) + } + // partial read + st := time.Now() + in, err := c.store.storage.Get(key, 
int64(boff), int64(len(p))) + if err == nil { + n, err = io.ReadFull(in, p) + _ = in.Close() + } + used := time.Since(st) + logger.Debugf("GET %s RANGE(%d,%d) (%s, %.3fs)", key, boff, len(p), err, used.Seconds()) + if used > SlowRequest { + logger.Infof("slow request: GET %s (%v, %.3fs)", key, err, used.Seconds()) + } + objectDataBytes.WithLabelValues("GET").Add(float64(n)) + objectReqsHistogram.WithLabelValues("GET").Observe(used.Seconds()) + c.store.fetcher.fetch(key) + if err == nil { + return n, nil + } else { + objectReqErrors.Add(1) + } + } + + block, err := c.store.group.Execute(key, func() (*Page, error) { + tmp := page + if boff > 0 || len(p) < blockSize { + tmp = NewOffPage(blockSize) + } else { + tmp.Acquire() + } + tmp.Acquire() + err := utils.WithTimeout(func() error { + defer tmp.Release() + return c.store.load(key, tmp, c.store.shouldCache(blockSize), false) + }, c.store.conf.GetTimeout) + return tmp, err + }) + defer block.Release() + if err != nil { + return 0, err + } + if block != page { + copy(p, block.Data[boff:]) + } + return len(p), nil +} + +func (c *rChunk) delete(indx int) error { + key := c.key(indx) + st := time.Now() + err := c.store.storage.Delete(key) + used := time.Since(st) + logger.Debugf("DELETE %v (%v, %.3fs)", key, err, used.Seconds()) + if used > SlowRequest { + logger.Infof("slow request: DELETE %v (%v, %.3fs)", key, err, used.Seconds()) + } + objectReqsHistogram.WithLabelValues("DELETE").Observe(used.Seconds()) + if err != nil { + objectReqErrors.Add(1) + } + return err +} + +func (c *rChunk) Remove() error { + if c.length == 0 { + // no block + return nil + } + + lastIndx := (c.length - 1) / c.store.conf.BlockSize + var err error + for i := 0; i <= lastIndx; i++ { + // there could be multiple clients try to remove the same chunk in the same time, + // any of them should succeed if any blocks is removed + key := c.key(i) + c.store.pendingMutex.Lock() + delete(c.store.pendingKeys, key) + c.store.pendingMutex.Unlock() + c.store.bcache.remove(key) + if e := c.delete(i); e != nil { + err = e + } + } + return err +} + +var pagePool = make(chan *Page, 128) + +func allocPage(sz int) *Page { + if sz != pageSize { + return NewOffPage(sz) + } + select { + case p := <-pagePool: + return p + default: + return NewOffPage(pageSize) + } +} + +func freePage(p *Page) { + if cap(p.Data) != pageSize { + p.Release() + return + } + select { + case pagePool <- p: + default: + p.Release() + } +} + +// chunk for write only +type wChunk struct { + rChunk + pages [][]*Page + uploaded int + errors chan error + uploadError error + pendings int +} + +func chunkForWrite(id uint64, store *cachedStore) *wChunk { + return &wChunk{ + rChunk: rChunk{id, 0, store}, + pages: make([][]*Page, chunkSize/store.conf.BlockSize), + errors: make(chan error, chunkSize/store.conf.BlockSize), + } +} + +func (c *wChunk) SetID(id uint64) { + c.id = id +} + +func (c *wChunk) WriteAt(p []byte, off int64) (n int, err error) { + if int(off)+len(p) > chunkSize { + return 0, fmt.Errorf("write out of chunk boudary: %d > %d", int(off)+len(p), chunkSize) + } + if off < int64(c.uploaded) { + return 0, fmt.Errorf("Cannot overwrite uploaded block: %d < %d", off, c.uploaded) + } + + // Fill previous blocks with zeros + if c.length < int(off) { + zeros := make([]byte, int(off)-c.length) + _, _ = c.WriteAt(zeros, int64(c.length)) + } + + for n < len(p) { + indx := c.index(int(off) + n) + boff := (int(off) + n) % c.store.conf.BlockSize + var bs = pageSize + if indx > 0 || bs > c.store.conf.BlockSize { + bs = 
c.store.conf.BlockSize + } + bi := boff / bs + bo := boff % bs + var page *Page + if bi < len(c.pages[indx]) { + page = c.pages[indx][bi] + } else { + page = allocPage(bs) + page.Data = page.Data[:0] + c.pages[indx] = append(c.pages[indx], page) + } + left := len(p) - n + if bo+left > bs { + page.Data = page.Data[:bs] + } else if len(page.Data) < bo+left { + page.Data = page.Data[:bo+left] + } + n += copy(page.Data[bo:], p[n:]) + } + if int(off)+n > c.length { + c.length = int(off) + n + } + return n, nil +} + +func (c *wChunk) put(key string, p *Page) error { + if c.store.upLimit != nil { + c.store.upLimit.Wait(int64(len(p.Data))) + } + p.Acquire() + return utils.WithTimeout(func() error { + defer p.Release() + st := time.Now() + err := c.store.storage.Put(key, bytes.NewReader(p.Data)) + used := time.Since(st) + logger.Debugf("PUT %s (%s, %.3fs)", key, err, used.Seconds()) + if used > SlowRequest { + logger.Infof("slow request: PUT %v (%v, %.3fs)", key, err, used.Seconds()) + } + objectDataBytes.WithLabelValues("PUT").Add(float64(len(p.Data))) + objectReqsHistogram.WithLabelValues("PUT").Observe(used.Seconds()) + if err != nil { + objectReqErrors.Add(1) + } + return err + }, c.store.conf.PutTimeout) +} + +func (c *wChunk) syncUpload(key string, block *Page) { + blen := len(block.Data) + bufSize := c.store.compressor.CompressBound(blen) + var buf *Page + if bufSize > blen { + buf = NewOffPage(bufSize) + } else { + buf = block + buf.Acquire() + } + n, err := c.store.compressor.Compress(buf.Data, block.Data) + if err != nil { + logger.Fatalf("compress chunk %v: %s", c.id, err) + return + } + buf.Data = buf.Data[:n] + if blen < c.store.conf.BlockSize { + // block will be freed after written into disk + c.store.bcache.cache(key, block, false) + } + block.Release() + + c.store.currentUpload <- true + defer func() { + buf.Release() + <-c.store.currentUpload + }() + + try := 0 + for try <= 10 && c.uploadError == nil { + err = c.put(key, buf) + if err == nil { + c.errors <- nil + return + } + try++ + logger.Warnf("upload %s: %s (try %d)", key, err, try) + time.Sleep(time.Second * time.Duration(try*try)) + } + c.errors <- fmt.Errorf("upload block %s: %s (after %d tries)", key, err, try) +} + +func (c *wChunk) asyncUpload(key string, block *Page, stagingPath string) { + blockSize := len(block.Data) + defer c.store.bcache.uploaded(key, blockSize) + defer func() { + <-c.store.currentUpload + }() + select { + case c.store.currentUpload <- true: + default: + // release the memory and wait + block.Release() + c.store.pendingMutex.Lock() + c.store.pendingKeys[key] = time.Now() + c.store.pendingMutex.Unlock() + defer func() { + c.store.pendingMutex.Lock() + delete(c.store.pendingKeys, key) + c.store.pendingMutex.Unlock() + }() + + logger.Debugf("wait to upload %s", key) + c.store.currentUpload <- true + + // load from disk + f, err := os.Open(stagingPath) + if err != nil { + c.store.pendingMutex.Lock() + _, ok := c.store.pendingKeys[key] + c.store.pendingMutex.Unlock() + if ok { + logger.Errorf("read stagging file %s: %s", stagingPath, err) + } else { + logger.Debugf("%s is not needed, drop it", key) + } + return + } + + block = NewOffPage(blockSize) + _, err = io.ReadFull(f, block.Data) + _ = f.Close() + if err != nil { + logger.Errorf("read stagging file %s: %s", stagingPath, err) + block.Release() + return + } + } + bufSize := c.store.compressor.CompressBound(blockSize) + var buf *Page + if bufSize > blockSize { + buf = NewOffPage(bufSize) + } else { + buf = block + buf.Acquire() + } + n, err := 
c.store.compressor.Compress(buf.Data, block.Data) + if err != nil { + logger.Fatalf("compress chunk %v: %s", c.id, err) + return + } + buf.Data = buf.Data[:n] + block.Release() + + try := 0 + for c.uploadError == nil { + err = c.put(key, buf) + if err == nil { + break + } + logger.Warnf("upload %s: %s (tried %d)", key, err, try) + try++ + time.Sleep(time.Second * time.Duration(try)) + } + buf.Release() + if err = os.Remove(stagingPath); err == nil { + stageBlocks.Sub(1) + stageBlockBytes.Sub(float64(blockSize)) + } +} + +func (c *wChunk) upload(indx int) { + blen := c.blockSize(indx) + key := c.key(indx) + pages := c.pages[indx] + c.pages[indx] = nil + c.pendings++ + + go func() { + var block *Page + if len(pages) == 1 { + block = pages[0] + } else { + block = NewOffPage(blen) + var off int + for _, b := range pages { + off += copy(block.Data[off:], b.Data) + freePage(b) + } + if off != blen { + logger.Fatalf("block length does not match: %v != %v", off, blen) + } + } + if c.store.conf.Writeback { + stagingPath, err := c.store.bcache.stage(key, block.Data, c.store.shouldCache(blen)) + if err != nil { + logger.Warnf("write %s to disk: %s, upload it directly", stagingPath, err) + c.syncUpload(key, block) + } else { + c.errors <- nil + if c.store.conf.UploadDelay == 0 { + go c.asyncUpload(key, block, stagingPath) + } else { + block.Release() + c.store.pendingMutex.Lock() + c.store.pendingKeys[key] = time.Now() + c.store.pendingMutex.Unlock() + } + } + } else { + c.syncUpload(key, block) + } + }() +} + +func (c *wChunk) ID() uint64 { + return c.id +} + +func (c *wChunk) Len() int { + return c.length +} + +func (c *wChunk) FlushTo(offset int) error { + if offset < c.uploaded { + logger.Fatalf("Invalid offset: %d < %d", offset, c.uploaded) + } + for i, block := range c.pages { + start := i * c.store.conf.BlockSize + end := start + c.store.conf.BlockSize + if start >= c.uploaded && end <= offset { + if block != nil { + c.upload(i) + } + c.uploaded = end + } + } + + return nil +} + +func (c *wChunk) Finish(length int) error { + if c.length != length { + return fmt.Errorf("Length mismatch: %v != %v", c.length, length) + } + + n := (length-1)/c.store.conf.BlockSize + 1 + if err := c.FlushTo(n * c.store.conf.BlockSize); err != nil { + return err + } + for i := 0; i < c.pendings; i++ { + if err := <-c.errors; err != nil { + c.uploadError = err + return err + } + } + return nil +} + +func (c *wChunk) Abort() { + for i := range c.pages { + for _, b := range c.pages[i] { + freePage(b) + } + c.pages[i] = nil + } + // delete uploaded blocks + c.length = c.uploaded + _ = c.Remove() +} + +// Config contains options for cachedStore +type Config struct { + CacheDir string + CacheMode os.FileMode + CacheSize int64 + FreeSpace float32 + AutoCreate bool + Compress string + MaxUpload int + UploadLimit int64 // bytes per second + DownloadLimit int64 // bytes per second + Writeback bool + UploadDelay time.Duration + Partitions int + BlockSize int + GetTimeout time.Duration + PutTimeout time.Duration + CacheFullBlock bool + BufferSize int + Readahead int + Prefetch int +} + +type cachedStore struct { + storage object.ObjectStorage + bcache CacheManager + fetcher *prefetcher + conf Config + group *Controller + currentUpload chan bool + pendingKeys map[string]time.Time + pendingMutex sync.Mutex + compressor compress.Compressor + seekable bool + upLimit *ratelimit.Bucket + downLimit *ratelimit.Bucket +} + +func (store *cachedStore) load(key string, page *Page, cache bool, forceCache bool) (err error) { + defer func() { 
+ e := recover() + if e != nil { + err = fmt.Errorf("recovered from %s", e) + } + }() + needed := store.compressor.CompressBound(len(page.Data)) + compressed := needed > len(page.Data) + // we don't know the actual size for compressed block + if store.downLimit != nil && !compressed { + store.downLimit.Wait(int64(len(page.Data))) + } + err = errors.New("Not downloaded") + var in io.ReadCloser + tried := 0 + start := time.Now() + // it will be retried outside + for err != nil && tried < 2 { + time.Sleep(time.Second * time.Duration(tried*tried)) + if tried > 0 { + logger.Warnf("GET %s: %s; retrying", key, err) + objectReqErrors.Add(1) + start = time.Now() + } + in, err = store.storage.Get(key, 0, -1) + tried++ + } + var n int + var buf []byte + if err == nil { + if compressed { + c := NewOffPage(needed) + defer c.Release() + buf = c.Data + } else { + buf = page.Data + } + n, err = io.ReadFull(in, buf) + _ = in.Close() + } + if compressed && err == io.ErrUnexpectedEOF { + err = nil + } + used := time.Since(start) + logger.Debugf("GET %s (%s, %.3fs)", key, err, used.Seconds()) + if used > SlowRequest { + logger.Infof("slow request: GET %s (%v, %.3fs)", key, err, used.Seconds()) + } + if store.downLimit != nil && compressed { + store.downLimit.Wait(int64(n)) + } + objectDataBytes.WithLabelValues("GET").Add(float64(n)) + objectReqsHistogram.WithLabelValues("GET").Observe(used.Seconds()) + if err != nil { + objectReqErrors.Add(1) + return fmt.Errorf("get %s: %s", key, err) + } + if compressed { + n, err = store.compressor.Decompress(page.Data, buf[:n]) + } + if err != nil || n < len(page.Data) { + return fmt.Errorf("read %s fully: %s (%d < %d) after %s (tried %d)", key, err, n, len(page.Data), + used, tried) + } + if cache { + store.bcache.cache(key, page, forceCache) + } + return nil +} + +// NewCachedStore create a cached store. 
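+// It wraps the object storage with compression, optional upload/download rate
+// limits, a local block cache and, when writeback is enabled, staging uploads.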
+func NewCachedStore(storage object.ObjectStorage, config Config) ChunkStore { + compressor := compress.NewCompressor(config.Compress) + if compressor == nil { + logger.Fatalf("unknown compress algorithm: %s", config.Compress) + } + if config.GetTimeout == 0 { + config.GetTimeout = time.Second * 60 + } + if config.PutTimeout == 0 { + config.PutTimeout = time.Second * 60 + } + store := &cachedStore{ + storage: storage, + conf: config, + currentUpload: make(chan bool, config.MaxUpload), + compressor: compressor, + seekable: compressor.CompressBound(0) == 0, + pendingKeys: make(map[string]time.Time), + group: &Controller{}, + } + if config.UploadLimit > 0 { + // there are overheads coming from HTTP/TCP/IP + store.upLimit = ratelimit.NewBucketWithRate(float64(config.UploadLimit)*0.85, config.UploadLimit) + } + if config.DownloadLimit > 0 { + store.downLimit = ratelimit.NewBucketWithRate(float64(config.DownloadLimit)*0.85, config.DownloadLimit) + } + store.bcache = newCacheManager(&config, store.uploadStagingFile) + if config.CacheSize == 0 { + config.Prefetch = 0 // disable prefetch if cache is disabled + } + store.fetcher = newPrefetcher(config.Prefetch, func(key string) { + size := parseObjOrigSize(key) + if size == 0 || size > store.conf.BlockSize { + return + } + p := NewOffPage(size) + defer p.Release() + _ = store.load(key, p, true, true) + }) + _ = prometheus.Register(cacheHits) + _ = prometheus.Register(cacheHitBytes) + _ = prometheus.Register(cacheMiss) + _ = prometheus.Register(cacheMissBytes) + _ = prometheus.Register(cacheWrites) + _ = prometheus.Register(cacheWriteBytes) + _ = prometheus.Register(cacheDrops) + _ = prometheus.Register(cacheEvicts) + _ = prometheus.Register(cacheReadHist) + _ = prometheus.Register(cacheWriteHist) + _ = prometheus.Register(prometheus.NewGaugeFunc( + prometheus.GaugeOpts{ + Name: "blockcache_blocks", + Help: "number of cached blocks", + }, + func() float64 { + cnt, _ := store.bcache.stats() + return float64(cnt) + })) + _ = prometheus.Register(prometheus.NewGaugeFunc( + prometheus.GaugeOpts{ + Name: "blockcache_bytes", + Help: "number of cached bytes", + }, + func() float64 { + _, used := store.bcache.stats() + return float64(used) + })) + _ = prometheus.Register(objectReqsHistogram) + _ = prometheus.Register(objectReqErrors) + _ = prometheus.Register(objectDataBytes) + _ = prometheus.Register(stageBlocks) + _ = prometheus.Register(stageBlockBytes) + + if store.conf.CacheDir != "memory" && store.conf.Writeback && store.conf.UploadDelay > 0 { + logger.Infof("delay uploading by %s", store.conf.UploadDelay) + go func() { + for { + if store.conf.UploadDelay > time.Minute { + time.Sleep(time.Minute) + } else { + time.Sleep(store.conf.UploadDelay) + } + store.uploadDelayedStaging() + } + }() + } + return store +} + +func (store *cachedStore) shouldCache(size int) bool { + return store.conf.CacheFullBlock || size < store.conf.BlockSize || store.conf.UploadDelay > 0 +} + +func parseObjOrigSize(key string) int { + p := strings.LastIndexByte(key, '_') + l, _ := strconv.Atoi(key[p+1:]) + return l +} + +func (store *cachedStore) uploadStagingFile(key string, stagingPath string) { + store.currentUpload <- true + go func() { + defer func() { + <-store.currentUpload + }() + + f, err := os.Open(stagingPath) + if err != nil { + logger.Errorf("open %s: %s", stagingPath, err) + return + } + blockSize := parseObjOrigSize(key) + block := NewOffPage(blockSize) + _, err = io.ReadFull(f, block.Data) + _ = f.Close() + if err != nil { + block.Release() + logger.Errorf("read 
%s: %s", stagingPath, err) + return + } + buf := NewOffPage(store.compressor.CompressBound(blockSize)) + defer buf.Release() + n, err := store.compressor.Compress(buf.Data, block.Data) + block.Release() + if err != nil { + logger.Errorf("compress chunk %s: %s", stagingPath, err) + return + } + compressed := buf.Data[:n] + try := 0 + for { + if store.upLimit != nil { + store.upLimit.Wait(int64(len(compressed))) + } + st := time.Now() + err := store.storage.Put(key, bytes.NewReader(compressed)) + used := time.Since(st) + logger.Debugf("PUT %s (%s, %.3fs)", key, err, used.Seconds()) + if used > SlowRequest { + logger.Infof("slow request: PUT %v (%v, %.3fs)", key, err, used.Seconds()) + } + objectDataBytes.WithLabelValues("PUT").Add(float64(len(compressed))) + objectReqsHistogram.WithLabelValues("PUT").Observe(used.Seconds()) + if err == nil { + break + } else { + objectReqErrors.Add(1) + } + logger.Warnf("upload %s: %s (try %d)", key, err, try) + try++ + time.Sleep(time.Second * time.Duration(try*try)) + } + store.bcache.uploaded(key, blockSize) + store.pendingMutex.Lock() + delete(store.pendingKeys, key) + store.pendingMutex.Unlock() + if err = os.Remove(stagingPath); err == nil { + stageBlocks.Sub(1) + stageBlockBytes.Sub(float64(blockSize)) + } + }() +} + +func (store *cachedStore) uploadDelayedStaging() { + store.pendingMutex.Lock() + cutoff := time.Now().Add(-store.conf.UploadDelay) + for key, added := range store.pendingKeys { + store.pendingMutex.Unlock() + if added.Before(cutoff) { + store.uploadStagingFile(key, store.bcache.stagePath(key)) + } + store.pendingMutex.Lock() + } + store.pendingMutex.Unlock() +} + +func (store *cachedStore) NewReader(chunkid uint64, length int) Reader { + return chunkForRead(chunkid, length, store) +} + +func (store *cachedStore) NewWriter(chunkid uint64) Writer { + return chunkForWrite(chunkid, store) +} + +func (store *cachedStore) Remove(chunkid uint64, length int) error { + r := chunkForRead(chunkid, length, store) + return r.Remove() +} + +func (store *cachedStore) FillCache(chunkid uint64, length uint32) error { + r := chunkForRead(chunkid, int(length), store) + keys := r.keys() + var err error + for _, k := range keys { + f, e := store.bcache.load(k) + if e == nil { // already cached + _ = f.Close() + continue + } + size := parseObjOrigSize(k) + if size == 0 || size > store.conf.BlockSize { + logger.Warnf("Invalid size: %s %d", k, size) + continue + } + p := NewOffPage(size) + defer p.Release() + if e := store.load(k, p, true, true); e != nil { + logger.Warnf("Failed to load key: %s %s", k, e) + err = e + } + } + return err +} + +func (store *cachedStore) UsedMemory() int64 { + return store.bcache.usedMemory() +} + +var _ ChunkStore = &cachedStore{} diff --git a/pkg/chunk/cached_store_test.go b/pkg/chunk/cached_store_test.go new file mode 100644 index 0000000..b2f15f0 --- /dev/null +++ b/pkg/chunk/cached_store_test.go @@ -0,0 +1,290 @@ +/* + * JuiceFS, Copyright 2020 Juicedata, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +//nolint:errcheck +package chunk + +import ( + "bytes" + "context" + "io" + "os" + "path/filepath" + "testing" + "time" + + "github.com/juicedata/juicefs/pkg/object" +) + +func forgeChunk(store ChunkStore, chunkid uint64, size int) error { + w := store.NewWriter(chunkid) + buf := bytes.Repeat([]byte{0x41}, size) + if _, err := w.WriteAt(buf, 0); err != nil { + return err + } + return w.Finish(size) +} + +func testStore(t *testing.T, store ChunkStore) { + writer := store.NewWriter(1) + data := []byte("hello world") + if n, err := writer.WriteAt(data, 0); n != 11 || err != nil { + t.Fatalf("write fail: %d %s", n, err) + } + offset := defaultConf.BlockSize - 3 + if n, err := writer.WriteAt(data, int64(offset)); err != nil || n != 11 { + t.Fatalf("write fail: %d %s", n, err) + } + if err := writer.FlushTo(defaultConf.BlockSize + 3); err != nil { + t.Fatalf("flush fail: %s", err) + } + size := offset + len(data) + if err := writer.Finish(size); err != nil { + t.Fatalf("finish fail: %s", err) + } + defer store.Remove(1, size) + + reader := store.NewReader(1, size) + p := NewPage(make([]byte, 5)) + if n, err := reader.ReadAt(context.Background(), p, 6); n != 5 || err != nil { + t.Fatalf("read failed: %d %s", n, err) + } else if string(p.Data[:n]) != "world" { + t.Fatalf("not expected: %s", string(p.Data[:n])) + } + p = NewPage(make([]byte, 20)) + if n, err := reader.ReadAt(context.Background(), p, offset); n != 11 || err != nil && err != io.EOF { + t.Fatalf("read failed: %d %s", n, err) + } else if string(p.Data[:n]) != "hello world" { + t.Fatalf("not expected: %s", string(p.Data[:n])) + } + + bsize := defaultConf.BlockSize / 2 + errs := make(chan error, 3) + for i := 2; i < 5; i++ { + go func(chunkid uint64) { + if err := forgeChunk(store, chunkid, bsize); err != nil { + errs <- err + return + } + time.Sleep(time.Millisecond * 100) // waiting for flush + errs <- store.Remove(chunkid, bsize) + }(uint64(i)) + } + for i := 0; i < 3; i++ { + if err := <-errs; err != nil { + t.Fatalf("test concurrent write failed: %s", err) + } + } +} + +var defaultConf = Config{ + BlockSize: 1 << 20, + CacheDir: filepath.Join(os.TempDir(), "diskCache"), + CacheSize: 1, + MaxUpload: 1, + PutTimeout: time.Second, + GetTimeout: time.Second * 2, + AutoCreate: true, + BufferSize: 10 << 20, +} + +func TestStoreDefault(t *testing.T) { + mem, _ := object.CreateStorage("mem", "", "", "") + _ = os.RemoveAll(defaultConf.CacheDir) + store := NewCachedStore(mem, defaultConf) + testStore(t, store) + if used := store.UsedMemory(); used != 0 { + t.Fatalf("used memory %d != expect 0", used) + } + if cnt, used := store.(*cachedStore).bcache.stats(); cnt != 0 || used != 0 { + t.Fatalf("cache cnt %d used %d, expect both 0", cnt, used) + } +} + +func TestStoreMemCache(t *testing.T) { + mem, _ := object.CreateStorage("mem", "", "", "") + conf := defaultConf + conf.CacheDir = "memory" + store := NewCachedStore(mem, conf) + testStore(t, store) + if used := store.UsedMemory(); used != 0 { + t.Fatalf("used memory %d != expect 0", used) + } + if cnt, used := store.(*cachedStore).bcache.stats(); cnt != 0 || used != 0 { + t.Fatalf("cache cnt %d used %d, expect both 0", cnt, used) + } +} +func TestStoreCompressed(t *testing.T) { + mem, _ := object.CreateStorage("mem", "", "", "") + conf := defaultConf + conf.Compress = "lz4" + conf.AutoCreate = false + store := NewCachedStore(mem, conf) + testStore(t, store) +} + +func TestStoreLimited(t *testing.T) { + mem, _ := object.CreateStorage("mem", "", "", "") + conf := defaultConf + 
conf.UploadLimit = 1 << 20 + conf.DownloadLimit = 1 << 20 + store := NewCachedStore(mem, conf) + testStore(t, store) +} + +func TestStoreFull(t *testing.T) { + mem, _ := object.CreateStorage("mem", "", "", "") + conf := defaultConf + conf.FreeSpace = 0.9999 + store := NewCachedStore(mem, conf) + testStore(t, store) +} + +func TestStoreSmallBuffer(t *testing.T) { + mem, _ := object.CreateStorage("mem", "", "", "") + conf := defaultConf + conf.BufferSize = 1 << 20 + store := NewCachedStore(mem, conf) + testStore(t, store) +} + +func TestStoreAsync(t *testing.T) { + mem, _ := object.CreateStorage("mem", "", "", "") + conf := defaultConf + conf.Writeback = true + p := filepath.Join(conf.CacheDir, stagingDir, "chunks/0/0/123_0_4") + os.MkdirAll(filepath.Dir(p), 0744) + f, _ := os.Create(p) + f.WriteString("good") + f.Close() + store := NewCachedStore(mem, conf) + time.Sleep(time.Millisecond * 50) // wait for scan to finish + in, err := mem.Get("chunks/0/0/123_0_4", 0, -1) + if err != nil { + t.Fatalf("staging object should be upload") + } + data, _ := io.ReadAll(in) + if string(data) != "good" { + t.Fatalf("data %s != expect good", data) + } + testStore(t, store) +} + +func TestStoreDelayed(t *testing.T) { + mem, _ := object.CreateStorage("mem", "", "", "") + conf := defaultConf + conf.Writeback = true + conf.UploadDelay = time.Millisecond * 200 + store := NewCachedStore(mem, conf) + time.Sleep(time.Second) // waiting for cache scanned + testStore(t, store) + if err := forgeChunk(store, 10, 1024); err != nil { + t.Fatalf("forge chunk 10 1024: %s", err) + } + defer store.Remove(10, 1024) + time.Sleep(time.Second) // waiting for upload + if _, err := mem.Head("chunks/0/0/10_0_1024"); err != nil { + t.Fatalf("head object 10_0_1024: %s", err) + } +} + +func TestStoreMultiBuckets(t *testing.T) { + mem, _ := object.CreateStorage("mem", "", "", "") + conf := defaultConf + conf.Partitions = 3 + store := NewCachedStore(mem, conf) + testStore(t, store) +} + +func TestFillCache(t *testing.T) { + mem, _ := object.CreateStorage("mem", "", "", "") + conf := defaultConf + conf.CacheSize = 10 + _ = os.RemoveAll(conf.CacheDir) + store := NewCachedStore(mem, conf) + if err := forgeChunk(store, 10, 1024); err != nil { + t.Fatalf("forge chunk 10 1024: %s", err) + } + defer store.Remove(10, 1024) + bsize := conf.BlockSize + if err := forgeChunk(store, 11, bsize); err != nil { + t.Fatalf("forge chunk 11 %d: %s", bsize, err) + } + defer store.Remove(11, bsize) + + time.Sleep(time.Millisecond * 100) // waiting for flush + bcache := store.(*cachedStore).bcache + if cnt, used := bcache.stats(); cnt != 1 || used != 1024+4096 { // only chunk 10 cached + t.Fatalf("cache cnt %d used %d, expect cnt 1 used 5120", cnt, used) + } + if err := store.FillCache(10, 1024); err != nil { + t.Fatalf("fill cache 10 1024: %s", err) + } + if err := store.FillCache(11, uint32(bsize)); err != nil { + t.Fatalf("fill cache 11 %d: %s", bsize, err) + } + time.Sleep(time.Second) + expect := int64(1024 + 4096 + bsize + 4096) + if cnt, used := bcache.stats(); cnt != 2 || used != expect { + t.Fatalf("cache cnt %d used %d, expect cnt 2 used %d", cnt, used, expect) + } +} + +func BenchmarkCachedRead(b *testing.B) { + blob, _ := object.CreateStorage("mem", "", "", "") + config := defaultConf + config.BlockSize = 4 << 20 + store := NewCachedStore(blob, config) + w := store.NewWriter(1) + if _, err := w.WriteAt(make([]byte, 1024), 0); err != nil { + b.Fatalf("write fail: %s", err) + } + if err := w.Finish(1024); err != nil { + b.Fatalf("write fail: %s", 
err) + } + time.Sleep(time.Millisecond * 100) + p := NewPage(make([]byte, 1024)) + b.ResetTimer() + for i := 0; i < b.N; i++ { + r := store.NewReader(1, 1024) + if n, err := r.ReadAt(context.Background(), p, 0); err != nil || n != 1024 { + b.FailNow() + } + } +} + +func BenchmarkUncachedRead(b *testing.B) { + blob, _ := object.CreateStorage("mem", "", "", "") + config := defaultConf + config.BlockSize = 4 << 20 + config.CacheSize = 0 + store := NewCachedStore(blob, config) + w := store.NewWriter(2) + if _, err := w.WriteAt(make([]byte, 1024), 0); err != nil { + b.Fatalf("write fail: %s", err) + } + if err := w.Finish(1024); err != nil { + b.Fatalf("write fail: %s", err) + } + p := NewPage(make([]byte, 1024)) + b.ResetTimer() + for i := 0; i < b.N; i++ { + r := store.NewReader(2, 1024) + if n, err := r.ReadAt(context.Background(), p, 0); err != nil || n != 1024 { + b.FailNow() + } + } +} diff --git a/pkg/chunk/chunk.go b/pkg/chunk/chunk.go new file mode 100644 index 0000000..8ee75a1 --- /dev/null +++ b/pkg/chunk/chunk.go @@ -0,0 +1,43 @@ +/* + * JuiceFS, Copyright 2020 Juicedata, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package chunk + +import ( + "context" + "io" +) + +type Reader interface { + ReadAt(ctx context.Context, p *Page, off int) (int, error) +} + +type Writer interface { + io.WriterAt + ID() uint64 + SetID(chunkid uint64) + FlushTo(offset int) error + Finish(length int) error + Abort() +} + +type ChunkStore interface { + NewReader(chunkid uint64, length int) Reader + NewWriter(chunkid uint64) Writer + Remove(chunkid uint64, length int) error + FillCache(chunkid uint64, length uint32) error + UsedMemory() int64 +} diff --git a/pkg/chunk/disk_cache.go b/pkg/chunk/disk_cache.go new file mode 100644 index 0000000..913cf8d --- /dev/null +++ b/pkg/chunk/disk_cache.go @@ -0,0 +1,681 @@ +/* + * JuiceFS, Copyright 2020 Juicedata, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package chunk + +import ( + "errors" + "hash/fnv" + "io" + "math" + "os" + "path/filepath" + "runtime" + "sort" + "strings" + "sync" + "sync/atomic" + "time" + + "github.com/juicedata/juicefs/pkg/utils" +) + +var ( + stagingDir = "rawstaging" + cacheDir = "raw" +) + +type cacheItem struct { + size int32 + atime uint32 +} + +type pendingFile struct { + key string + page *Page +} + +type cacheStore struct { + totalPages int64 + sync.Mutex + dir string + mode os.FileMode + capacity int64 + freeRatio float32 + pending chan pendingFile + pages map[string]*Page + + used int64 + keys map[string]cacheItem + scanned bool + full bool + uploader func(key, path string) +} + +func newCacheStore(dir string, cacheSize int64, pendingPages int, config *Config, uploader func(key, path string)) *cacheStore { + if config.CacheMode == 0 { + config.CacheMode = 0600 // only owner can read/write cache + } + if config.FreeSpace == 0.0 { + config.FreeSpace = 0.1 // 10% + } + c := &cacheStore{ + dir: dir, + mode: config.CacheMode, + capacity: cacheSize, + freeRatio: config.FreeSpace, + keys: make(map[string]cacheItem), + pending: make(chan pendingFile, pendingPages), + pages: make(map[string]*Page), + uploader: uploader, + } + c.createDir(c.dir) + br, fr := c.curFreeRatio() + if br < c.freeRatio || fr < c.freeRatio { + logger.Warnf("not enough space (%d%%) or inodes (%d%%) for caching in %s: free ratio should be >= %d%%", int(br*100), int(fr*100), c.dir, int(c.freeRatio*100)) + } + logger.Infof("Disk cache (%s): capacity (%d MB), free ratio (%d%%), max pending pages (%d)", c.dir, c.capacity>>20, int(c.freeRatio*100), pendingPages) + go c.flush() + go c.checkFreeSpace() + go c.refreshCacheKeys() + go c.scanStaging() + return c +} + +func (c *cacheStore) usedMemory() int64 { + return atomic.LoadInt64(&c.totalPages) +} + +func (cache *cacheStore) stats() (int64, int64) { + cache.Lock() + defer cache.Unlock() + return int64(len(cache.pages) + len(cache.keys)), cache.used + cache.usedMemory() +} + +func (cache *cacheStore) checkFreeSpace() { + for { + br, fr := cache.curFreeRatio() + cache.full = br < cache.freeRatio/2 || fr < cache.freeRatio/2 + if br < cache.freeRatio || fr < cache.freeRatio { + logger.Tracef("Cleanup cache when check free space (%s): free ratio (%d%%), space usage (%d%%), inodes usage (%d%%)", cache.dir, int(cache.freeRatio*100), int(br*100), int(fr*100)) + cache.Lock() + cache.cleanup() + cache.Unlock() + + br, fr = cache.curFreeRatio() + if br < cache.freeRatio || fr < cache.freeRatio { + cache.uploadStaging() + } + } + time.Sleep(time.Second) + } +} + +func (cache *cacheStore) refreshCacheKeys() { + for { + cache.scanCached() + time.Sleep(time.Minute * 5) + } +} + +func (cache *cacheStore) cache(key string, p *Page, force bool) { + if cache.capacity == 0 { + return + } + cache.Lock() + defer cache.Unlock() + if _, ok := cache.pages[key]; ok { + return + } + p.Acquire() + cache.pages[key] = p + atomic.AddInt64(&cache.totalPages, int64(cap(p.Data))) + select { + case cache.pending <- pendingFile{key, p}: + default: + if force { + cache.Unlock() + cache.pending <- pendingFile{key, p} + cache.Lock() + } else { + // does not have enough bandwidth to write it into disk, discard it + logger.Debugf("Caching queue is full (%s), drop %s (%d bytes)", cache.dir, key, len(p.Data)) + cacheDrops.Add(1) + delete(cache.pages, key) + atomic.AddInt64(&cache.totalPages, -int64(cap(p.Data))) + p.Release() + } + } +} + +func (cache *cacheStore) curFreeRatio() (float32, float32) { + total, free, files, ffree := 
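+ // getDiskUsage reports total bytes, available bytes, total inodes and free inodes of the filesystem holding the cache directory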
getDiskUsage(cache.dir) + return float32(free) / float32(total), float32(ffree) / float32(files) +} + +func (cache *cacheStore) flushPage(path string, data []byte) (err error) { + start := time.Now() + cacheWrites.Add(1) + cacheWriteBytes.Add(float64(len(data))) + defer func() { + cacheWriteHist.Observe(time.Since(start).Seconds()) + }() + cache.createDir(filepath.Dir(path)) + tmp := path + ".tmp" + f, err := os.OpenFile(tmp, os.O_WRONLY|os.O_CREATE, cache.mode) + if err != nil { + logger.Warnf("Can't create cache file %s: %s", tmp, err) + return err + } + defer func() { + if err != nil { + _ = os.Remove(tmp) + } + }() + + if _, err = f.Write(data); err != nil { + logger.Warnf("Write to cache file %s failed: %s", tmp, err) + _ = f.Close() + return + } + if err = f.Close(); err != nil { + logger.Warnf("Close cache file %s failed: %s", tmp, err) + return + } + if err = os.Rename(tmp, path); err != nil { + logger.Warnf("Rename cache file %s -> %s failed: %s", tmp, path, err) + } + return +} + +func (cache *cacheStore) createDir(dir string) { + // who can read the cache, should be able to access the directories and add new file. + readmode := cache.mode & 0444 + mode := cache.mode | (readmode >> 2) | (readmode >> 1) + if st, err := os.Stat(dir); os.IsNotExist(err) { + if filepath.Dir(dir) != dir { + cache.createDir(filepath.Dir(dir)) + } + _ = os.Mkdir(dir, mode) + // umask may remove some permisssions + _ = os.Chmod(dir, mode) + } else if strings.HasPrefix(dir, cache.dir) && err == nil && st.Mode() != mode { + changeMode(dir, st, mode) + } +} + +func (cache *cacheStore) remove(key string) { + cache.Lock() + path := cache.cachePath(key) + if cache.keys[key].atime > 0 { + cache.used -= int64(cache.keys[key].size + 4096) + delete(cache.keys, key) + } else if cache.scanned { + path = "" // not existed + } + cache.Unlock() + if path != "" { + _ = os.Remove(path) + stagingPath := cache.stagePath(key) + if fi, err := os.Stat(stagingPath); err == nil { + size := fi.Size() + if err = os.Remove(stagingPath); err == nil { + stageBlocks.Sub(1) + stageBlockBytes.Sub(float64(size)) + } + } + } +} + +func (cache *cacheStore) load(key string) (ReadCloser, error) { + cache.Lock() + defer cache.Unlock() + if p, ok := cache.pages[key]; ok { + return NewPageReader(p), nil + } + if cache.scanned && cache.keys[key].atime == 0 { + return nil, errors.New("not cached") + } + cache.Unlock() + f, err := os.Open(cache.cachePath(key)) + cache.Lock() + if err == nil { + if it, ok := cache.keys[key]; ok { + // update atime + cache.keys[key] = cacheItem{it.size, uint32(time.Now().Unix())} + } + } + return f, err +} + +func (cache *cacheStore) cachePath(key string) string { + return filepath.Join(cache.dir, cacheDir, key) +} + +func (cache *cacheStore) stagePath(key string) string { + return filepath.Join(cache.dir, stagingDir, key) +} + +// flush cached block into disk +func (cache *cacheStore) flush() { + for { + w := <-cache.pending + path := cache.cachePath(w.key) + if cache.capacity > 0 && cache.flushPage(path, w.page.Data) == nil { + cache.add(w.key, int32(len(w.page.Data)), uint32(time.Now().Unix())) + } + cache.Lock() + delete(cache.pages, w.key) + atomic.AddInt64(&cache.totalPages, -int64(cap(w.page.Data))) + cache.Unlock() + w.page.Release() + } +} + +func (cache *cacheStore) add(key string, size int32, atime uint32) { + cache.Lock() + defer cache.Unlock() + it, ok := cache.keys[key] + if ok && it.size > 0 { + cache.used -= int64(it.size + 4096) + } + if atime == 0 { + // update size of staging block + 
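+ // a negative size marks a block still in the staging area (pending upload), and atime == 0 means only the recorded size changes while the existing access time is kept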
cache.keys[key] = cacheItem{size, it.atime} + } else { + cache.keys[key] = cacheItem{size, atime} + } + if size > 0 { + cache.used += int64(size + 4096) + } + + if cache.used > cache.capacity { + logger.Debugf("Cleanup cache when add new data (%s): %d blocks (%d MB)", cache.dir, len(cache.keys), cache.used>>20) + cache.cleanup() + } +} + +func (cache *cacheStore) stage(key string, data []byte, keepCache bool) (string, error) { + stagingPath := cache.stagePath(key) + if cache.full { + return stagingPath, errors.New("Space not enough on device") + } + err := cache.flushPage(stagingPath, data) + if err == nil { + stageBlocks.Add(1) + stageBlockBytes.Add(float64(len(data))) + if cache.capacity > 0 && keepCache { + path := cache.cachePath(key) + cache.createDir(filepath.Dir(path)) + if err := os.Link(stagingPath, path); err == nil { + cache.add(key, -int32(len(data)), uint32(time.Now().Unix())) + } else { + logger.Warnf("link %s to %s failed: %s", stagingPath, path, err) + } + } + } + return stagingPath, err +} + +func (cache *cacheStore) uploaded(key string, size int) { + cache.add(key, int32(size), 0) +} + +// locked +func (cache *cacheStore) cleanup() { + if !cache.scanned { + return + } + goal := cache.capacity * 95 / 100 + num := len(cache.keys) * 99 / 100 + // make sure we have enough free space after cleanup + br, fr := cache.curFreeRatio() + if br < cache.freeRatio { + total, _, _, _ := getDiskUsage(cache.dir) + toFree := int64(float32(total) * (cache.freeRatio - br)) + if toFree > cache.used { + goal = 0 + } else if cache.used-toFree < goal { + goal = cache.used - toFree + } + } + if fr < cache.freeRatio { + _, _, files, _ := getDiskUsage(cache.dir) + toFree := int(float32(files) * (cache.freeRatio - fr)) + if toFree > len(cache.keys) { + num = 0 + } else { + num = len(cache.keys) - toFree + } + } + + var todel []string + var freed int64 + var cnt int + var lastKey string + var lastValue cacheItem + var now = uint32(time.Now().Unix()) + // for each two random keys, then compare the access time, evict the older one + for key, value := range cache.keys { + if value.size < 0 { + continue // staging + } + if cnt == 0 || lastValue.atime > value.atime { + lastKey = key + lastValue = value + } + cnt++ + if cnt > 1 { + delete(cache.keys, lastKey) + freed += int64(lastValue.size + 4096) + cache.used -= int64(lastValue.size + 4096) + todel = append(todel, lastKey) + logger.Debugf("remove %s from cache, age: %d", lastKey, now-lastValue.atime) + cacheEvicts.Add(1) + cnt = 0 + if len(cache.keys) < num && cache.used < goal { + break + } + } + } + if len(todel) > 0 { + logger.Debugf("cleanup cache (%s): %d blocks (%d MB), freed %d blocks (%d MB)", cache.dir, len(cache.keys), cache.used>>20, len(todel), freed>>20) + } + cache.Unlock() + for _, key := range todel { + _ = os.Remove(cache.cachePath(key)) + } + cache.Lock() +} + +func (cache *cacheStore) uploadStaging() { + cache.Lock() + defer cache.Unlock() + if !cache.scanned || cache.uploader == nil { + return + } + + var toFree int64 + br, fr := cache.curFreeRatio() + if br < cache.freeRatio || fr < cache.freeRatio { + total, _, _, _ := getDiskUsage(cache.dir) + toFree = int64(float64(total)*float64(cache.freeRatio) - math.Min(float64(br), float64(fr))) + } + var cnt int + var lastKey string + var lastValue cacheItem + // for each two random keys, then compare the access time, upload the older one + for key, value := range cache.keys { + if value.size > 0 { + continue // read cache + } + + // pick the bigger one if they were accessed within the same 
minute + if cnt == 0 || lastValue.atime/60 > value.atime/60 || + lastValue.atime/60 == value.atime/60 && lastValue.size > value.size { // both size are < 0 + lastKey = key + lastValue = value + } + cnt++ + if cnt > 1 { + cache.Unlock() + cache.uploader(lastKey, cache.stagePath(lastKey)) + logger.Debugf("upload %s, age: %d", lastKey, uint32(time.Now().Unix())-lastValue.atime) + cache.Lock() + // the size in keys should be updated + toFree -= int64(-lastValue.size + 4096) + cnt = 0 + } + + if toFree < 0 { + break + } + } + if cnt > 0 { + cache.Unlock() + cache.uploader(lastKey, cache.stagePath(lastKey)) + logger.Debugf("upload %s, age: %d", lastKey, uint32(time.Now().Unix())-lastValue.atime) + cache.Lock() + } +} + +func (cache *cacheStore) scanCached() { + cache.Lock() + cache.used = 0 + cache.keys = make(map[string]cacheItem) + cache.scanned = false + cache.Unlock() + + var start = time.Now() + var oneMinAgo = start.Add(-time.Minute) + + cachePrefix := filepath.Join(cache.dir, cacheDir) + logger.Debugf("Scan %s to find cached blocks", cachePrefix) + _ = filepath.Walk(cachePrefix, func(path string, fi os.FileInfo, err error) error { + if fi != nil { + if fi.IsDir() || strings.HasSuffix(path, ".tmp") { + if fi.ModTime().Before(oneMinAgo) { + // try to remove empty directory + if os.Remove(path) == nil { + logger.Debugf("Remove empty directory: %s", path) + } + } + } else { + key := path[len(cachePrefix)+1:] + if runtime.GOOS == "windows" { + key = strings.ReplaceAll(key, "\\", "/") + } + atime := uint32(getAtime(fi).Unix()) + if getNlink(fi) > 1 { + cache.add(key, -int32(fi.Size()), atime) + } else { + cache.add(key, int32(fi.Size()), atime) + } + } + } + return nil + }) + + cache.Lock() + cache.scanned = true + logger.Debugf("Found %d cached blocks (%d bytes) in %s with %s", len(cache.keys), cache.used, cache.dir, time.Since(start)) + cache.Unlock() +} + +func (cache *cacheStore) scanStaging() { + if cache.uploader == nil { + return + } + + var start = time.Now() + var oneMinAgo = start.Add(-time.Minute) + var count int + stagingPrefix := filepath.Join(cache.dir, stagingDir) + logger.Debugf("Scan %s to find staging blocks", stagingPrefix) + _ = filepath.Walk(stagingPrefix, func(path string, fi os.FileInfo, err error) error { + if fi != nil { + if fi.IsDir() || strings.HasSuffix(path, ".tmp") { + if fi.ModTime().Before(oneMinAgo) { + // try to remove empty directory + if os.Remove(path) == nil { + logger.Debugf("Remove empty directory: %s", path) + } + } + } else { + logger.Debugf("Found staging block: %s", path) + stageBlocks.Add(1) + stageBlockBytes.Add(float64(fi.Size())) + key := path[len(stagingPrefix)+1:] + if runtime.GOOS == "windows" { + key = strings.ReplaceAll(key, "\\", "/") + } + cache.uploader(key, path) + count++ + } + } + return nil + }) + if count > 0 { + logger.Infof("Found %d staging blocks (%d bytes) in %s with %s", count, cache.used, cache.dir, time.Since(start)) + } +} + +type cacheManager struct { + stores []*cacheStore +} + +func keyHash(s string) uint32 { + hash := fnv.New32() + _, _ = hash.Write([]byte(s)) + return hash.Sum32() +} + +// hasMeta reports whether path contains any of the magic characters +// recognized by Match. 
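+// It mirrors the unexported hasMeta helper in the standard path/filepath package; backslash is excluded on Windows because it is the path separator there rather than an escape character.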
+func hasMeta(path string) bool { + magicChars := `*?[` + if runtime.GOOS != "windows" { + magicChars = `*?[\` + } + return strings.ContainsAny(path, magicChars) +} + +var osPathSeparator = string([]byte{os.PathSeparator}) + +func expandDir(pattern string) []string { + pattern = strings.TrimRight(pattern, "/") + if runtime.GOOS == "windows" { + pattern = strings.TrimRight(pattern, osPathSeparator) + } + if pattern == "" { + return []string{"/"} + } + if !hasMeta(pattern) { + return []string{pattern} + } + dir, f := filepath.Split(pattern) + if hasMeta(f) { + matched, err := filepath.Glob(pattern) + if err != nil { + logger.Errorf("glob %s: %s", pattern, err) + return []string{pattern} + } + return matched + } + var rs []string + for _, p := range expandDir(dir) { + rs = append(rs, filepath.Join(p, f)) + } + return rs +} + +type CacheManager interface { + cache(key string, p *Page, force bool) + remove(key string) + load(key string) (ReadCloser, error) + uploaded(key string, size int) + stage(key string, data []byte, keepCache bool) (string, error) + stagePath(key string) string + stats() (int64, int64) + usedMemory() int64 +} + +func newCacheManager(config *Config, uploader func(key, path string)) CacheManager { + if config.CacheDir == "memory" || config.CacheSize == 0 { + return newMemStore(config) + } + var dirs []string + for _, d := range utils.SplitDir(config.CacheDir) { + dd := expandDir(d) + if config.AutoCreate { + dirs = append(dirs, dd...) + } else { + for _, d := range dd { + if fi, err := os.Stat(d); err == nil && fi.IsDir() { + dirs = append(dirs, d) + } + } + } + } + if len(dirs) == 0 { + logger.Warnf("No cache dir existed") + return newMemStore(config) + } + sort.Strings(dirs) + dirCacheSize := config.CacheSize << 20 + dirCacheSize /= int64(len(dirs)) + m := &cacheManager{ + stores: make([]*cacheStore, len(dirs)), + } + // 20% of buffer could be used for pending pages + pendingPages := config.BufferSize * 2 / 10 / config.BlockSize / len(dirs) + for i, d := range dirs { + m.stores[i] = newCacheStore(strings.TrimSpace(d)+string(filepath.Separator), dirCacheSize, pendingPages, config, uploader) + } + return m +} + +func (m *cacheManager) getStore(key string) *cacheStore { + return m.stores[keyHash(key)%uint32(len(m.stores))] +} + +func (m *cacheManager) usedMemory() int64 { + var used int64 + for _, s := range m.stores { + used += s.usedMemory() + } + return used +} + +func (m *cacheManager) stats() (int64, int64) { + var cnt, used int64 + for _, s := range m.stores { + c, u := s.stats() + cnt += c + used += u + } + return cnt, used +} + +func (m *cacheManager) cache(key string, p *Page, force bool) { + m.getStore(key).cache(key, p, force) +} + +type ReadCloser interface { + io.Reader + io.ReaderAt + io.Closer +} + +func (m *cacheManager) load(key string) (ReadCloser, error) { + return m.getStore(key).load(key) +} + +func (m *cacheManager) remove(key string) { + m.getStore(key).remove(key) +} + +func (m *cacheManager) stage(key string, data []byte, keepCache bool) (string, error) { + return m.getStore(key).stage(key, data, keepCache) +} + +func (m *cacheManager) stagePath(key string) string { + return m.getStore(key).stagePath(key) +} + +func (m *cacheManager) uploaded(key string, size int) { + m.getStore(key).uploaded(key, size) +} diff --git a/pkg/chunk/disk_cache_test.go b/pkg/chunk/disk_cache_test.go new file mode 100644 index 0000000..c810fc7 --- /dev/null +++ b/pkg/chunk/disk_cache_test.go @@ -0,0 +1,81 @@ +/* + * JuiceFS, Copyright 2020 Juicedata, Inc. 
+ * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package chunk + +import ( + "os" + "path/filepath" + "testing" + "time" +) + +func TestNewCacheStore(t *testing.T) { + s := newCacheStore(defaultConf.CacheDir, 1<<30, 1, &defaultConf, nil) + if s == nil { + t.Fatalf("Create new cache store failed") + } +} + +func TestExpand(t *testing.T) { + rs := expandDir("/not/exists/jfsCache") + if len(rs) != 1 || rs[0] != "/not/exists/jfsCache" { + t.Errorf("expand: %v", rs) + t.FailNow() + } + + dir := t.TempDir() + _ = os.Mkdir(filepath.Join(dir, "aaa1"), 0755) + _ = os.Mkdir(filepath.Join(dir, "aaa2"), 0755) + _ = os.Mkdir(filepath.Join(dir, "aaa3"), 0755) + _ = os.Mkdir(filepath.Join(dir, "aaa3", "jfscache"), 0755) + _ = os.Mkdir(filepath.Join(dir, "aaa3", "jfscache", "jfs"), 0755) + + rs = expandDir(filepath.Join(dir, "aaa*", "jfscache", "jfs")) + if len(rs) != 3 || rs[0] != filepath.Join(dir, "aaa1", "jfscache", "jfs") { + t.Errorf("expand: %v", rs) + t.FailNow() + } +} + +func BenchmarkLoadCached(b *testing.B) { + dir := b.TempDir() + s := newCacheStore(filepath.Join(dir, "diskCache"), 1<<30, 1, &defaultConf, nil) + p := NewPage(make([]byte, 1024)) + key := "/chunks/1_1024" + s.cache(key, p, false) + time.Sleep(time.Millisecond * 100) + b.ResetTimer() + for i := 0; i < b.N; i++ { + if f, e := s.load(key); e == nil { + _ = f.Close() + } else { + b.FailNow() + } + } +} + +func BenchmarkLoadUncached(b *testing.B) { + dir := b.TempDir() + s := newCacheStore(filepath.Join(dir, "diskCache"), 1<<30, 1, &defaultConf, nil) + key := "/chunks/222_1024" + b.ResetTimer() + for i := 0; i < b.N; i++ { + if f, e := s.load(key); e != nil { + _ = f.Close() + } + } +} diff --git a/pkg/chunk/mem_cache.go b/pkg/chunk/mem_cache.go new file mode 100644 index 0000000..8dc586c --- /dev/null +++ b/pkg/chunk/mem_cache.go @@ -0,0 +1,139 @@ +/* + * JuiceFS, Copyright 2020 Juicedata, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package chunk + +import ( + "errors" + "runtime" + "sync" + "time" +) + +type memItem struct { + atime time.Time + page *Page +} + +type memcache struct { + sync.Mutex + capacity int64 + used int64 + pages map[string]memItem +} + +func newMemStore(config *Config) *memcache { + c := &memcache{ + capacity: config.CacheSize << 20, + pages: make(map[string]memItem), + } + runtime.SetFinalizer(c, func(c *memcache) { + for _, p := range c.pages { + p.page.Release() + } + c.pages = nil + }) + return c +} + +func (c *memcache) usedMemory() int64 { + c.Lock() + defer c.Unlock() + return c.used +} + +func (c *memcache) stats() (int64, int64) { + c.Lock() + defer c.Unlock() + return int64(len(c.pages)), c.used +} + +func (c *memcache) cache(key string, p *Page, force bool) { + if c.capacity == 0 { + return + } + c.Lock() + defer c.Unlock() + if _, ok := c.pages[key]; ok { + return + } + size := int64(cap(p.Data)) + cacheWrites.Add(1) + cacheWriteBytes.Add(float64(size)) + p.Acquire() + c.pages[key] = memItem{time.Now(), p} + c.used += size + if c.used > c.capacity { + c.cleanup() + } +} + +func (c *memcache) delete(key string, p *Page) { + size := int64(cap(p.Data)) + c.used -= size + p.Release() + delete(c.pages, key) +} + +func (c *memcache) remove(key string) { + c.Lock() + defer c.Unlock() + if item, ok := c.pages[key]; ok { + c.delete(key, item.page) + logger.Debugf("remove %s from cache", key) + } +} + +func (c *memcache) load(key string) (ReadCloser, error) { + c.Lock() + defer c.Unlock() + if item, ok := c.pages[key]; ok { + c.pages[key] = memItem{time.Now(), item.page} + return NewPageReader(item.page), nil + } + return nil, errors.New("not found") +} + +// locked +func (c *memcache) cleanup() { + var cnt int + var lastKey string + var lastValue memItem + var now = time.Now() + // for each two random keys, then compare the access time, evict the older one + for k, v := range c.pages { + if cnt == 0 || lastValue.atime.After(v.atime) { + lastKey = k + lastValue = v + } + cnt++ + if cnt > 1 { + logger.Debugf("remove %s from cache, age: %d", lastKey, now.Sub(lastValue.atime)) + cacheEvicts.Add(1) + c.delete(lastKey, lastValue.page) + cnt = 0 + if c.used < c.capacity { + break + } + } + } +} + +func (c *memcache) stage(key string, data []byte, keepCache bool) (string, error) { + return "", errors.New("not supported") +} +func (c *memcache) uploaded(key string, size int) {} +func (c *memcache) stagePath(key string) string { return "" } diff --git a/pkg/chunk/page.go b/pkg/chunk/page.go new file mode 100644 index 0000000..91a698b --- /dev/null +++ b/pkg/chunk/page.go @@ -0,0 +1,138 @@ +/* + * JuiceFS, Copyright 2020 Juicedata, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package chunk + +import ( + "errors" + "io" + "os" + "runtime" + "runtime/debug" + "sync/atomic" + + "github.com/juicedata/juicefs/pkg/utils" +) + +var pageStack = os.Getenv("JFS_PAGE_STACK") != "" + +// Page is a page with refcount +type Page struct { + refs int32 + offheap bool + dep *Page + Data []byte + stack []byte +} + +// NewPage create a new page. +func NewPage(data []byte) *Page { + return &Page{refs: 1, Data: data} +} + +func NewOffPage(size int) *Page { + if size <= 0 { + panic("size of page should > 0") + } + p := utils.Alloc(size) + page := &Page{refs: 1, offheap: true, Data: p} + if pageStack { + page.stack = debug.Stack() + } + runtime.SetFinalizer(page, func(p *Page) { + refcnt := atomic.LoadInt32(&p.refs) + if refcnt != 0 { + logger.Errorf("refcount of page %p (%d bytes) is not zero: %d, created by: %s", p, cap(p.Data), refcnt, string(p.stack)) + if refcnt > 0 { + p.Release() + } + } + }) + return page +} + +func (p *Page) Slice(off, len int) *Page { + p.Acquire() + np := NewPage(p.Data[off : off+len]) + np.dep = p + return np +} + +// Acquire increase the refcount +func (p *Page) Acquire() { + if pageStack { + p.stack = append(p.stack, debug.Stack()...) + } + atomic.AddInt32(&p.refs, 1) +} + +// Release decrease the refcount +func (p *Page) Release() { + if pageStack { + p.stack = append(p.stack, debug.Stack()...) + } + if atomic.AddInt32(&p.refs, -1) == 0 { + if p.offheap { + utils.Free(p.Data) + } + if p.dep != nil { + p.dep.Release() + p.dep = nil + } + p.Data = nil + } +} + +type pageReader struct { + p *Page + off int +} + +func NewPageReader(p *Page) *pageReader { + p.Acquire() + return &pageReader{p, 0} +} + +func (r *pageReader) Read(buf []byte) (int, error) { + n, err := r.ReadAt(buf, int64(r.off)) + r.off += n + return n, err +} + +func (r *pageReader) ReadAt(buf []byte, off int64) (int, error) { + if len(buf) == 0 { + return 0, nil + } + if r.p == nil { + return 0, errors.New("page is already released") + } + if int(off) == len(r.p.Data) { + return 0, io.EOF + } + n := copy(buf, r.p.Data[off:]) + if n < len(buf) { + return n, io.EOF + } + return n, nil +} + +func (r *pageReader) Close() error { + if r.p != nil { + r.p.Release() + r.p = nil + } + return nil +} diff --git a/pkg/chunk/page_test.go b/pkg/chunk/page_test.go new file mode 100644 index 0000000..d122772 --- /dev/null +++ b/pkg/chunk/page_test.go @@ -0,0 +1,81 @@ +/* + * JuiceFS, Copyright 2020 Juicedata, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package chunk + +import ( + "io" + "testing" +) + +func TestPage(t *testing.T) { + p1 := NewOffPage(1) + if len(p1.Data) != 1 { + t.Fail() + } + if cap(p1.Data) != 1 { + t.Fail() + } + p1.Acquire() + p1.Release() + if p1.Data == nil { + t.Fail() + } + + p2 := p1.Slice(0, 1) + p1.Release() + if p1.Data == nil { + t.Fail() + } + + p2.Release() + if p2.Data != nil { + t.Fail() + } + if p1.Data != nil { + t.Fail() + } +} + +func TestPageReader(t *testing.T) { + data := []byte("hello") + p := NewPage(data) + r := NewPageReader(p) + + if n, err := r.Read(nil); n != 0 || err != nil { + t.Fatalf("read should return 0") + } + buf := make([]byte, 3) + if n, err := r.Read(buf); n != 3 || err != nil { + t.Fatalf("read should return 3 but got %d", n) + } + if n, err := r.Read(buf); n != 2 || (err != nil && err != io.EOF) { + t.Fatalf("read should return 2 but got %d", n) + } + if n, err := r.Read(buf); n != 0 || err != io.EOF { + t.Fatalf("read should return 0") + } + if n, err := r.ReadAt(buf, 4); n != 1 || (err != nil && err != io.EOF) { + t.Fatalf("read should return 1") + } + if n, err := r.ReadAt(buf, 5); n != 0 || err != io.EOF { + t.Fatalf("read should return 0") + } + _ = r.Close() + if n, err := r.ReadAt(buf, 5); n != 0 || err == nil { + t.Fatalf("read should fail after close") + } +} diff --git a/pkg/chunk/prefetch.go b/pkg/chunk/prefetch.go new file mode 100644 index 0000000..17528d1 --- /dev/null +++ b/pkg/chunk/prefetch.go @@ -0,0 +1,61 @@ +/* + * JuiceFS, Copyright 2020 Juicedata, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package chunk + +import "sync" + +type prefetcher struct { + sync.Mutex + pending chan string + busy map[string]bool + op func(key string) +} + +func newPrefetcher(parallel int, fetch func(string)) *prefetcher { + p := &prefetcher{ + pending: make(chan string, 10), + busy: make(map[string]bool), + op: fetch, + } + for i := 0; i < parallel; i++ { + go p.do() + } + return p +} + +func (p *prefetcher) do() { + for key := range p.pending { + p.Lock() + if _, ok := p.busy[key]; !ok { + p.busy[key] = true + p.Unlock() + + p.op(key) + + p.Lock() + delete(p.busy, key) + } + p.Unlock() + } +} + +func (p *prefetcher) fetch(key string) { + select { + case p.pending <- key: + default: + } +} diff --git a/pkg/chunk/prefetch_test.go b/pkg/chunk/prefetch_test.go new file mode 100644 index 0000000..350f15f --- /dev/null +++ b/pkg/chunk/prefetch_test.go @@ -0,0 +1,48 @@ +package chunk + +import ( + "sync/atomic" + "testing" + "time" +) + +func TestPrefetcher(t *testing.T) { + t.Run("should fetch given keys", func(t *testing.T) { + keys := []string{"source/1", "source/2", "source/3", "source/4"} + chRes := make(chan string, len(keys)) + defer close(chRes) + f := newPrefetcher(2, func(k string) { + chRes <- k + "Done" + }) + for _, k := range keys { + f.fetch(k) + } + res := make(map[string]bool, len(keys)) + for range keys { + res[<-chRes] = true + } + if len(res) != len(keys) { + t.Errorf("Incorrect number of keys fetched, expect: %d, got: %d", len(keys), len(res)) + } + for _, k := range keys { + if !res[k+"Done"] { + t.Errorf("Key not fetched: %s", k) + } + } + }) + t.Run("should ignore duplicate keys", func(t *testing.T) { + var counter int32 + f := newPrefetcher(4, func(k string) { + // Introduce a little latency to mimic a slower fetch operation + // so that our few duplicate keys can reach the prefetcher in the time period + time.Sleep(time.Millisecond) + atomic.AddInt32(&counter, 1) + }) + for i := 0; i < 5; i++ { + f.fetch("a") + } + if atomic.LoadInt32(&counter) > 1 { + t.Errorf("Duplicate keys fetched") + } + }) +} diff --git a/pkg/chunk/singleflight.go b/pkg/chunk/singleflight.go new file mode 100644 index 0000000..ee9d852 --- /dev/null +++ b/pkg/chunk/singleflight.go @@ -0,0 +1,70 @@ +/* + * JuiceFS, Copyright 2020 Juicedata, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package chunk + +import "sync" + +type request struct { + wg sync.WaitGroup + val *Page + ref int + err error +} + +type Controller struct { + sync.Mutex + rs map[string]*request +} + +func (con *Controller) Execute(key string, fn func() (*Page, error)) (*Page, error) { + con.Lock() + if con.rs == nil { + con.rs = make(map[string]*request) + } + if c, ok := con.rs[key]; ok { + c.ref++ + con.Unlock() + c.wg.Wait() + c.val.Acquire() + con.Lock() + c.ref-- + if c.ref == 0 { + c.val.Release() + } + con.Unlock() + return c.val, c.err + } + c := new(request) + c.wg.Add(1) + c.ref++ + con.rs[key] = c + con.Unlock() + + c.val, c.err = fn() + c.val.Acquire() + c.wg.Done() + + con.Lock() + c.ref-- + if c.ref == 0 { + c.val.Release() + } + delete(con.rs, key) + con.Unlock() + + return c.val, c.err +} diff --git a/pkg/chunk/singleflight_test.go b/pkg/chunk/singleflight_test.go new file mode 100644 index 0000000..bb818eb --- /dev/null +++ b/pkg/chunk/singleflight_test.go @@ -0,0 +1,41 @@ +/* + * JuiceFS, Copyright 2020 Juicedata, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package chunk + +import ( + "strconv" + "sync" + "testing" + "time" +) + +func TestSingleFlight(t *testing.T) { + g := &Controller{} + gp := &sync.WaitGroup{} + for i := 0; i < 100000; i++ { + gp.Add(1) + go func(k int) { + p, _ := g.Execute(strconv.Itoa(k/1000), func() (*Page, error) { + time.Sleep(time.Microsecond * 1000) + return NewOffPage(100), nil + }) + p.Release() + gp.Done() + }(i) + } + gp.Wait() +} diff --git a/pkg/chunk/utils_darwin.go b/pkg/chunk/utils_darwin.go new file mode 100644 index 0000000..dae2440 --- /dev/null +++ b/pkg/chunk/utils_darwin.go @@ -0,0 +1,31 @@ +/* + * JuiceFS, Copyright 2020 Juicedata, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package chunk + +import ( + "os" + "syscall" + "time" +) + +func getAtime(fi os.FileInfo) time.Time { + if sst, ok := fi.Sys().(*syscall.Stat_t); ok { + return time.Unix(sst.Atimespec.Unix()) + } else { + return fi.ModTime() + } +} diff --git a/pkg/chunk/utils_linux.go b/pkg/chunk/utils_linux.go new file mode 100644 index 0000000..e8eaca5 --- /dev/null +++ b/pkg/chunk/utils_linux.go @@ -0,0 +1,30 @@ +/* + * JuiceFS, Copyright 2020 Juicedata, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package chunk + +import ( + "os" + "syscall" + "time" +) + +func getAtime(fi os.FileInfo) time.Time { + if sst, ok := fi.Sys().(*syscall.Stat_t); ok { + return time.Unix(sst.Atim.Unix()) + } + return fi.ModTime() +} diff --git a/pkg/chunk/utils_unix.go b/pkg/chunk/utils_unix.go new file mode 100644 index 0000000..e8b552b --- /dev/null +++ b/pkg/chunk/utils_unix.go @@ -0,0 +1,49 @@ +//go:build !windows +// +build !windows + +/* + * JuiceFS, Copyright 2020 Juicedata, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package chunk + +import ( + "os" + "syscall" +) + +func getNlink(fi os.FileInfo) int { + if sst, ok := fi.Sys().(*syscall.Stat_t); ok { + return int(sst.Nlink) + } + return 1 +} + +func getDiskUsage(path string) (uint64, uint64, uint64, uint64) { + var stat syscall.Statfs_t + if err := syscall.Statfs(path, &stat); err == nil { + return stat.Blocks * uint64(stat.Bsize), stat.Bavail * uint64(stat.Bsize), stat.Files, stat.Ffree + } else { + logger.Warnf("statfs %s: %s", path, err) + return 1, 1, 1, 1 + } +} + +func changeMode(dir string, st os.FileInfo, mode os.FileMode) { + sst := st.Sys().(*syscall.Stat_t) + if os.Getuid() == int(sst.Uid) { + _ = os.Chmod(dir, mode) + } +} diff --git a/pkg/chunk/utils_windows.go b/pkg/chunk/utils_windows.go new file mode 100644 index 0000000..69d5bb4 --- /dev/null +++ b/pkg/chunk/utils_windows.go @@ -0,0 +1,50 @@ +/* + * JuiceFS, Copyright 2020 Juicedata, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package chunk + +import ( + "os" + "syscall" + "time" + + sys "golang.org/x/sys/windows" +) + +func getAtime(fi os.FileInfo) time.Time { + stat, ok := fi.Sys().(*syscall.Win32FileAttributeData) + if ok { + return time.Unix(0, stat.LastAccessTime.Nanoseconds()) + } else { + return time.Unix(0, 0) + } +} + +func getNlink(fi os.FileInfo) int { + return 1 +} + +func getDiskUsage(path string) (uint64, uint64, uint64, uint64) { + var freeBytes, total, totalFree uint64 + err := sys.GetDiskFreeSpaceEx(sys.StringToUTF16Ptr(path), &freeBytes, &total, &totalFree) + if err != nil { + logger.Errorf("GetDiskFreeSpaceEx %s: %s", path, err.Error()) + return 1, 1, 1, 1 + } + return total, freeBytes, 1, 1 +} + +func changeMode(dir string, st os.FileInfo, mode os.FileMode) {} diff --git a/pkg/compress/compress.go b/pkg/compress/compress.go new file mode 100644 index 0000000..35cf21b --- /dev/null +++ b/pkg/compress/compress.go @@ -0,0 +1,122 @@ +/* + * JuiceFS, Copyright 2020 Juicedata, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package compress + +import ( + "fmt" + "strings" + + "github.com/DataDog/zstd" + "github.com/hungys/go-lz4" +) + +// ZSTD_LEVEL compression level used by Zstd +const ZSTD_LEVEL = 1 // fastest + +// Compressor interface to be implemented by a compression algo +type Compressor interface { + Name() string + CompressBound(int) int + Compress(dst, src []byte) (int, error) + Decompress(dst, src []byte) (int, error) +} + +// NewCompressor returns a struct implementing Compressor interface +func NewCompressor(algr string) Compressor { + algr = strings.ToLower(algr) + if algr == "zstd" { + return ZStandard{ZSTD_LEVEL} + } else if algr == "lz4" { + return LZ4{} + } else if algr == "none" || algr == "" { + return noOp{} + } + return nil +} + +type noOp struct{} + +func (n noOp) Name() string { return "Noop" } +func (n noOp) CompressBound(l int) int { return l } +func (n noOp) Compress(dst, src []byte) (int, error) { + if len(dst) < len(src) { + return 0, fmt.Errorf("buffer too short: %d < %d", len(dst), len(src)) + } + copy(dst, src) + return len(src), nil +} +func (n noOp) Decompress(dst, src []byte) (int, error) { + if len(dst) < len(src) { + return 0, fmt.Errorf("buffer too short: %d < %d", len(dst), len(src)) + } + copy(dst, src) + return len(src), nil +} + +// ZStandard implements Compressor interface using zstd library +type ZStandard struct { + level int +} + +// Name returns name of the algorithm Zstd +func (n ZStandard) Name() string { return "Zstd" } + +// CompressBound max size of compressed data +func (n ZStandard) CompressBound(l int) int { return zstd.CompressBound(l) } + +// Compress using Zstd +func (n ZStandard) Compress(dst, src []byte) (int, error) { + d, err := zstd.CompressLevel(dst, src, n.level) + if err != nil { + return 0, err + } + if len(d) > 0 && len(dst) > 0 && &d[0] != &dst[0] { + return 0, fmt.Errorf("buffer too short: %d < %d", cap(dst), cap(d)) + } + return len(d), err +} + +// Decompress using Zstd +func (n ZStandard) 
Decompress(dst, src []byte) (int, error) { + d, err := zstd.Decompress(dst, src) + if err != nil { + return 0, err + } + if len(d) > 0 && len(dst) > 0 && &d[0] != &dst[0] { + return 0, fmt.Errorf("buffer too short: %d < %d", len(dst), len(d)) + } + return len(d), err +} + +// LZ4 implements Compressor using LZ4 library +type LZ4 struct{} + +// Name returns name of the algorithm LZ4 +func (l LZ4) Name() string { return "LZ4" } + +// CompressBound max size of compressed data +func (l LZ4) CompressBound(size int) int { return lz4.CompressBound(size) } + +// Compress using LZ4 algorithm +func (l LZ4) Compress(dst, src []byte) (int, error) { + return lz4.CompressDefault(src, dst) +} + +// Decompress using LZ4 algorithm +func (l LZ4) Decompress(dst, src []byte) (int, error) { + return lz4.DecompressSafe(src, dst) +} diff --git a/pkg/compress/compress_test.go b/pkg/compress/compress_test.go new file mode 100644 index 0000000..ec3fc83 --- /dev/null +++ b/pkg/compress/compress_test.go @@ -0,0 +1,135 @@ +/* + * JuiceFS, Copyright 2020 Juicedata, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package compress + +import ( + "io" + "os" + "testing" +) + +func testCompress(t *testing.T, c Compressor) { + src := []byte(c.Name()) + _, err := c.Compress(make([]byte, 1), src) + if err == nil { + t.Fatal("expect short buffer error, but got nil ") + } + dst := make([]byte, c.CompressBound(len(src))) + n, err := c.Compress(dst, src) + if err != nil { + t.Fatalf("compress: %s", err) + } + _, err = c.Decompress(make([]byte, 1), dst[:n]) + if err == nil { + t.Fatalf("expect short buffer error, but got nil") + } + src2 := make([]byte, len(src)) + n, err = c.Decompress(src2, dst[:n]) + if err != nil { + t.Fatalf("decompress: %s", err) + } + if string(src2[:n]) != string(src) { + t.Fatalf("expect %s but got %s", string(src), string(src2)) + } +} + +func TestUncompressed(t *testing.T) { + testCompress(t, NewCompressor("none")) +} + +func TestZstd(t *testing.T) { + testCompress(t, NewCompressor("zstd")) +} + +func TestLZ4(t *testing.T) { + testCompress(t, NewCompressor("lz4")) +} + +func benchmarkDecompress(b *testing.B, comp Compressor) { + f, _ := os.Open(os.Getenv("PAYLOAD")) + var c = make([]byte, 5<<20) + var d = make([]byte, 4<<20) + n, err := io.ReadFull(f, d) + f.Close() + if err != nil { + b.Skip() + return + } + d = d[:n] + n, err = comp.Compress(c[:4<<20], d) + if err != nil { + b.Errorf("compress: %s", err) + b.FailNow() + } + c = c[:n] + // println("compres", comp.Name(), len(c), len(d)) + b.ResetTimer() + for i := 0; i < b.N; i++ { + n, err := comp.Decompress(d, c) + if err != nil { + b.Errorf("decompress %d %s", n, err) + b.FailNow() + } + b.SetBytes(int64(len(d))) + } +} + +func BenchmarkDecompressZstd(b *testing.B) { + benchmarkDecompress(b, NewCompressor("zstd")) +} + +func BenchmarkDecompressLZ4(b *testing.B) { + benchmarkDecompress(b, LZ4{}) +} + +func BenchmarkDecompressNone(b *testing.B) { + benchmarkDecompress(b, NewCompressor("none")) +} + +func benchmarkCompress(b 
*testing.B, comp Compressor) { + f, _ := os.Open(os.Getenv("PAYLOAD")) + var d = make([]byte, 4<<20) + n, err := io.ReadFull(f, d) + f.Close() + if err != nil { + b.Skip() + return + } + d = d[:n] + var c = make([]byte, 5<<20) + // println("compres", comp.Name(), len(c), len(d)) + b.ResetTimer() + for i := 0; i < b.N; i++ { + n, err := comp.Compress(c, d) + if err != nil { + b.Errorf("compress %d %s", n, err) + b.FailNow() + } + b.SetBytes(int64(len(d))) + } +} + +func BenchmarkCompressZstd(b *testing.B) { + benchmarkCompress(b, NewCompressor("Zstd")) +} + +func BenchmarkCompressCLZ4(b *testing.B) { + benchmarkCompress(b, LZ4{}) +} +func BenchmarkCompressNone(b *testing.B) { + benchmarkCompress(b, NewCompressor("none")) +} diff --git a/pkg/fs/fs.go b/pkg/fs/fs.go new file mode 100644 index 0000000..8f972aa --- /dev/null +++ b/pkg/fs/fs.go @@ -0,0 +1,1136 @@ +/* + * JuiceFS, Copyright 2021 Juicedata, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package fs + +import ( + "bytes" + "context" + "fmt" + "io" + "os" + "path" + "path/filepath" + "runtime/trace" + "strconv" + "strings" + "sync" + "syscall" + "time" + + "github.com/juicedata/juicefs/pkg/chunk" + "github.com/juicedata/juicefs/pkg/meta" + "github.com/juicedata/juicefs/pkg/utils" + "github.com/juicedata/juicefs/pkg/vfs" +) + +var logger = utils.GetLogger("juicefs") + +var checkAccessFile = time.Minute +var rotateAccessLog int64 = 300 << 20 // 300 MiB + +type Ino = meta.Ino +type Attr = meta.Attr +type LogContext = vfs.LogContext + +func IsExist(err error) bool { + return err == syscall.EEXIST || err == syscall.EACCES || err == syscall.EPERM +} + +func IsNotExist(err error) bool { + return err == syscall.ENOENT +} + +func IsNotEmpty(err error) bool { + return err == syscall.ENOTEMPTY +} + +func errstr(e error) string { + if e == nil { + return "OK" + } + if eno, ok := e.(syscall.Errno); ok && eno == 0 { + return "OK" + } + return e.Error() +} + +type FileStat struct { + name string + inode Ino + attr *Attr +} + +func (fs *FileStat) Inode() Ino { return fs.inode } +func (fs *FileStat) Name() string { return fs.name } +func (fs *FileStat) Size() int64 { return int64(fs.attr.Length) } +func (fs *FileStat) Mode() os.FileMode { + attr := fs.attr + mode := os.FileMode(attr.Mode & 0777) + if attr.Mode&04000 != 0 { + mode |= os.ModeSetuid + } + if attr.Mode&02000 != 0 { + mode |= os.ModeSetgid + } + if attr.Mode&01000 != 0 { + mode |= os.ModeSticky + } + switch attr.Typ { + case meta.TypeDirectory: + mode |= os.ModeDir + case meta.TypeSymlink: + mode |= os.ModeSymlink + case meta.TypeFile: + default: + } + return mode +} +func (fs *FileStat) ModTime() time.Time { + return time.Unix(fs.attr.Mtime, int64(fs.attr.Mtimensec)) +} +func (fs *FileStat) IsDir() bool { return fs.attr.Typ == meta.TypeDirectory } +func (fs *FileStat) IsSymlink() bool { return fs.attr.Typ == meta.TypeSymlink } +func (fs *FileStat) Sys() interface{} { return fs.attr } +func (fs *FileStat) Uid() int { return int(fs.attr.Uid) } +func (fs *FileStat) 
Gid() int { return int(fs.attr.Gid) } + +func (fs *FileStat) Atime() int64 { return fs.attr.Atime*1000 + int64(fs.attr.Atimensec/1e6) } +func (fs *FileStat) Mtime() int64 { return fs.attr.Mtime*1000 + int64(fs.attr.Mtimensec/1e6) } + +func AttrToFileInfo(inode Ino, attr *Attr) *FileStat { + return &FileStat{inode: inode, attr: attr} +} + +type entryCache struct { + inode Ino + typ uint8 + expire time.Time +} + +type attrCache struct { + attr Attr + expire time.Time +} + +type FileSystem struct { + conf *vfs.Config + reader vfs.DataReader + writer vfs.DataWriter + m meta.Meta + + cacheM sync.Mutex + entries map[Ino]map[string]*entryCache + attrs map[Ino]*attrCache + + logBuffer chan string +} + +type File struct { + path string + inode Ino + info *FileStat + fs *FileSystem + + sync.Mutex + flags uint32 + offset int64 + rdata vfs.FileReader + wdata vfs.FileWriter + dircache []os.FileInfo + entries []*meta.Entry +} + +func NewFileSystem(conf *vfs.Config, m meta.Meta, d chunk.ChunkStore) (*FileSystem, error) { + reader := vfs.NewDataReader(conf, m, d) + fs := &FileSystem{ + m: m, + conf: conf, + reader: reader, + writer: vfs.NewDataWriter(conf, m, d, reader), + entries: make(map[meta.Ino]map[string]*entryCache), + attrs: make(map[meta.Ino]*attrCache), + } + go fs.cleanupCache() + if conf.AccessLog != "" { + f, err := os.OpenFile(conf.AccessLog, os.O_CREATE|os.O_WRONLY|os.O_APPEND, 0666) + if err != nil { + logger.Errorf("Open access log %s: %s", conf.AccessLog, err) + } else { + _ = os.Chmod(conf.AccessLog, 0666) + fs.logBuffer = make(chan string, 1024) + go fs.flushLog(f, fs.logBuffer, conf.AccessLog) + } + } + return fs, nil +} + +func (fs *FileSystem) cleanupCache() { + for { + fs.cacheM.Lock() + now := time.Now() + var cnt int + for inode, it := range fs.attrs { + if now.After(it.expire) { + delete(fs.attrs, inode) + } + cnt++ + if cnt > 1000 { + break + } + } + cnt = 0 + OUTER: + for inode, es := range fs.entries { + for n, e := range es { + if now.After(e.expire) { + delete(es, n) + if len(es) == 0 { + delete(fs.entries, inode) + } + } + cnt++ + if cnt > 1000 { + break OUTER + } + } + } + fs.cacheM.Unlock() + time.Sleep(time.Second) + } +} + +func (fs *FileSystem) invalidateEntry(parent Ino, name string) { + fs.cacheM.Lock() + defer fs.cacheM.Unlock() + es, ok := fs.entries[parent] + if ok { + delete(es, name) + if len(es) == 0 { + delete(fs.entries, parent) + } + } +} + +func (fs *FileSystem) invalidateAttr(ino Ino) { + fs.cacheM.Lock() + defer fs.cacheM.Unlock() + delete(fs.attrs, ino) +} + +func (fs *FileSystem) log(ctx LogContext, format string, args ...interface{}) { + used := ctx.Duration() + opsDurationsHistogram.Observe(used.Seconds()) + if fs.logBuffer == nil { + return + } + now := utils.Now() + cmd := fmt.Sprintf(format, args...) + ts := now.Format("2006.01.02 15:04:05.000000") + cmd += fmt.Sprintf(" <%.6f>", used.Seconds()) + line := fmt.Sprintf("%s [uid:%d,gid:%d,pid:%d] %s\n", ts, ctx.Uid(), ctx.Gid(), ctx.Pid(), cmd) + select { + case fs.logBuffer <- line: + default: + logger.Debugf("log dropped: %s", line[:len(line)-1]) + } +} + +func (fs *FileSystem) flushLog(f *os.File, logBuffer chan string, path string) { + buf := make([]byte, 0, 128<<10) + var lastcheck = time.Now() + for { + line := <-logBuffer + buf = append(buf[:0], []byte(line)...) + LOOP: + for len(buf) < (128 << 10) { + select { + case line = <-logBuffer: + buf = append(buf, []byte(line)...) 
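+ // channel drained: stop batching and write the lines collected so far (each batch is capped at roughly 128 KiB)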
+ default: + break LOOP + } + } + _, err := f.Write(buf) + if err != nil { + logger.Errorf("write access log: %s", err) + break + } + if lastcheck.Add(checkAccessFile).After(time.Now()) { + continue + } + lastcheck = time.Now() + var fi os.FileInfo + fi, err = f.Stat() + if err == nil && fi.Size() > rotateAccessLog { + _ = f.Close() + fi, err = os.Stat(path) + if err == nil && fi.Size() > rotateAccessLog { + tmp := fmt.Sprintf("%s.%p", path, fs) + if os.Rename(path, tmp) == nil { + for i := 6; i > 0; i-- { + _ = os.Rename(path+"."+strconv.Itoa(i), path+"."+strconv.Itoa(i+1)) + } + _ = os.Rename(tmp, path+".1") + } else { + fi, err = os.Stat(path) + if err == nil && fi.Size() > rotateAccessLog*7 { + logger.Infof("can't rename %s, truncate it", path) + _ = os.Truncate(path, 0) + } + } + } + f, err = os.OpenFile(path, os.O_CREATE|os.O_WRONLY|os.O_APPEND, 0666) + if err != nil { + logger.Errorf("open %s: %s", path, err) + break + } + _ = os.Chmod(path, 0666) + } + } +} + +func (fs *FileSystem) Meta() meta.Meta { + return fs.m +} + +func (fs *FileSystem) StatFS(ctx meta.Context) (totalspace uint64, availspace uint64) { + defer trace.StartRegion(context.TODO(), "fs.StatFS").End() + l := vfs.NewLogContext(ctx) + defer func() { fs.log(l, "StatFS (): (%d,%d)", totalspace, availspace) }() + var iused, iavail uint64 + _ = fs.m.StatFS(ctx, &totalspace, &availspace, &iused, &iavail) + return +} + +func (fs *FileSystem) Open(ctx meta.Context, path string, flags uint32) (f *File, err syscall.Errno) { + _, task := trace.NewTask(context.TODO(), "Open") + defer task.End() + l := vfs.NewLogContext(ctx) + if flags != 0 { + defer func() { fs.log(l, "Open (%s,%d): %s", path, flags, errstr(err)) }() + } else { + defer func() { fs.log(l, "Lookup (%s): %s", path, errstr(err)) }() + } + var fi *FileStat + fi, err = fs.resolve(ctx, path, true) + if err != 0 { + return + } + + if flags != 0 && !fi.IsDir() { + err = fs.m.Access(ctx, fi.inode, uint8(flags), fi.attr) + if err != 0 { + return nil, err + } + var oflags uint32 = syscall.O_RDONLY + if flags == vfs.MODE_MASK_W { + oflags = syscall.O_WRONLY + } else if flags&vfs.MODE_MASK_W != 0 { + oflags = syscall.O_RDWR + } + err = fs.m.Open(ctx, fi.inode, oflags, fi.attr) + if err != 0 { + return + } + } + + f = &File{} + f.path = path + f.inode = fi.inode + f.info = fi + f.fs = fs + f.flags = flags + return +} + +func (fs *FileSystem) Access(ctx meta.Context, path string, flags int) (err syscall.Errno) { + l := vfs.NewLogContext(ctx) + defer func() { fs.log(l, "Access (%s): %s", path, errstr(err)) }() + var fi *FileStat + fi, err = fs.resolve(ctx, path, true) + if err != 0 { + return + } + + if ctx.Uid() != 0 && flags != 0 { + err = fs.m.Access(ctx, fi.inode, uint8(flags), fi.attr) + } + return +} + +func (fs *FileSystem) Stat(ctx meta.Context, path string) (fi *FileStat, err syscall.Errno) { + defer trace.StartRegion(context.TODO(), "fs.Stat").End() + l := vfs.NewLogContext(ctx) + defer func() { fs.log(l, "Stat (%s): %s", path, errstr(err)) }() + return fs.resolve(ctx, path, false) +} + +// parentDir returns parent of /foo/bar/ as /foo +func parentDir(p string) string { + return path.Dir(strings.TrimRight(p, "/")) +} + +func (fs *FileSystem) Mkdir(ctx meta.Context, p string, mode uint16) (err syscall.Errno) { + defer trace.StartRegion(context.TODO(), "fs.Mkdir").End() + l := vfs.NewLogContext(ctx) + defer func() { fs.log(l, "Mkdir (%s, %o): %s", p, mode, errstr(err)) }() + if p == "/" { + return syscall.EEXIST + } + fi, err := fs.resolve(ctx, parentDir(p), true) + if 
err != 0 { + return err + } + err = fs.m.Access(ctx, fi.inode, mMaskW, fi.attr) + if err != 0 { + return err + } + var inode Ino + err = fs.m.Mkdir(ctx, fi.inode, path.Base(p), mode, 0, 0, &inode, nil) + fs.invalidateEntry(fi.inode, path.Base(p)) + return +} + +func (fs *FileSystem) Delete(ctx meta.Context, p string) (err syscall.Errno) { + defer trace.StartRegion(context.TODO(), "fs.Delete").End() + l := vfs.NewLogContext(ctx) + defer func() { fs.log(l, "Delete (%s): %s", p, errstr(err)) }() + parent, err := fs.resolve(ctx, parentDir(p), true) + if err != 0 { + return + } + fi, err := fs.resolve(ctx, p, false) + if err != 0 { + return + } + err = fs.m.Access(ctx, parent.inode, mMaskW, parent.attr) + if err != 0 { + return err + } + if fi.IsDir() { + err = fs.m.Rmdir(ctx, parent.inode, path.Base(p)) + } else { + err = fs.m.Unlink(ctx, parent.inode, path.Base(p)) + } + fs.invalidateEntry(parent.inode, path.Base(p)) + return +} + +func (fs *FileSystem) Rmr(ctx meta.Context, p string) (err syscall.Errno) { + defer trace.StartRegion(context.TODO(), "fs.Rmr").End() + l := vfs.NewLogContext(ctx) + defer func() { fs.log(l, "Rmr (%s): %s", p, errstr(err)) }() + parent, err := fs.resolve(ctx, parentDir(p), true) + if err != 0 { + return + } + err = meta.Remove(fs.m, ctx, parent.inode, path.Base(p)) + fs.invalidateEntry(parent.inode, path.Base(p)) + return +} + +func (fs *FileSystem) Rename(ctx meta.Context, oldpath string, newpath string, flags uint32) (err syscall.Errno) { + defer trace.StartRegion(context.TODO(), "fs.Rename").End() + l := vfs.NewLogContext(ctx) + defer func() { fs.log(l, "Rename (%s,%s,%d): %s", oldpath, newpath, flags, errstr(err)) }() + oldfi, err := fs.resolve(ctx, parentDir(oldpath), true) + if err != 0 { + return + } + err = fs.m.Access(ctx, oldfi.inode, mMaskW, oldfi.attr) + if err != 0 { + return + } + newfi, err := fs.resolve(ctx, parentDir(newpath), true) + if err != 0 { + return + } + err = fs.m.Access(ctx, newfi.inode, mMaskW, newfi.attr) + if err != 0 { + return + } + err = fs.m.Rename(ctx, oldfi.inode, path.Base(oldpath), newfi.inode, path.Base(newpath), flags, nil, nil) + fs.invalidateEntry(oldfi.inode, path.Base(oldpath)) + fs.invalidateEntry(newfi.inode, path.Base(newpath)) + return +} + +func (fs *FileSystem) Symlink(ctx meta.Context, target string, link string) (err syscall.Errno) { + defer trace.StartRegion(context.TODO(), "fs.Symlink").End() + l := vfs.NewLogContext(ctx) + defer func() { fs.log(l, "Symlink (%s,%s): %s", target, link, errstr(err)) }() + if strings.HasSuffix(link, "/") { + return syscall.EINVAL + } + fi, err := fs.resolve(ctx, parentDir(link), true) + if err != 0 { + return + } + rel, e := filepath.Rel(parentDir(link), target) + if e != nil { + // external link + rel = target + } + err = fs.m.Access(ctx, fi.inode, mMaskW, fi.attr) + if err != 0 { + return + } + err = fs.m.Symlink(ctx, fi.inode, path.Base(link), rel, nil, nil) + fs.invalidateEntry(fi.inode, path.Base(link)) + return +} + +func (fs *FileSystem) Readlink(ctx meta.Context, link string) (path []byte, err syscall.Errno) { + defer trace.StartRegion(context.TODO(), "fs.Readlink").End() + l := vfs.NewLogContext(ctx) + defer func() { fs.log(l, "Readlink (%s): %s (%d)", link, errstr(err), len(path)) }() + fi, err := fs.resolve(ctx, link, false) + if err != 0 { + return + } + err = fs.m.ReadLink(ctx, fi.inode, &path) + return +} + +const ( + mMaskR = 4 + mMaskW = 2 + mMaskX = 1 +) + +func (fs *FileSystem) Truncate(ctx meta.Context, path string, length uint64) (err syscall.Errno) { + defer 
trace.StartRegion(context.TODO(), "fs.Truncate").End() + l := vfs.NewLogContext(ctx) + defer func() { fs.log(l, "Truncate (%s,%d): %s", path, length, errstr(err)) }() + fi, err := fs.resolve(ctx, path, true) + if err != 0 { + return + } + err = fs.m.Access(ctx, fi.inode, mMaskW, fi.attr) + if err != 0 { + return + } + err = fs.m.Truncate(ctx, fi.inode, 0, length, nil) + return +} + +func (fs *FileSystem) CopyFileRange(ctx meta.Context, src string, soff uint64, dst string, doff uint64, size uint64) (written uint64, err syscall.Errno) { + defer trace.StartRegion(context.TODO(), "fs.CopyFileRange").End() + l := vfs.NewLogContext(ctx) + defer func() { + fs.log(l, "CopyFileRange (%s,%d,%s,%d,%d): (%d,%s)", dst, doff, src, soff, size, written, errstr(err)) + }() + var dfi, sfi *FileStat + dfi, err = fs.resolve(ctx, dst, true) + if err != 0 { + return + } + err = fs.m.Access(ctx, dfi.inode, mMaskW, dfi.attr) + if err != 0 { + return + } + sfi, err = fs.resolve(ctx, src, true) + if err != 0 { + return + } + err = fs.m.Access(ctx, sfi.inode, mMaskR, sfi.attr) + if err != 0 { + return + } + err = fs.m.CopyFileRange(ctx, sfi.inode, soff, dfi.inode, doff, size, 0, &written) + return +} + +func (fs *FileSystem) SetXattr(ctx meta.Context, p string, name string, value []byte, flags uint32) (err syscall.Errno) { + defer trace.StartRegion(context.TODO(), "fs.SetXattr").End() + l := vfs.NewLogContext(ctx) + defer func() { fs.log(l, "SetXAttr (%s,%s,%d,%d): %s", p, name, len(value), flags, errstr(err)) }() + fi, err := fs.resolve(ctx, p, true) + if err != 0 { + return + } + err = fs.m.Access(ctx, fi.inode, mMaskW, fi.attr) + if err != 0 { + return + } + err = fs.m.SetXattr(ctx, fi.inode, name, value, flags) + return +} + +func (fs *FileSystem) GetXattr(ctx meta.Context, p string, name string) (result []byte, err syscall.Errno) { + defer trace.StartRegion(context.TODO(), "fs.GetXattr").End() + l := vfs.NewLogContext(ctx) + defer func() { fs.log(l, "GetXattr (%s,%s): (%d,%s)", p, name, len(result), errstr(err)) }() + fi, err := fs.resolve(ctx, p, true) + if err != 0 { + return + } + err = fs.m.Access(ctx, fi.inode, mMaskR, fi.attr) + if err != 0 { + return + } + err = fs.m.GetXattr(ctx, fi.inode, name, &result) + return +} + +func (fs *FileSystem) ListXattr(ctx meta.Context, p string) (names []byte, err syscall.Errno) { + defer trace.StartRegion(context.TODO(), "fs.ListXattr").End() + l := vfs.NewLogContext(ctx) + defer func() { fs.log(l, "ListXattr (%s): (%d,%s)", p, len(names), errstr(err)) }() + fi, err := fs.resolve(ctx, p, true) + if err != 0 { + return + } + err = fs.m.Access(ctx, fi.inode, mMaskR, fi.attr) + if err != 0 { + return + } + err = fs.m.ListXattr(ctx, fi.inode, &names) + return +} + +func (fs *FileSystem) RemoveXattr(ctx meta.Context, p string, name string) (err syscall.Errno) { + defer trace.StartRegion(context.TODO(), "fs.RemoveXattr").End() + l := vfs.NewLogContext(ctx) + defer func() { fs.log(l, "RemoveXattr (%s,%s): %s", p, name, errstr(err)) }() + fi, err := fs.resolve(ctx, p, true) + if err != 0 { + return + } + err = fs.m.Access(ctx, fi.inode, mMaskW, fi.attr) + if err != 0 { + return + } + err = fs.m.RemoveXattr(ctx, fi.inode, name) + return +} + +func (fs *FileSystem) lookup(ctx meta.Context, parent Ino, name string, inode *Ino, attr *Attr) (err syscall.Errno) { + now := time.Now() + if fs.conf.DirEntryTimeout > 0 || fs.conf.EntryTimeout > 0 { + fs.cacheM.Lock() + es, ok := fs.entries[parent] + if ok { + e, ok := es[name] + if ok { + if now.Before(e.expire) { + ac := 
fs.attrs[e.inode] + fs.cacheM.Unlock() + *inode = e.inode + if ac == nil || now.After(ac.expire) { + err = fs.m.GetAttr(ctx, e.inode, attr) + if err == 0 && fs.conf.AttrTimeout > 0 { + fs.cacheM.Lock() + fs.attrs[e.inode] = &attrCache{*attr, now.Add(fs.conf.AttrTimeout)} + fs.cacheM.Unlock() + } + } else { + *attr = ac.attr + } + return err + } + delete(es, name) + if len(es) == 0 { + delete(fs.entries, parent) + } + } + } + fs.cacheM.Unlock() + } + + err = fs.m.Lookup(ctx, parent, name, inode, attr) + if err == 0 && (fs.conf.DirEntryTimeout > 0 && attr.Typ == meta.TypeDirectory || fs.conf.EntryTimeout > 0 && attr.Typ != meta.TypeDirectory) { + fs.cacheM.Lock() + if fs.conf.AttrTimeout > 0 { + fs.attrs[*inode] = &attrCache{*attr, now.Add(fs.conf.AttrTimeout)} + } + es, ok := fs.entries[parent] + if !ok { + es = make(map[string]*entryCache) + fs.entries[parent] = es + } + var expire time.Time + if attr.Typ == meta.TypeDirectory { + expire = now.Add(fs.conf.DirEntryTimeout) + } else { + expire = now.Add(fs.conf.EntryTimeout) + } + es[name] = &entryCache{*inode, attr.Typ, expire} + fs.cacheM.Unlock() + } + return err +} + +func (fs *FileSystem) resolve(ctx meta.Context, p string, followLastSymlink bool) (fi *FileStat, err syscall.Errno) { + var inode Ino + var attr = &Attr{} + + if fs.conf.FastResolve { + err = fs.m.Resolve(ctx, 1, p, &inode, attr) + if err == 0 { + fi = AttrToFileInfo(inode, attr) + p = strings.TrimRight(p, "/") + ss := strings.Split(p, "/") + fi.name = ss[len(ss)-1] + } + if err != syscall.ENOTSUP { + return + } + } + + // Fallback to the default implementation that calls `fs.m.Lookup` for each directory along the path. + // It might be slower for deep directories, but it works for every meta that implements `Lookup`. + parent := Ino(1) + ss := strings.Split(p, "/") + for i, name := range ss { + if len(name) == 0 { + continue + } + if parent == 1 && i == len(ss)-1 && vfs.IsSpecialName(name) { + inode, attr := vfs.GetInternalNodeByName(name) + fi = AttrToFileInfo(inode, attr) + parent = inode + break + } + if i > 0 { + if err := fs.m.Access(ctx, parent, mMaskX, attr); err != 0 { + return nil, err + } + } + + var inode Ino + var resolved bool + + err = fs.lookup(ctx, parent, name, &inode, attr) + if i == len(ss)-1 { + resolved = true + } + if err != 0 { + return + } + fi = AttrToFileInfo(inode, attr) + fi.name = name + if (!resolved || followLastSymlink) && fi.IsSymlink() { + var buf []byte + err = fs.m.ReadLink(ctx, inode, &buf) + if err != 0 { + return + } + target := string(buf) + if strings.HasPrefix(target, "/") || strings.Contains(target, "://") { + return &FileStat{name: target}, syscall.ENOTSUP + } + target = path.Join(strings.Join(ss[:i], "/"), target) + fi, err = fs.resolve(ctx, target, followLastSymlink) + if err != 0 { + return + } + inode = fi.Inode() + attr = fi.attr + } + parent = inode + } + if parent == 1 { + err = fs.m.GetAttr(ctx, parent, attr) + if err != 0 { + return + } + fi = AttrToFileInfo(1, attr) + } + return fi, 0 +} + +func (fs *FileSystem) Create(ctx meta.Context, p string, mode uint16) (f *File, err syscall.Errno) { + defer trace.StartRegion(context.TODO(), "fs.Create").End() + l := vfs.NewLogContext(ctx) + defer func() { fs.log(l, "Create (%s,%o): %s", p, mode, errstr(err)) }() + if strings.HasSuffix(p, "/") { + return nil, syscall.EINVAL + } + var inode Ino + var attr = &Attr{} + var fi *FileStat + fi, err = fs.resolve(ctx, parentDir(p), true) + if err != 0 { + return + } + err = fs.m.Access(ctx, fi.inode, mMaskW, fi.attr) + if err != 0 { + 
return + } + err = fs.m.Create(ctx, fi.inode, path.Base(p), mode&07777, 0, syscall.O_EXCL, &inode, attr) + if err == 0 { + fi = AttrToFileInfo(inode, attr) + fi.name = path.Base(p) + f = &File{} + f.flags = vfs.MODE_MASK_W + f.path = p + f.inode = fi.inode + f.info = fi + f.fs = fs + } + fs.invalidateEntry(fi.inode, path.Base(p)) + return +} + +func (fs *FileSystem) Flush() error { + buffer := fs.logBuffer + if buffer != nil { + buffer <- "" // flush + } + return nil +} + +func (fs *FileSystem) Close() error { + _ = fs.Flush() + buffer := fs.logBuffer + if buffer != nil { + fs.logBuffer = nil + close(buffer) + } + return nil +} + +// File + +func (f *File) FS() *FileSystem { + return f.fs +} + +func (f *File) Inode() Ino { + return f.inode +} + +func (f *File) Name() string { + return f.path +} + +func (f *File) Stat() (fi os.FileInfo, err error) { + return f.info, nil +} + +func (f *File) Chmod(ctx meta.Context, mode uint16) (err syscall.Errno) { + defer trace.StartRegion(context.TODO(), "fs.Chmod").End() + l := vfs.NewLogContext(ctx) + defer func() { f.fs.log(l, "Chmod (%s,%o): %s", f.path, mode, errstr(err)) }() + if ctx.Uid() != 0 && ctx.Uid() != f.info.attr.Uid { + return syscall.EACCES + } + var attr = Attr{Mode: mode} + err = f.fs.m.SetAttr(ctx, f.inode, meta.SetAttrMode, 0, &attr) + f.fs.invalidateAttr(f.inode) + return +} + +func (f *File) Chown(ctx meta.Context, uid uint32, gid uint32) (err syscall.Errno) { + defer trace.StartRegion(context.TODO(), "fs.Chown").End() + l := vfs.NewLogContext(ctx) + defer func() { f.fs.log(l, "Chown (%s,%d,%d): %s", f.path, uid, gid, errstr(err)) }() + var flag uint16 + if uid != uint32(f.info.Uid()) { + if ctx.Uid() != 0 { + return syscall.EACCES + } + flag |= meta.SetAttrUID + } + if gid != uint32(f.info.Gid()) { + if ctx.Uid() != 0 { + if ctx.Uid() != uint32(f.info.Uid()) { + return syscall.EACCES + } + var found = false + for _, g := range ctx.Gids() { + if gid == g { + found = true + break + } + } + if !found { + return syscall.EACCES + } + } + flag |= meta.SetAttrGID + } + var attr = Attr{Uid: uid, Gid: gid} + err = f.fs.m.SetAttr(ctx, f.inode, flag, 0, &attr) + f.fs.invalidateAttr(f.inode) + return +} + +func (f *File) Utime(ctx meta.Context, atime, mtime int64) (err syscall.Errno) { + defer trace.StartRegion(context.TODO(), "fs.Utime").End() + var flag uint16 + if atime >= 0 { + flag |= meta.SetAttrAtime + } + if mtime >= 0 { + flag |= meta.SetAttrMtime + } + if flag == 0 { + return 0 + } + l := vfs.NewLogContext(ctx) + defer func() { f.fs.log(l, "Utime (%s,%d,%d): %s", f.path, atime, mtime, errstr(err)) }() + err = f.fs.m.Access(ctx, f.inode, mMaskW, f.info.attr) + if err != 0 { + return err + } + var attr Attr + attr.Atime = atime / 1000 + attr.Atimensec = uint32(atime%1000) * 1e6 + attr.Mtime = mtime / 1000 + attr.Mtimensec = uint32(mtime%1000) * 1e6 + err = f.fs.m.SetAttr(ctx, f.inode, flag, 0, &attr) + return +} + +func (f *File) Seek(ctx meta.Context, offset int64, whence int) (int64, error) { + defer trace.StartRegion(context.TODO(), "fs.Seek").End() + l := vfs.NewLogContext(ctx) + defer func() { f.fs.log(l, "Seek (%s,%d,%d): %d", f.path, offset, whence, f.offset) }() + f.Lock() + defer f.Unlock() + switch whence { + case io.SeekStart: + f.offset = offset + case io.SeekCurrent: + f.offset += offset + case io.SeekEnd: + f.offset = f.info.Size() + offset + } + return f.offset, nil +} + +func (f *File) Read(ctx meta.Context, b []byte) (n int, err error) { + _, task := trace.NewTask(context.TODO(), "Read") + defer task.End() + l := 
vfs.NewLogContext(ctx) + defer func() { f.fs.log(l, "Read (%s,%d): (%d,%s)", f.path, len(b), n, errstr(err)) }() + f.Lock() + defer f.Unlock() + n, err = f.pread(ctx, b, f.offset) + f.offset += int64(n) + return +} + +func (f *File) Pread(ctx meta.Context, b []byte, offset int64) (n int, err error) { + _, task := trace.NewTask(context.TODO(), "Pread") + defer task.End() + l := vfs.NewLogContext(ctx) + defer func() { f.fs.log(l, "Pread (%s,%d,%d): (%d,%s)", f.path, len(b), offset, n, errstr(err)) }() + f.Lock() + defer f.Unlock() + n, err = f.pread(ctx, b, offset) + return +} + +func (f *File) pread(ctx meta.Context, b []byte, offset int64) (n int, err error) { + if offset >= f.info.Size() { + return 0, io.EOF + } + if int64(len(b))+offset > f.info.Size() { + b = b[:f.info.Size()-offset] + } + if f.wdata != nil { + eno := f.wdata.Flush(ctx) + if eno != 0 { + err = eno + return + } + } + if f.rdata == nil { + f.rdata = f.fs.reader.Open(f.inode, uint64(f.info.Size())) + } + + got, eno := f.rdata.Read(ctx, uint64(offset), b) + for eno == syscall.EAGAIN { + got, eno = f.rdata.Read(ctx, uint64(offset), b) + } + if eno != 0 { + err = eno + return + } + if got == 0 { + return 0, io.EOF + } + readSizeHistogram.Observe(float64(got)) + return got, nil +} + +func (f *File) Write(ctx meta.Context, b []byte) (n int, err syscall.Errno) { + defer trace.StartRegion(context.TODO(), "fs.Write").End() + l := vfs.NewLogContext(ctx) + defer func() { f.fs.log(l, "Write (%s,%d): (%d,%s)", f.path, len(b), n, errstr(err)) }() + f.Lock() + defer f.Unlock() + n, err = f.pwrite(ctx, b, f.offset) + f.offset += int64(n) + return +} + +func (f *File) Pwrite(ctx meta.Context, b []byte, offset int64) (n int, err syscall.Errno) { + defer trace.StartRegion(context.TODO(), "fs.Pwrite").End() + l := vfs.NewLogContext(ctx) + defer func() { f.fs.log(l, "Pwrite (%s,%d,%d): (%d,%s)", f.path, len(b), offset, n, errstr(err)) }() + f.Lock() + defer f.Unlock() + n, err = f.pwrite(ctx, b, offset) + return +} + +func (f *File) pwrite(ctx meta.Context, b []byte, offset int64) (n int, err syscall.Errno) { + if f.wdata == nil { + f.wdata = f.fs.writer.Open(f.inode, uint64(f.info.Size())) + } + err = f.wdata.Write(ctx, uint64(offset), b) + if err != 0 { + _ = f.wdata.Close(meta.Background) + f.wdata = nil + return + } + if offset+int64(len(b)) > int64(f.info.attr.Length) { + f.info.attr.Length = uint64(offset + int64(len(b))) + } + writtenSizeHistogram.Observe(float64(len(b))) + return len(b), 0 +} + +func (f *File) Flush(ctx meta.Context) (err syscall.Errno) { + defer trace.StartRegion(context.TODO(), "fs.Flush").End() + f.Lock() + defer f.Unlock() + if f.wdata == nil { + return + } + l := vfs.NewLogContext(ctx) + defer func() { f.fs.log(l, "Flush (%s): %s", f.path, errstr(err)) }() + err = f.wdata.Flush(ctx) + return +} + +func (f *File) Fsync(ctx meta.Context) (err syscall.Errno) { + defer trace.StartRegion(context.TODO(), "fs.Fsync").End() + f.Lock() + defer f.Unlock() + if f.wdata == nil { + return 0 + } + l := vfs.NewLogContext(ctx) + defer func() { f.fs.log(l, "Fsync (%s): %s", f.path, errstr(err)) }() + err = f.wdata.Flush(ctx) + return +} + +func (f *File) Close(ctx meta.Context) (err syscall.Errno) { + l := vfs.NewLogContext(ctx) + defer func() { f.fs.log(l, "Close (%s): %s", f.path, errstr(err)) }() + f.Lock() + defer f.Unlock() + if f.flags != 0 && !f.info.IsDir() { + f.offset = 0 + if f.rdata != nil { + rdata := f.rdata + f.rdata = nil + time.AfterFunc(time.Second, func() { + rdata.Close(meta.Background) + }) + } + if f.wdata 
!= nil { + err = f.wdata.Close(meta.Background) + f.wdata = nil + } + _ = f.fs.m.Close(ctx, f.inode) + } + return +} + +func (f *File) Readdir(ctx meta.Context, count int) (fi []os.FileInfo, err syscall.Errno) { + l := vfs.NewLogContext(ctx) + defer func() { f.fs.log(l, "Readdir (%s,%d): (%s,%d)", f.path, count, errstr(err), len(fi)) }() + f.Lock() + defer f.Unlock() + fi = f.dircache + if fi == nil { + err = f.fs.m.Access(ctx, f.inode, mMaskR, f.info.attr) + if err != 0 { + return nil, err + } + var inodes []*meta.Entry + err = f.fs.m.Readdir(ctx, f.inode, 1, &inodes) + if err != 0 { + return + } + // skip . and .. + for _, n := range inodes[2:] { + i := AttrToFileInfo(n.Inode, n.Attr) + i.name = string(n.Name) + fi = append(fi, i) + } + f.dircache = fi + } + + if len(fi) < int(f.offset) { + return nil, 0 + } + fi = fi[f.offset:] + if count > 0 && len(fi) > count { + fi = fi[:count] + } + f.offset += int64(len(fi)) + return +} + +func (f *File) ReaddirPlus(ctx meta.Context, offset int) (entries []*meta.Entry, err syscall.Errno) { + l := vfs.NewLogContext(ctx) + defer func() { f.fs.log(l, "ReaddirPlus (%s,%d): (%s,%d)", f.path, offset, errstr(err), len(entries)) }() + f.Lock() + defer f.Unlock() + if f.entries == nil { + err = f.fs.m.Access(ctx, f.inode, mMaskR|mMaskX, f.info.attr) + if err != 0 { + return nil, err + } + var es []*meta.Entry + err = f.fs.m.Readdir(ctx, f.inode, 1, &es) + if err != 0 { + return + } + // filter out . and .. + f.entries = make([]*meta.Entry, 0, len(es)) + for _, e := range es { + if !bytes.Equal(e.Name, []byte{'.'}) && !bytes.Equal(e.Name, []byte("..")) { + f.entries = append(f.entries, e) + } + } + } + if offset >= len(f.entries) { + offset = len(f.entries) + } + entries = f.entries[offset:] + return +} + +func (f *File) Summary(ctx meta.Context) (s *meta.Summary, err syscall.Errno) { + defer trace.StartRegion(context.TODO(), "fs.Summary").End() + l := vfs.NewLogContext(ctx) + defer func() { + f.fs.log(l, "Summary (%s): %s (%d,%d,%d,%d)", f.path, errstr(err), s.Length, s.Size, s.Files, s.Dirs) + }() + s = &meta.Summary{} + err = meta.GetSummary(f.fs.m, ctx, f.inode, s, true) + return +} diff --git a/pkg/fs/fs_test.go b/pkg/fs/fs_test.go new file mode 100644 index 0000000..bb0d132 --- /dev/null +++ b/pkg/fs/fs_test.go @@ -0,0 +1,289 @@ +/* + * JuiceFS, Copyright 2020 Juicedata, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package fs + +import ( + "io" + "os" + "sort" + "syscall" + "testing" + "time" + + "github.com/juicedata/juicefs/pkg/chunk" + "github.com/juicedata/juicefs/pkg/meta" + "github.com/juicedata/juicefs/pkg/object" + "github.com/juicedata/juicefs/pkg/vfs" +) + +func TestFileStat(t *testing.T) { + attr := meta.Attr{ + Typ: meta.TypeDirectory, + Mode: 07740, + Atime: 1, + Mtime: 2, + } + st := AttrToFileInfo(2, &attr) + if st.Inode() != 2 { + t.Fatalf("inode should be 2") + } + if !st.IsDir() { + t.Fatalf("should be a dir") + } + mode := st.Mode() + if mode&os.ModeSticky == 0 { + t.Fatalf("sticky bit should be set") + } + if mode&os.ModeSetuid == 0 { + t.Fatalf("suid should be set") + } + if mode&os.ModeSetgid == 0 { + t.Fatalf("sgid should be set") + } + if st.ModTime().Unix() != 2 { + t.Fatalf("unixtimestamp : %d", st.ModTime().Unix()) + } + if st.Sys() != &attr { + t.Fatalf("sys should be meta attr") + } + attr.Typ = meta.TypeSymlink + if !st.IsSymlink() { + t.Fatalf("should be a symlink") + } +} + +// nolint:errcheck +func TestFileSystem(t *testing.T) { + checkAccessFile = time.Millisecond + rotateAccessLog = 500 + m := meta.NewClient("memkv://", &meta.Config{MaxDeletes: 1}) + format := meta.Format{ + Name: "test", + BlockSize: 4096, + Capacity: 1 << 30, + } + _ = m.Init(format, true) + var conf = vfs.Config{ + Meta: &meta.Config{}, + Chunk: &chunk.Config{ + BlockSize: format.BlockSize << 10, + MaxUpload: 1, + BufferSize: 100 << 20, + }, + DirEntryTimeout: time.Millisecond * 100, + EntryTimeout: time.Millisecond * 100, + AttrTimeout: time.Millisecond * 100, + AccessLog: "/tmp/juicefs.access.log", + } + objStore, _ := object.CreateStorage("mem", "", "", "") + store := chunk.NewCachedStore(objStore, *conf.Chunk) + fs, _ := NewFileSystem(&conf, m, store) + ctx := meta.NewContext(1, 1, []uint32{2}) + if total, avail := fs.StatFS(ctx); total != 1<<30 || avail != (1<<30) { + t.Fatalf("statfs: %d %d", total, avail) + } + if e := fs.Access(ctx, "/", 7); e != 0 { + t.Fatalf("access /: %s", e) + } + f, err := fs.Create(ctx, "/hello", 0644) + if err != 0 { + t.Fatalf("create /hello: %s", err) + } + if f.Name() != "/hello" { + t.Fatalf("name: %s", f.Name()) + } + _ = f.Close(ctx) + f, err = fs.Open(ctx, "/hello", mMaskR|mMaskW) + if err != 0 { + t.Fatalf("open %s", err) + } + if fi, err := f.Stat(); err != nil || fi.Mode() != 0644 { + t.Fatalf("stat: %s %+v", err, fi) + } + if n, err := f.Write(ctx, []byte("world")); err != 0 || n != 5 { + t.Fatalf("write 5 bytes: %d %s", n, err) + } + if err := f.Fsync(ctx); err != 0 { + t.Fatalf("fsync: %s", err) + } + var buf = make([]byte, 10) + if n, err := f.Pread(ctx, buf, 2); err != nil || n != 3 || string(buf[:n]) != "rld" { + t.Fatalf("pread(2): %d %s %s", n, err, string(buf[:n])) + } + if n, err := f.Seek(ctx, -3, io.SeekEnd); err != nil || n != 2 { + t.Fatalf("seek 3 bytes before end: %d %s", n, err) + } + if n, err := f.Write(ctx, []byte("t")); err != 0 || n != 1 { + t.Fatalf("write 1 bytes: %d %s", n, err) + } + if n, err := f.Seek(ctx, -2, io.SeekCurrent); err != nil || n != 1 { + t.Fatalf("seek 2 bytes before current: %d %s", n, err) + } + if n, err := f.Read(ctx, buf); err != nil || n != 4 || string(buf[:n]) != "otld" { + t.Fatalf("read(): %d %s %s", n, err, string(buf[:n])) + } + if n, err := f.Read(ctx, buf); err != io.EOF || n != 0 { + t.Fatalf("read(): %d %s %s", n, err, string(buf[:n])) + } + if n, err := f.Pwrite(ctx, []byte("t"), 1); err != 0 || n != 1 { + t.Fatalf("write 1 bytes: %d %s", n, err) + } + if e := f.Flush(ctx); e != 0 { + 
t.Fatalf("flush /hello: %s", e) + } + + if e := f.Chmod(ctx, 0640); e != 0 { + t.Fatalf("chown: %s", e) + } + if e := f.Chown(ctx, 1, 2); e != 0 { + t.Fatalf("chown: %s", e) + } + if e := f.Utime(ctx, 1, 2); e != 0 { + t.Fatalf("utime: %s", e) + } + if s, e := f.Summary(ctx); e != 0 || s.Dirs != 0 || s.Files != 1 || s.Length != 5 || s.Size != 4<<10 { + t.Fatalf("summary: %s %+v", e, s) + } + if e := f.Close(ctx); e != 0 { + t.Fatalf("close /hello: %s", e) + } + if fi, err := fs.Stat(ctx, "/hello"); err != 0 { + t.Fatalf("stat /hello: %s", err) + } else if fi.Mode() != 0640 || fi.Uid() != 1 || fi.Gid() != 2 || fi.Atime() != 1 || fi.Mtime() != 2 { + t.Fatalf("stat /hello: %+v", fi) + } + if e := fs.Truncate(ctx, "/hello", 2); e != 0 { + t.Fatalf("truncate : %s", e) + } + if n, e := fs.CopyFileRange(ctx, "/hello", 0, "/hello", 5, 5); e != 0 || n != 2 { + t.Fatalf("copyfilerange: %s %d", e, n) + } + + if e := fs.SetXattr(ctx, "/hello", "k", []byte("value"), 0); e != 0 { + t.Fatalf("setxattr /hello: %s", e) + } + if v, e := fs.GetXattr(ctx, "/hello", "k"); e != 0 || string(v) != "value" { + t.Fatalf("getxattr /hello: %s %s", e, string(v)) + } + if names, e := fs.ListXattr(ctx, "/hello"); e != 0 || string(names) != "k\x00" { + t.Fatalf("listxattr /hello: %s %+v", e, names) + } + if e := fs.RemoveXattr(ctx, "/hello", "k"); e != 0 { + t.Fatalf("removexattr /hello: %s", e) + } + + if e := fs.Symlink(ctx, "hello", "/sym"); e != 0 { + t.Fatalf("symlink: %s", e) + } + if target, e := fs.Readlink(ctx, "/sym"); e != 0 || string(target) != "hello" { + t.Fatalf("readlink: %s", string(target)) + } + if fi, err := fs.Stat(ctx, "/sym"); err != 0 || fi.name != "sym" { + t.Fatalf("stat symlink: %s %+v", err, fi) + } + if err := fs.Delete(ctx, "/sym"); err != 0 { + t.Fatalf("delete /sym: %s", err) + } + + if _, e := fs.Open(meta.NewContext(2, 2, []uint32{3}), "/hello", mMaskW); e == 0 || e != syscall.EACCES { + t.Fatalf("open without permission: %s", e) + } + + if err := fs.Mkdir(ctx, "/d", 0755); err != 0 { + t.Fatalf("mkdir /d: %s", err) + } + d, e := fs.Open(ctx, "/", 0) + if e != 0 { + t.Fatalf("open /: %s", e) + } + defer d.Close(ctx) + if fis, e := d.Readdir(ctx, 0); e != 0 || len(fis) != 2 { + t.Fatalf("readdir /: %s, %d entries", e, len(fis)) + } else { + sort.Slice(fis, func(i, j int) bool { return fis[i].Name() < fis[j].Name() }) + if fis[0].Name() != "d" || fis[1].Name() != "hello" { + t.Fatalf("readdir names: %+v", fis) + } + } + if es, e := d.ReaddirPlus(ctx, 0); e != 0 || len(es) != 2 { + t.Fatalf("readdirplus: %s, %d entries", e, len(es)) + } else { + sort.Slice(es, func(i, j int) bool { return es[i].Inode < es[j].Inode }) + if string(es[0].Name) != "hello" || string(es[1].Name) != "d" { + t.Fatalf("readdirplus names: %+v", es) + } + } + if e := fs.Rename(ctx, "/hello", "/d/f", 0); e != 0 { + t.Fatalf("rename: %s", e) + } + if e := fs.Symlink(ctx, "d", "/sd"); e != 0 { + t.Fatalf("symlink: %s", e) + } + if fi, e := fs.Stat(ctx, "/sd/f"); e != 0 || fi.name != "f" { + t.Fatalf("follow symlink: %s %+v", e, fi) + } + + if s, e := d.Summary(ctx); e != 0 || s.Dirs != 2 || s.Files != 2 || s.Length != 8 || s.Size != 16<<10 { + t.Fatalf("summary: %s %+v", e, s) + } + if e := fs.Delete(ctx, "/d"); e == 0 || !IsNotEmpty(e) { + t.Fatalf("rmdir: %s", e) + } + if err := fs.Delete(ctx, "/d/f"); err != 0 { + t.Fatalf("delete /d/f: %s", err) + } + if err := fs.Delete(ctx, "/d/f"); err == 0 || !IsNotExist(err) { + t.Fatalf("delete /d/f: %s", err) + } + if e := fs.Rmr(ctx, "/d"); e != 0 { + t.Fatalf("delete /d -r: 
%s", e) + } + + time.Sleep(time.Second * 2) + if e := fs.Flush(); e != nil { + t.Fatalf("flush : %s", e) + } + if e := fs.Close(); e != nil { + t.Fatalf("close: %s", e) + } + if e := fs.Close(); e != nil { + t.Fatalf("close: %s", e) + } + + // path with trailing / + if err := fs.Mkdir(ctx, "/ddd/", 0777); err != 0 { + t.Fatalf("mkdir /ddd/: %s", err) + } + if _, err := fs.Create(ctx, "/ddd/ddd", 0777); err != 0 { + t.Fatalf("create /ddd/ddd: %s", err) + } + if _, err := fs.Create(ctx, "/ddd/fff/", 0777); err != syscall.EINVAL { + t.Fatalf("create /ddd/fff/: %s", err) + } + if err := fs.Delete(ctx, "/ddd/"); err != syscall.ENOTEMPTY { + t.Fatalf("delete /ddd/: %s", err) + } + if err := fs.Rename(ctx, "/ddd/", "/ttt/", 0); err != 0 { + t.Fatalf("delete /ddd/: %s", err) + } + if err := fs.Rmr(ctx, "/ttt/"); err != 0 { + t.Fatalf("rmr /ttt/: %s", err) + } + if _, err := fs.Stat(ctx, "/ttt/"); err != syscall.ENOENT { + t.Fatalf("stat /ttt/: %s", err) + } +} diff --git a/pkg/fs/metrics.go b/pkg/fs/metrics.go new file mode 100644 index 0000000..bc175f7 --- /dev/null +++ b/pkg/fs/metrics.go @@ -0,0 +1,37 @@ +/* + * JuiceFS, Copyright 2021 Juicedata, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package fs + +import "github.com/prometheus/client_golang/prometheus" + +var ( + readSizeHistogram = prometheus.NewHistogram(prometheus.HistogramOpts{ + Name: "sdk_read_size_bytes", + Help: "size of read distributions.", + Buckets: prometheus.LinearBuckets(4096, 4096, 32), + }) + writtenSizeHistogram = prometheus.NewHistogram(prometheus.HistogramOpts{ + Name: "sdk_written_size_bytes", + Help: "size of write distributions.", + Buckets: prometheus.LinearBuckets(4096, 4096, 32), + }) + opsDurationsHistogram = prometheus.NewHistogram(prometheus.HistogramOpts{ + Name: "sdk_ops_durations_histogram_seconds", + Help: "Operations latency distributions.", + Buckets: prometheus.ExponentialBuckets(0.0001, 1.5, 30), + }) +) diff --git a/pkg/fuse/context.go b/pkg/fuse/context.go new file mode 100644 index 0000000..21ea90c --- /dev/null +++ b/pkg/fuse/context.go @@ -0,0 +1,114 @@ +/* + * JuiceFS, Copyright 2020 Juicedata, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package fuse + +import ( + "context" + "sync" + "syscall" + "time" + + "github.com/juicedata/juicefs/pkg/meta" + "github.com/juicedata/juicefs/pkg/vfs" + + "github.com/hanwen/go-fuse/v2/fuse" +) + +// Ino is an alias to meta.Ino +type Ino = meta.Ino + +// Attr is an alias to meta.Attr +type Attr = meta.Attr + +// Context is an alias to vfs.LogContext +type Context = vfs.LogContext + +type fuseContext struct { + context.Context + start time.Time + header *fuse.InHeader + canceled bool + cancel <-chan struct{} +} + +var contextPool = sync.Pool{ + New: func() interface{} { + return &fuseContext{} + }, +} + +func newContext(cancel <-chan struct{}, header *fuse.InHeader) *fuseContext { + ctx := contextPool.Get().(*fuseContext) + ctx.Context = context.Background() + ctx.start = time.Now() + ctx.canceled = false + ctx.cancel = cancel + ctx.header = header + return ctx +} + +func releaseContext(ctx *fuseContext) { + contextPool.Put(ctx) +} + +func (c *fuseContext) Uid() uint32 { + return c.header.Uid +} + +func (c *fuseContext) Gid() uint32 { + return c.header.Gid +} + +func (c *fuseContext) Gids() []uint32 { + return []uint32{c.header.Gid} +} + +func (c *fuseContext) Pid() uint32 { + return c.header.Pid +} + +func (c *fuseContext) Duration() time.Duration { + return time.Since(c.start) +} + +func (c *fuseContext) Cancel() { + c.canceled = true +} + +func (c *fuseContext) Canceled() bool { + if c.canceled { + return true + } + select { + case <-c.cancel: + return true + default: + return false + } +} + +func (c *fuseContext) WithValue(k, v interface{}) { + c.Context = context.WithValue(c.Context, k, v) +} + +func (c *fuseContext) Err() error { + return syscall.EINTR +} + +// func (c *fuseContext) Done() <-chan struct{} { +// return c.cancel +// } diff --git a/pkg/fuse/fuse.go b/pkg/fuse/fuse.go new file mode 100644 index 0000000..df143ba --- /dev/null +++ b/pkg/fuse/fuse.go @@ -0,0 +1,462 @@ +/* + * JuiceFS, Copyright 2020 Juicedata, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package fuse + +import ( + "fmt" + "os" + "runtime" + "strings" + "syscall" + "time" + + "github.com/hanwen/go-fuse/v2/fuse" + + "github.com/juicedata/juicefs/pkg/meta" + "github.com/juicedata/juicefs/pkg/utils" + "github.com/juicedata/juicefs/pkg/vfs" +) + +var logger = utils.GetLogger("juicefs") + +type fileSystem struct { + fuse.RawFileSystem + conf *vfs.Config + v *vfs.VFS +} + +func newFileSystem(conf *vfs.Config, v *vfs.VFS) *fileSystem { + return &fileSystem{ + RawFileSystem: fuse.NewDefaultRawFileSystem(), + conf: conf, + v: v, + } +} + +func (fs *fileSystem) replyEntry(out *fuse.EntryOut, e *meta.Entry) fuse.Status { + out.NodeId = uint64(e.Inode) + out.Generation = 1 + out.SetAttrTimeout(fs.conf.AttrTimeout) + if e.Attr.Typ == meta.TypeDirectory { + out.SetEntryTimeout(fs.conf.DirEntryTimeout) + } else { + out.SetEntryTimeout(fs.conf.EntryTimeout) + } + if vfs.IsSpecialNode(e.Inode) { + out.SetAttrTimeout(time.Hour) + } + attrToStat(e.Inode, e.Attr, &out.Attr) + return 0 +} + +func (fs *fileSystem) Lookup(cancel <-chan struct{}, header *fuse.InHeader, name string, out *fuse.EntryOut) (status fuse.Status) { + ctx := newContext(cancel, header) + defer releaseContext(ctx) + entry, err := fs.v.Lookup(ctx, Ino(header.NodeId), name) + if err != 0 { + return fuse.Status(err) + } + return fs.replyEntry(out, entry) +} + +func (fs *fileSystem) GetAttr(cancel <-chan struct{}, in *fuse.GetAttrIn, out *fuse.AttrOut) (code fuse.Status) { + ctx := newContext(cancel, &in.InHeader) + defer releaseContext(ctx) + var opened uint8 + if in.Fh() != 0 { + opened = 1 + } + entry, err := fs.v.GetAttr(ctx, Ino(in.NodeId), opened) + if err != 0 { + return fuse.Status(err) + } + attrToStat(entry.Inode, entry.Attr, &out.Attr) + out.AttrValid = uint64(fs.conf.AttrTimeout.Seconds()) + if vfs.IsSpecialNode(Ino(in.NodeId)) { + out.AttrValid = 3600 + } + return 0 +} + +func (fs *fileSystem) SetAttr(cancel <-chan struct{}, in *fuse.SetAttrIn, out *fuse.AttrOut) (code fuse.Status) { + ctx := newContext(cancel, &in.InHeader) + defer releaseContext(ctx) + var opened uint8 + if in.Fh != 0 { + opened = 1 + } + entry, err := fs.v.SetAttr(ctx, Ino(in.NodeId), int(in.Valid), opened, in.Mode, in.Uid, in.Gid, int64(in.Atime), int64(in.Mtime), in.Atimensec, in.Mtimensec, in.Size) + if err != 0 { + return fuse.Status(err) + } + out.AttrValid = uint64(fs.conf.AttrTimeout.Seconds()) + if vfs.IsSpecialNode(entry.Inode) { + out.AttrValid = 3600 + } + attrToStat(entry.Inode, entry.Attr, &out.Attr) + return 0 +} + +func (fs *fileSystem) Mknod(cancel <-chan struct{}, in *fuse.MknodIn, name string, out *fuse.EntryOut) (code fuse.Status) { + ctx := newContext(cancel, &in.InHeader) + defer releaseContext(ctx) + entry, err := fs.v.Mknod(ctx, Ino(in.NodeId), name, uint16(in.Mode), getUmask(in), in.Rdev) + if err != 0 { + return fuse.Status(err) + } + return fs.replyEntry(out, entry) +} + +func (fs *fileSystem) Mkdir(cancel <-chan struct{}, in *fuse.MkdirIn, name string, out *fuse.EntryOut) (code fuse.Status) { + ctx := newContext(cancel, &in.InHeader) + defer releaseContext(ctx) + entry, err := fs.v.Mkdir(ctx, Ino(in.NodeId), name, uint16(in.Mode), uint16(in.Umask)) + if err != 0 { + return fuse.Status(err) + } + return fs.replyEntry(out, entry) +} + +func (fs *fileSystem) Unlink(cancel <-chan struct{}, header *fuse.InHeader, name string) (code fuse.Status) { + ctx := newContext(cancel, header) + defer releaseContext(ctx) + err := fs.v.Unlink(ctx, Ino(header.NodeId), name) + return fuse.Status(err) +} + +func (fs *fileSystem) 
Rmdir(cancel <-chan struct{}, header *fuse.InHeader, name string) (code fuse.Status) { + ctx := newContext(cancel, header) + defer releaseContext(ctx) + err := fs.v.Rmdir(ctx, Ino(header.NodeId), name) + return fuse.Status(err) +} + +func (fs *fileSystem) Rename(cancel <-chan struct{}, in *fuse.RenameIn, oldName string, newName string) (code fuse.Status) { + ctx := newContext(cancel, &in.InHeader) + defer releaseContext(ctx) + err := fs.v.Rename(ctx, Ino(in.NodeId), oldName, Ino(in.Newdir), newName, in.Flags) + return fuse.Status(err) +} + +func (fs *fileSystem) Link(cancel <-chan struct{}, in *fuse.LinkIn, name string, out *fuse.EntryOut) (code fuse.Status) { + ctx := newContext(cancel, &in.InHeader) + defer releaseContext(ctx) + entry, err := fs.v.Link(ctx, Ino(in.Oldnodeid), Ino(in.NodeId), name) + if err != 0 { + return fuse.Status(err) + } + return fs.replyEntry(out, entry) +} + +func (fs *fileSystem) Symlink(cancel <-chan struct{}, header *fuse.InHeader, target string, name string, out *fuse.EntryOut) (code fuse.Status) { + ctx := newContext(cancel, header) + defer releaseContext(ctx) + entry, err := fs.v.Symlink(ctx, target, Ino(header.NodeId), name) + if err != 0 { + return fuse.Status(err) + } + return fs.replyEntry(out, entry) +} + +func (fs *fileSystem) Readlink(cancel <-chan struct{}, header *fuse.InHeader) (out []byte, code fuse.Status) { + ctx := newContext(cancel, header) + defer releaseContext(ctx) + path, err := fs.v.Readlink(ctx, Ino(header.NodeId)) + return path, fuse.Status(err) +} + +func (fs *fileSystem) GetXAttr(cancel <-chan struct{}, header *fuse.InHeader, attr string, dest []byte) (sz uint32, code fuse.Status) { + ctx := newContext(cancel, header) + defer releaseContext(ctx) + value, err := fs.v.GetXattr(ctx, Ino(header.NodeId), attr, uint32(len(dest))) + if err != 0 { + return 0, fuse.Status(err) + } + copy(dest, value) + return uint32(len(value)), 0 +} + +func (fs *fileSystem) ListXAttr(cancel <-chan struct{}, header *fuse.InHeader, dest []byte) (uint32, fuse.Status) { + ctx := newContext(cancel, header) + defer releaseContext(ctx) + data, err := fs.v.ListXattr(ctx, Ino(header.NodeId), len(dest)) + if err != 0 { + return 0, fuse.Status(err) + } + copy(dest, data) + return uint32(len(data)), 0 +} + +func (fs *fileSystem) SetXAttr(cancel <-chan struct{}, in *fuse.SetXAttrIn, attr string, data []byte) fuse.Status { + ctx := newContext(cancel, &in.InHeader) + defer releaseContext(ctx) + err := fs.v.SetXattr(ctx, Ino(in.NodeId), attr, data, in.Flags) + return fuse.Status(err) +} + +func (fs *fileSystem) RemoveXAttr(cancel <-chan struct{}, header *fuse.InHeader, attr string) (code fuse.Status) { + ctx := newContext(cancel, header) + defer releaseContext(ctx) + err := fs.v.RemoveXattr(ctx, Ino(header.NodeId), attr) + return fuse.Status(err) +} + +func (fs *fileSystem) Create(cancel <-chan struct{}, in *fuse.CreateIn, name string, out *fuse.CreateOut) (code fuse.Status) { + ctx := newContext(cancel, &in.InHeader) + defer releaseContext(ctx) + entry, fh, err := fs.v.Create(ctx, Ino(in.NodeId), name, uint16(in.Mode), 0, in.Flags) + if err != 0 { + return fuse.Status(err) + } + out.Fh = fh + return fs.replyEntry(&out.EntryOut, entry) +} + +func (fs *fileSystem) Open(cancel <-chan struct{}, in *fuse.OpenIn, out *fuse.OpenOut) (status fuse.Status) { + ctx := newContext(cancel, &in.InHeader) + defer releaseContext(ctx) + entry, fh, err := fs.v.Open(ctx, Ino(in.NodeId), in.Flags) + if err != 0 { + return fuse.Status(err) + } + out.Fh = fh + if vfs.IsSpecialNode(Ino(in.NodeId)) 
{ + out.OpenFlags |= fuse.FOPEN_DIRECT_IO + } else if entry.Attr.KeepCache { + out.OpenFlags |= fuse.FOPEN_KEEP_CACHE + } + return 0 +} + +func (fs *fileSystem) Read(cancel <-chan struct{}, in *fuse.ReadIn, buf []byte) (fuse.ReadResult, fuse.Status) { + ctx := newContext(cancel, &in.InHeader) + defer releaseContext(ctx) + n, err := fs.v.Read(ctx, Ino(in.NodeId), buf, in.Offset, in.Fh) + if err != 0 { + return nil, fuse.Status(err) + } + return fuse.ReadResultData(buf[:n]), 0 +} + +func (fs *fileSystem) Release(cancel <-chan struct{}, in *fuse.ReleaseIn) { + ctx := newContext(cancel, &in.InHeader) + defer releaseContext(ctx) + fs.v.Release(ctx, Ino(in.NodeId), in.Fh) +} + +func (fs *fileSystem) Write(cancel <-chan struct{}, in *fuse.WriteIn, data []byte) (written uint32, code fuse.Status) { + ctx := newContext(cancel, &in.InHeader) + defer releaseContext(ctx) + err := fs.v.Write(ctx, Ino(in.NodeId), data, in.Offset, in.Fh) + if err != 0 { + return 0, fuse.Status(err) + } + return uint32(len(data)), 0 +} + +func (fs *fileSystem) Flush(cancel <-chan struct{}, in *fuse.FlushIn) fuse.Status { + ctx := newContext(cancel, &in.InHeader) + defer releaseContext(ctx) + err := fs.v.Flush(ctx, Ino(in.NodeId), in.Fh, in.LockOwner) + return fuse.Status(err) +} + +func (fs *fileSystem) Fsync(cancel <-chan struct{}, in *fuse.FsyncIn) (code fuse.Status) { + ctx := newContext(cancel, &in.InHeader) + defer releaseContext(ctx) + err := fs.v.Fsync(ctx, Ino(in.NodeId), int(in.FsyncFlags), in.Fh) + return fuse.Status(err) +} + +func (fs *fileSystem) Fallocate(cancel <-chan struct{}, in *fuse.FallocateIn) (code fuse.Status) { + ctx := newContext(cancel, &in.InHeader) + defer releaseContext(ctx) + err := fs.v.Fallocate(ctx, Ino(in.NodeId), uint8(in.Mode), int64(in.Offset), int64(in.Length), in.Fh) + return fuse.Status(err) +} + +func (fs *fileSystem) CopyFileRange(cancel <-chan struct{}, in *fuse.CopyFileRangeIn) (written uint32, code fuse.Status) { + ctx := newContext(cancel, &in.InHeader) + defer releaseContext(ctx) + copied, err := fs.v.CopyFileRange(ctx, Ino(in.NodeId), in.FhIn, in.OffIn, Ino(in.NodeIdOut), in.FhOut, in.OffOut, in.Len, uint32(in.Flags)) + if err != 0 { + return 0, fuse.Status(err) + } + return uint32(copied), 0 +} + +func (fs *fileSystem) GetLk(cancel <-chan struct{}, in *fuse.LkIn, out *fuse.LkOut) (code fuse.Status) { + ctx := newContext(cancel, &in.InHeader) + defer releaseContext(ctx) + l := in.Lk + err := fs.v.Getlk(ctx, Ino(in.NodeId), in.Fh, in.Owner, &l.Start, &l.End, &l.Typ, &l.Pid) + if err == 0 { + out.Lk = l + } + return fuse.Status(err) +} + +func (fs *fileSystem) SetLk(cancel <-chan struct{}, in *fuse.LkIn) (code fuse.Status) { + return fs.setLk(cancel, in, false) +} + +func (fs *fileSystem) SetLkw(cancel <-chan struct{}, in *fuse.LkIn) (code fuse.Status) { + return fs.setLk(cancel, in, true) +} + +func (fs *fileSystem) setLk(cancel <-chan struct{}, in *fuse.LkIn, block bool) (code fuse.Status) { + if in.LkFlags&fuse.FUSE_LK_FLOCK != 0 { + return fs.Flock(cancel, in, block) + } + ctx := newContext(cancel, &in.InHeader) + defer releaseContext(ctx) + l := in.Lk + err := fs.v.Setlk(ctx, Ino(in.NodeId), in.Fh, in.Owner, l.Start, l.End, l.Typ, l.Pid, block) + return fuse.Status(err) +} + +func (fs *fileSystem) Flock(cancel <-chan struct{}, in *fuse.LkIn, block bool) (code fuse.Status) { + ctx := newContext(cancel, &in.InHeader) + defer releaseContext(ctx) + err := fs.v.Flock(ctx, Ino(in.NodeId), in.Fh, in.Owner, in.Lk.Typ, block) + return fuse.Status(err) +} + +func (fs *fileSystem) 
OpenDir(cancel <-chan struct{}, in *fuse.OpenIn, out *fuse.OpenOut) (status fuse.Status) { + ctx := newContext(cancel, &in.InHeader) + defer releaseContext(ctx) + fh, err := fs.v.Opendir(ctx, Ino(in.NodeId)) + out.Fh = fh + return fuse.Status(err) +} + +func (fs *fileSystem) ReadDir(cancel <-chan struct{}, in *fuse.ReadIn, out *fuse.DirEntryList) fuse.Status { + ctx := newContext(cancel, &in.InHeader) + defer releaseContext(ctx) + entries, err := fs.v.Readdir(ctx, Ino(in.NodeId), in.Size, int(in.Offset), in.Fh, false) + var de fuse.DirEntry + for _, e := range entries { + de.Ino = uint64(e.Inode) + de.Name = string(e.Name) + de.Mode = e.Attr.SMode() + if !out.AddDirEntry(de) { + break + } + } + return fuse.Status(err) +} + +func (fs *fileSystem) ReadDirPlus(cancel <-chan struct{}, in *fuse.ReadIn, out *fuse.DirEntryList) fuse.Status { + ctx := newContext(cancel, &in.InHeader) + defer releaseContext(ctx) + entries, err := fs.v.Readdir(ctx, Ino(in.NodeId), in.Size, int(in.Offset), in.Fh, true) + var de fuse.DirEntry + for _, e := range entries { + de.Ino = uint64(e.Inode) + de.Name = string(e.Name) + de.Mode = e.Attr.SMode() + eo := out.AddDirLookupEntry(de) + if eo == nil { + break + } + if e.Attr.Full { + fs.v.UpdateLength(e.Inode, e.Attr) + fs.replyEntry(eo, e) + } else { + eo.Ino = uint64(e.Inode) + eo.Generation = 1 + } + } + return fuse.Status(err) +} + +var cancelReleaseDir = make(chan struct{}) + +func (fs *fileSystem) ReleaseDir(in *fuse.ReleaseIn) { + ctx := newContext(cancelReleaseDir, &in.InHeader) + defer releaseContext(ctx) + fs.v.Releasedir(ctx, Ino(in.NodeId), in.Fh) +} + +func (fs *fileSystem) StatFs(cancel <-chan struct{}, in *fuse.InHeader, out *fuse.StatfsOut) (code fuse.Status) { + ctx := newContext(cancel, in) + defer releaseContext(ctx) + st, err := fs.v.StatFS(ctx, Ino(in.NodeId)) + if err != 0 { + return fuse.Status(err) + } + out.NameLen = 255 + out.Frsize = 4096 + out.Bsize = 4096 + out.Blocks = st.Total / uint64(out.Bsize) + if out.Blocks < 1 { + out.Blocks = 1 + } + out.Bavail = st.Avail / uint64(out.Bsize) + out.Bfree = out.Bavail + out.Files = st.Files + out.Ffree = st.Favail + return 0 +} + +// Serve starts a server to serve requests from FUSE. 
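// It tries to raise the process priority, translates the comma-separated mount options (allow_other, writeback_cache, debug, ...) into go-fuse MountOptions, and blocks in fssrv.Serve() until the filesystem is unmounted.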
+func Serve(v *vfs.VFS, options string, xattrs bool) error { + if err := syscall.Setpriority(syscall.PRIO_PROCESS, os.Getpid(), -19); err != nil { + logger.Warnf("setpriority: %s", err) + } + + conf := v.Conf + imp := newFileSystem(conf, v) + + var opt fuse.MountOptions + opt.FsName = "JuiceFS:" + conf.Format.Name + opt.Name = "juicefs" + opt.SingleThreaded = false + opt.MaxBackground = 50 + opt.EnableLocks = true + opt.DisableXAttrs = !xattrs + opt.IgnoreSecurityLabels = true + opt.MaxWrite = 1 << 20 + opt.MaxReadAhead = 1 << 20 + opt.DirectMount = true + opt.AllowOther = os.Getuid() == 0 + for _, n := range strings.Split(options, ",") { + if n == "allow_other" || n == "allow_root" { + opt.AllowOther = true + } else if n == "nonempty" { + } else if n == "debug" { + opt.Debug = true + } else if n == "writeback_cache" || n == "writeback" { + opt.EnableWriteback = true + } else if strings.TrimSpace(n) != "" { + opt.Options = append(opt.Options, n) + } + } + opt.Options = append(opt.Options, "default_permissions") + if runtime.GOOS == "darwin" { + opt.Options = append(opt.Options, "fssubtype=juicefs") + opt.Options = append(opt.Options, "volname="+conf.Format.Name) + opt.Options = append(opt.Options, "daemon_timeout=60", "iosize=65536", "novncache") + } + fssrv, err := fuse.NewServer(imp, conf.Mountpoint, &opt) + if err != nil { + return fmt.Errorf("fuse: %s", err) + } + + fssrv.Serve() + return nil +} diff --git a/pkg/fuse/fuse_darwin.go b/pkg/fuse/fuse_darwin.go new file mode 100644 index 0000000..db7fa1f --- /dev/null +++ b/pkg/fuse/fuse_darwin.go @@ -0,0 +1,28 @@ +/* + * JuiceFS, Copyright 2020 Juicedata, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package fuse + +import ( + "github.com/hanwen/go-fuse/v2/fuse" +) + +func getUmask(in *fuse.MknodIn) uint16 { + return 0 +} + +func setBlksize(out *fuse.Attr, size uint32) { +} diff --git a/pkg/fuse/fuse_linux.go b/pkg/fuse/fuse_linux.go new file mode 100644 index 0000000..d6988f9 --- /dev/null +++ b/pkg/fuse/fuse_linux.go @@ -0,0 +1,29 @@ +/* + * JuiceFS, Copyright 2020 Juicedata, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package fuse + +import ( + "github.com/hanwen/go-fuse/v2/fuse" +) + +func getUmask(in *fuse.MknodIn) uint16 { + return uint16(in.Umask) +} + +func setBlksize(out *fuse.Attr, size uint32) { + out.Blksize = size +} diff --git a/pkg/fuse/fuse_test.go b/pkg/fuse/fuse_test.go new file mode 100644 index 0000000..ae14b95 --- /dev/null +++ b/pkg/fuse/fuse_test.go @@ -0,0 +1,305 @@ +//go:build linux +// +build linux + +/* + * JuiceFS, Copyright 2020 Juicedata, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +//nolint:errcheck +package fuse + +import ( + "bytes" + "errors" + "io" + "io/ioutil" + "log" + "os" + "os/exec" + "path/filepath" + "strings" + "syscall" + "testing" + "time" + + "github.com/gofrs/flock" + "github.com/google/uuid" + "github.com/hanwen/go-fuse/v2/posixtest" + "github.com/juicedata/juicefs/pkg/chunk" + "github.com/juicedata/juicefs/pkg/meta" + "github.com/juicedata/juicefs/pkg/object" + "github.com/juicedata/juicefs/pkg/vfs" + "github.com/pkg/xattr" +) + +func format(url string) { + m := meta.NewClient(url, &meta.Config{}) + format := meta.Format{ + Name: "test", + UUID: uuid.New().String(), + Storage: "file", + Bucket: os.TempDir() + "/", + BlockSize: 4096, + } + err := m.Init(format, true) + if err != nil { + log.Fatalf("format: %s", err) + } +} + +func mount(url, mp string) { + if err := os.MkdirAll(mp, 0777); err != nil { + log.Fatalf("create %s: %s", mp, err) + } + + metaConf := &meta.Config{ + Retries: 10, + Strict: true, + MountPoint: mp, + } + m := meta.NewClient(url, metaConf) + format, err := m.Load() + if err != nil { + log.Fatalf("load setting: %s", err) + } + + chunkConf := chunk.Config{ + BlockSize: format.BlockSize * 1024, + Compress: format.Compression, + MaxUpload: 20, + BufferSize: 300 << 20, + CacheSize: 1024, + CacheDir: "memory", + } + + blob, err := object.CreateStorage(strings.ToLower(format.Storage), format.Bucket, format.AccessKey, format.SecretKey) + if err != nil { + log.Fatalf("object storage: %s", err) + } + blob = object.WithPrefix(blob, format.Name+"/") + store := chunk.NewCachedStore(blob, chunkConf) + + m.OnMsg(meta.CompactChunk, meta.MsgCallback(func(args ...interface{}) error { + slices := args[0].([]meta.Slice) + chunkid := args[1].(uint64) + return vfs.Compact(chunkConf, store, slices, chunkid) + })) + + conf := &vfs.Config{ + Meta: metaConf, + Format: format, + Mountpoint: mp, + Chunk: &chunkConf, + } + + err = m.NewSession() + if err != nil { + log.Fatalf("new session: %s", err) + } + + conf.AttrTimeout = time.Second + conf.EntryTimeout = time.Second + conf.DirEntryTimeout = time.Second + conf.HideInternal = true + v := vfs.NewVFS(conf, m, store) + err = Serve(v, "", true) + if err != nil { + log.Fatalf("fuse server err: %s\n", err) + } + _ = m.CloseSession() +} + +func umount(mp string, force bool) { + var cmd *exec.Cmd + if _, err := exec.LookPath("fusermount"); err == nil { + if force { + cmd = exec.Command("fusermount", "-uz", mp) + } else { + cmd = exec.Command("fusermount", "-u", mp) + } + } else { + if 
force { + cmd = exec.Command("umount", "-l", mp) + } else { + cmd = exec.Command("umount", mp) + } + } + + out, err := cmd.CombinedOutput() + if err != nil { + log.Print(string(out)) + } +} + +func waitMountpoint(mp string) chan error { + ch := make(chan error, 1) + for i := 0; i < 20; i++ { + time.Sleep(time.Millisecond * 500) + st, err := os.Stat(mp) + if err == nil { + if sys, ok := st.Sys().(*syscall.Stat_t); ok && sys.Ino == 1 { + ch <- nil + return ch + } + } + } + ch <- errors.New("not ready in 10 seconds") + return ch +} + +func setUp(metaUrl, mp string) error { + format(metaUrl) + go mount(metaUrl, mp) + return <-waitMountpoint(mp) +} + +func cleanup(mp string) { + parent, err := os.Open(mp) + if err != nil { + return + } + defer parent.Close() + names, err := parent.Readdirnames(-1) + if err != nil { + return + } + for _, n := range names { + os.RemoveAll(filepath.Join(mp, n)) + } +} + +func StatFS(t *testing.T, mp string) { + var st syscall.Statfs_t + if err := syscall.Statfs(mp, &st); err != nil { + t.Fatal(err) + } + if st.Bsize != 4096 { + t.Fatalf("bsize should be 4096 but got %d ", st.Bsize) + } + if st.Blocks-st.Bavail != 0 { + t.Fatalf("used blocks should be 0 but got %d", st.Blocks-st.Bavail) + } + if st.Files-st.Ffree != 0 { + t.Fatalf("used files should be 0 but got %d", st.Files) + } +} + +func Xattrs(t *testing.T, mp string) { + path := filepath.Join(mp, "myfile") + ioutil.WriteFile(path, []byte(""), 0644) + + const prefix = "user." + var value = []byte("test-attr-value") + if err := xattr.Set(path, prefix+"test", value); err != nil { + t.Fatal(err) + } + if _, err := xattr.List(path); err != nil { + t.Fatal(err) + } + + if data, err := xattr.Get(path, prefix+"test"); err != nil { + t.Fatal(err) + } else if !bytes.Equal(data, value) { + t.Fatalf("expect %v but got %v", value, data) + } + if err := xattr.Remove(path, prefix+"test"); err != nil { + t.Fatal(err) + } + // One can also specify the flags parameter to be passed to the OS.
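// xattr.XATTR_CREATE makes the set fail if the attribute already exists; it was removed just above, so this call is expected to succeed.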
+ if err := xattr.SetWithFlags(path, prefix+"test", []byte("test-attr-value2"), xattr.XATTR_CREATE); err != nil { + t.Fatal(err) + } +} + +func Flock(t *testing.T, mp string) { + path := filepath.Join(mp, "go-lock.lock") + ioutil.WriteFile(path, []byte(""), 0644) + + fileLock := flock.New(path) + locked, err := fileLock.TryLock() + if err != nil { + t.Fatalf("try lock: %s", err) + } + if locked { + fileLock.Unlock() + } else { + t.Fatal("no lock") + } +} + +func PosixLock(t *testing.T, mp string) { + path := filepath.Join(mp, "go-lock.lock") + f, err := os.OpenFile(path, os.O_RDWR|os.O_CREATE, 0644) + if err != nil { + t.Fatal(err) + } + defer f.Close() + f.WriteString("hello") + if err := f.Sync(); err != nil { + t.Fatalf("fsync: %s", err) + } + var fl syscall.Flock_t + fl.Pid = int32(os.Getpid()) + fl.Type = syscall.F_WRLCK + fl.Whence = io.SeekStart + err = syscall.FcntlFlock(f.Fd(), syscall.F_SETLK, &fl) + for err == syscall.EAGAIN { + err = syscall.FcntlFlock(f.Fd(), syscall.F_SETLK, &fl) + } + if err != nil { + t.Fatalf("lock: %s", err) + } + if err = syscall.FcntlFlock(f.Fd(), syscall.F_GETLK, &fl); err != nil { + t.Fatalf("getlk: %s", err) + } + if int(fl.Pid) != os.Getpid() { + t.Fatalf("pid: %d != %d", fl.Pid, os.Getpid()) + } + fl.Type = syscall.F_UNLCK + if err = syscall.FcntlFlock(f.Fd(), syscall.F_SETLK, &fl); err != nil { + t.Fatalf("unlock: %s", err) + } +} + +func TestFUSE(t *testing.T) { + f, err := os.CreateTemp("", "meta") + if err != nil { + t.Fatal(err) + } + defer os.Remove(f.Name()) + metaUrl := "sqlite3://" + f.Name() + mp, err := os.MkdirTemp("", "mp") + if err != nil { + t.Fatal(err) + } + err = setUp(metaUrl, mp) + if err != nil { + t.Fatalf("setup: %s", err) + } + defer umount(mp, true) + + t.Run("StatFS", func(t *testing.T) { + StatFS(t, mp) + }) + posixtest.All["Xattrs"] = Xattrs + posixtest.All["Flock"] = Flock + posixtest.All["POSIXLock"] = PosixLock + for c, f := range posixtest.All { + cleanup(mp) + t.Run(c, func(t *testing.T) { + f(t, mp) + }) + } +} diff --git a/pkg/fuse/utils.go b/pkg/fuse/utils.go new file mode 100644 index 0000000..7d55856 --- /dev/null +++ b/pkg/fuse/utils.go @@ -0,0 +1,55 @@ +/* + * JuiceFS, Copyright 2020 Juicedata, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package fuse + +import ( + "github.com/juicedata/juicefs/pkg/meta" + + "github.com/hanwen/go-fuse/v2/fuse" +) + +func attrToStat(inode Ino, attr *Attr, out *fuse.Attr) { + out.Ino = uint64(inode) + out.Uid = attr.Uid + out.Gid = attr.Gid + out.Mode = attr.SMode() + out.Nlink = attr.Nlink + out.Atime = uint64(attr.Atime) + out.Atimensec = attr.Atimensec + out.Mtime = uint64(attr.Mtime) + out.Mtimensec = attr.Mtimensec + out.Ctime = uint64(attr.Ctime) + out.Ctimensec = attr.Ctimensec + + var size, blocks uint64 + switch attr.Typ { + case meta.TypeDirectory: + fallthrough + case meta.TypeSymlink: + fallthrough + case meta.TypeFile: + size = attr.Length + blocks = (size + 511) / 512 + case meta.TypeBlockDev: + fallthrough + case meta.TypeCharDev: + out.Rdev = attr.Rdev + } + out.Size = size + out.Blocks = blocks + setBlksize(out, 0x10000) +} diff --git a/pkg/gateway/gateway.go b/pkg/gateway/gateway.go new file mode 100644 index 0000000..96fe911 --- /dev/null +++ b/pkg/gateway/gateway.go @@ -0,0 +1,909 @@ +/* + * JuiceFS, Copyright 2020 Juicedata, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package gateway + +import ( + "context" + "fmt" + "io" + "net/http" + "os" + "path" + "sort" + "strconv" + "strings" + "sync" + "syscall" + "time" + + "github.com/minio/minio-go/pkg/s3utils" + minio "github.com/minio/minio/cmd" + + "github.com/juicedata/juicefs/pkg/chunk" + "github.com/juicedata/juicefs/pkg/fs" + "github.com/juicedata/juicefs/pkg/meta" + "github.com/juicedata/juicefs/pkg/utils" + "github.com/juicedata/juicefs/pkg/vfs" +) + +const ( + sep = "/" + metaBucket = ".sys" +) + +var mctx meta.Context +var logger = utils.GetLogger("juicefs") + +func NewJFSGateway(conf *vfs.Config, m meta.Meta, store chunk.ChunkStore, multiBucket, keepEtag bool) (minio.ObjectLayer, error) { + jfs, err := fs.NewFileSystem(conf, m, store) + if err != nil { + return nil, fmt.Errorf("Initialize failed: %s", err) + } + mctx = meta.NewContext(uint32(os.Getpid()), uint32(os.Getuid()), []uint32{uint32(os.Getgid())}) + return &jfsObjects{fs: jfs, conf: conf, listPool: minio.NewTreeWalkPool(time.Minute * 30), multiBucket: multiBucket, keepEtag: keepEtag}, nil +} + +type jfsObjects struct { + minio.GatewayUnsupported + conf *vfs.Config + fs *fs.FileSystem + listPool *minio.TreeWalkPool + multiBucket bool + keepEtag bool +} + +func (n *jfsObjects) IsCompressionSupported() bool { + return n.conf.Chunk.Compress != "" && n.conf.Chunk.Compress != "none" +} + +func (n *jfsObjects) IsEncryptionSupported() bool { + return false +} + +// IsReady returns whether the layer is ready to take requests. 
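+// The gateway is considered ready as soon as it is constructed, since NewJFSGateway only returns a layer after the underlying file system has been initialized successfully.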
+func (n *jfsObjects) IsReady(_ context.Context) bool { + return true +} + +func (n *jfsObjects) Shutdown(ctx context.Context) error { + return n.fs.Close() +} + +func (n *jfsObjects) StorageInfo(ctx context.Context) (info minio.StorageInfo, errors []error) { + sinfo := minio.StorageInfo{} + sinfo.Backend.Type = minio.BackendGateway + sinfo.Backend.GatewayOnline = true + return sinfo, nil +} + +func jfsToObjectErr(ctx context.Context, err error, params ...string) error { + if err == nil { + return nil + } + bucket := "" + object := "" + uploadID := "" + switch len(params) { + case 3: + uploadID = params[2] + fallthrough + case 2: + object = params[1] + fallthrough + case 1: + bucket = params[0] + } + + if eno, ok := err.(syscall.Errno); !ok { + logger.Errorf("error: %s bucket: %s, object: %s, uploadID: %s", err, bucket, object, uploadID) + return err + } else if eno == 0 { + return nil + } + + switch { + case fs.IsNotExist(err): + if uploadID != "" { + return minio.InvalidUploadID{ + UploadID: uploadID, + } + } + if object != "" { + return minio.ObjectNotFound{Bucket: bucket, Object: object} + } + return minio.BucketNotFound{Bucket: bucket} + case fs.IsExist(err): + if object != "" { + return minio.PrefixAccessDenied{Bucket: bucket, Object: object} + } + return minio.BucketAlreadyOwnedByYou{Bucket: bucket} + case fs.IsNotEmpty(err): + if object != "" { + return minio.PrefixAccessDenied{Bucket: bucket, Object: object} + } + return minio.BucketNotEmpty{Bucket: bucket} + default: + logger.Errorf("other error: %s bucket: %s, object: %s, uploadID: %s", err, bucket, object, uploadID) + return err + } +} + +// isValidBucketName verifies whether a bucket name is valid. +func (n *jfsObjects) isValidBucketName(bucket string) bool { + if !n.multiBucket && bucket != n.conf.Format.Name { + return false + } + return s3utils.CheckValidBucketNameStrict(bucket) == nil +} + +func (n *jfsObjects) path(p ...string) string { + if len(p) > 0 && p[0] == n.conf.Format.Name { + p = p[1:] + } + return sep + minio.PathJoin(p...) +} + +func (n *jfsObjects) tpath(p ...string) string { + return sep + metaBucket + n.path(p...) +} + +func (n *jfsObjects) upath(bucket, uploadID string) string { + return n.tpath(bucket, "uploads", uploadID) +} + +func (n *jfsObjects) ppath(bucket, uploadID, part string) string { + return n.tpath(bucket, "uploads", uploadID, part) +} + +func (n *jfsObjects) DeleteBucket(ctx context.Context, bucket string, forceDelete bool) error { + if !n.isValidBucketName(bucket) { + return minio.BucketNameInvalid{Bucket: bucket} + } + if !n.multiBucket { + return minio.BucketNotEmpty{Bucket: bucket} + } + eno := n.fs.Delete(mctx, n.path(bucket)) + return jfsToObjectErr(ctx, eno, bucket) +} + +func (n *jfsObjects) MakeBucketWithLocation(ctx context.Context, bucket string, options minio.BucketOptions) error { + if !n.isValidBucketName(bucket) { + return minio.BucketNameInvalid{Bucket: bucket} + } + if !n.multiBucket { + return nil + } + eno := n.fs.Mkdir(mctx, n.path(bucket), 0755) + return jfsToObjectErr(ctx, eno, bucket) +} + +func (n *jfsObjects) GetBucketInfo(ctx context.Context, bucket string) (bi minio.BucketInfo, err error) { + if !n.isValidBucketName(bucket) { + return bi, minio.BucketNameInvalid{Bucket: bucket} + } + fi, eno := n.fs.Stat(mctx, n.path(bucket)) + if eno == 0 { + bi = minio.BucketInfo{ + Name: bucket, + Created: time.Unix(fi.Atime()/1000, 0), + } + } + return bi, jfsToObjectErr(ctx, eno, bucket) +} + +// Ignores all reserved bucket names or invalid bucket names. 
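+// The internal ".sys" bucket (metaBucket) holds multipart-upload state and temporary objects, so it is treated as reserved and never exposed as a user bucket.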
+func isReservedOrInvalidBucket(bucketEntry string, strict bool) bool { + if err := s3utils.CheckValidBucketName(bucketEntry); err != nil { + return true + } + return bucketEntry == metaBucket +} + +func (n *jfsObjects) ListBuckets(ctx context.Context) (buckets []minio.BucketInfo, err error) { + if !n.multiBucket { + fi, eno := n.fs.Stat(mctx, "/") + if eno != 0 { + return nil, jfsToObjectErr(ctx, eno) + } + buckets = []minio.BucketInfo{{ + Name: n.conf.Format.Name, + Created: time.Unix(fi.Atime()/1000, 0), + }} + return buckets, nil + } + f, eno := n.fs.Open(mctx, sep, 0) + if eno != 0 { + return nil, jfsToObjectErr(ctx, eno) + } + defer f.Close(mctx) + entries, eno := f.Readdir(mctx, 10000) + if eno != 0 { + return nil, jfsToObjectErr(ctx, eno) + } + + for _, entry := range entries { + // Ignore all reserved bucket names and invalid bucket names. + if isReservedOrInvalidBucket(entry.Name(), false) || !n.isValidBucketName(entry.Name()) { + continue + } + if entry.IsDir() { + buckets = append(buckets, minio.BucketInfo{ + Name: entry.Name(), + Created: time.Unix(entry.(*fs.FileStat).Atime()/1000, 0), + }) + } + } + + // Sort bucket infos by bucket name. + sort.Slice(buckets, func(i, j int) bool { + return buckets[i].Name < buckets[j].Name + }) + return buckets, nil +} + +func (n *jfsObjects) isObjectDir(ctx context.Context, bucket, object string) bool { + f, eno := n.fs.Open(mctx, n.path(bucket, object), 0) + if eno != 0 { + return false + } + defer f.Close(mctx) + + fis, err := f.Readdir(mctx, 0) + if err != 0 { + return false + } + return len(fis) == 0 +} + +func (n *jfsObjects) isLeafDir(bucket, leafPath string) bool { + return n.isObjectDir(context.Background(), bucket, leafPath) +} + +func (n *jfsObjects) isLeaf(bucket, leafPath string) bool { + return !strings.HasSuffix(leafPath, "/") +} + +func (n *jfsObjects) listDirFactory() minio.ListDirFunc { + return func(bucket, prefixDir, prefixEntry string) (emptyDir bool, entries []string, delayIsLeaf bool) { + f, eno := n.fs.Open(mctx, n.path(bucket, prefixDir), 0) + if eno != 0 { + return fs.IsNotExist(eno), nil, false + } + defer f.Close(mctx) + fis, eno := f.Readdir(mctx, 0) + if eno != 0 { + return + } + if len(fis) == 0 { + return true, nil, false + } + root := n.path(bucket, prefixDir) == "/" + for _, fi := range fis { + if root && len(fi.Name()) == len(metaBucket) && fi.Name() == metaBucket { + continue + } + if fi.IsDir() { + entries = append(entries, fi.Name()+sep) + } else { + entries = append(entries, fi.Name()) + } + } + entries, delayIsLeaf = minio.FilterListEntries(bucket, prefixDir, entries, prefixEntry, n.isLeaf) + return false, entries, delayIsLeaf + } +} + +func (n *jfsObjects) checkBucket(ctx context.Context, bucket string) error { + if !n.isValidBucketName(bucket) { + return minio.BucketNameInvalid{Bucket: bucket} + } + if _, eno := n.fs.Stat(mctx, n.path(bucket)); eno != 0 { + return jfsToObjectErr(ctx, eno, bucket) + } + return nil +} + +// ListObjects lists all blobs in JFS bucket filtered by prefix. 
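+// The actual walk is delegated to minio.ListObjects using the shared tree-walk pool; a maxKeys of 0 is treated as "no limit".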
+func (n *jfsObjects) ListObjects(ctx context.Context, bucket, prefix, marker, delimiter string, maxKeys int) (loi minio.ListObjectsInfo, err error) { + if err := n.checkBucket(ctx, bucket); err != nil { + return loi, err + } + getObjectInfo := func(ctx context.Context, bucket, object string) (obj minio.ObjectInfo, err error) { + fi, eno := n.fs.Stat(mctx, n.path(bucket, object)) + if eno == 0 { + obj = minio.ObjectInfo{ + Bucket: bucket, + Name: object, + ModTime: fi.ModTime(), + Size: fi.Size(), + IsDir: fi.IsDir(), + AccTime: fi.ModTime(), + } + } + return obj, jfsToObjectErr(ctx, eno, bucket, object) + } + + if maxKeys == 0 { + maxKeys = -1 // list as many objects as possible + } + return minio.ListObjects(ctx, n, bucket, prefix, marker, delimiter, maxKeys, n.listPool, n.listDirFactory(), n.isLeaf, n.isLeafDir, getObjectInfo, getObjectInfo) +} + +// ListObjectsV2 lists all blobs in JFS bucket filtered by prefix +func (n *jfsObjects) ListObjectsV2(ctx context.Context, bucket, prefix, continuationToken, delimiter string, maxKeys int, + fetchOwner bool, startAfter string) (loi minio.ListObjectsV2Info, err error) { + if !n.isValidBucketName(bucket) { + return minio.ListObjectsV2Info{}, minio.BucketNameInvalid{Bucket: bucket} + } + // fetchOwner is not supported and unused. + marker := continuationToken + if marker == "" { + marker = startAfter + } + resultV1, err := n.ListObjects(ctx, bucket, prefix, marker, delimiter, maxKeys) + if err == nil { + loi = minio.ListObjectsV2Info{ + Objects: resultV1.Objects, + Prefixes: resultV1.Prefixes, + ContinuationToken: continuationToken, + NextContinuationToken: resultV1.NextMarker, + IsTruncated: resultV1.IsTruncated, + } + } + return loi, err +} + +func (n *jfsObjects) DeleteObject(ctx context.Context, bucket, object string, options minio.ObjectOptions) (info minio.ObjectInfo, err error) { + if err = n.checkBucket(ctx, bucket); err != nil { + return + } + info.Bucket = bucket + info.Name = object + p := n.path(bucket, object) + root := n.path(bucket) + for p != root { + if eno := n.fs.Delete(mctx, p); eno != 0 { + if fs.IsNotEmpty(eno) { + err = nil + } else { + err = eno + } + break + } + p = path.Dir(p) + } + return info, jfsToObjectErr(ctx, err, bucket, object) +} + +func (n *jfsObjects) DeleteObjects(ctx context.Context, bucket string, objects []minio.ObjectToDelete, options minio.ObjectOptions) (objs []minio.DeletedObject, errs []error) { + for _, object := range objects { + _, err := n.DeleteObject(ctx, bucket, object.ObjectName, options) + if err == nil { + objs = append(objs, minio.DeletedObject{ObjectName: object.ObjectName}) + } else { + errs = append(errs, err) + } + } + return +} + +type fReader struct { + *fs.File +} + +func (f *fReader) Read(b []byte) (int, error) { + return f.File.Read(mctx, b) +} + +func (n *jfsObjects) GetObjectNInfo(ctx context.Context, bucket, object string, rs *minio.HTTPRangeSpec, h http.Header, lockType minio.LockType, opts minio.ObjectOptions) (gr *minio.GetObjectReader, err error) { + objInfo, err := n.GetObjectInfo(ctx, bucket, object, opts) + if err != nil { + return nil, err + } + + var startOffset, length int64 + startOffset, length, err = rs.GetOffsetLength(objInfo.Size) + if err != nil { + return + } + f, eno := n.fs.Open(mctx, n.path(bucket, object), 0) + if eno != 0 { + return nil, jfsToObjectErr(ctx, eno, bucket, object) + } + _, _ = f.Seek(mctx, startOffset, 0) + r := &io.LimitedReader{R: &fReader{f}, N: length} + closer := func() { _ = f.Close(mctx) } + return minio.NewGetObjectReaderFromReader(r, 
objInfo, opts, closer) +} + +func (n *jfsObjects) CopyObject(ctx context.Context, srcBucket, srcObject, dstBucket, dstObject string, srcInfo minio.ObjectInfo, srcOpts, dstOpts minio.ObjectOptions) (info minio.ObjectInfo, err error) { + if err = n.checkBucket(ctx, srcBucket); err != nil { + return + } + if err = n.checkBucket(ctx, dstBucket); err != nil { + return + } + dst := n.path(dstBucket, dstObject) + src := n.path(srcBucket, srcObject) + if minio.IsStringEqual(src, dst) { + return n.GetObjectInfo(ctx, srcBucket, srcObject, minio.ObjectOptions{}) + } + tmp := n.tpath(dstBucket, "tmp", minio.MustGetUUID()) + _ = n.mkdirAll(ctx, path.Dir(tmp), 0755) + _, eno := n.fs.Create(mctx, tmp, 0644) + if eno != 0 { + logger.Errorf("create %s: %s", tmp, eno) + return + } + defer func() { _ = n.fs.Delete(mctx, tmp) }() + + _, eno = n.fs.CopyFileRange(mctx, src, 0, tmp, 0, 1<<63) + if eno != 0 { + err = jfsToObjectErr(ctx, eno, srcBucket, srcObject) + logger.Errorf("copy %s to %s: %s", src, tmp, err) + return + } + eno = n.fs.Rename(mctx, tmp, dst, 0) + if eno != 0 { + err = jfsToObjectErr(ctx, eno, srcBucket, srcObject) + logger.Errorf("rename %s to %s: %s", tmp, dst, err) + return + } + fi, eno := n.fs.Stat(mctx, dst) + if eno != 0 { + err = jfsToObjectErr(ctx, eno, dstBucket, dstObject) + return + } + + var etag []byte + if n.keepEtag { + etag, _ = n.fs.GetXattr(mctx, src, s3Etag) + if len(etag) != 0 { + eno = n.fs.SetXattr(mctx, dst, s3Etag, etag, 0) + if eno != 0 { + logger.Warnf("set xattr error, path: %s,xattr: %s,value: %s,flags: %d", dst, s3Etag, etag, 0) + } + } + } + + return minio.ObjectInfo{ + Bucket: dstBucket, + Name: dstObject, + ETag: string(etag), + ModTime: fi.ModTime(), + Size: fi.Size(), + IsDir: fi.IsDir(), + AccTime: fi.ModTime(), + }, nil +} + +var buffPool = sync.Pool{ + New: func() interface{} { + buf := make([]byte, 1<<17) + return &buf + }, +} + +func (n *jfsObjects) GetObject(ctx context.Context, bucket, object string, startOffset, length int64, writer io.Writer, etag string, opts minio.ObjectOptions) (err error) { + if err = n.checkBucket(ctx, bucket); err != nil { + return + } + f, eno := n.fs.Open(mctx, n.path(bucket, object), vfs.MODE_MASK_R) + if eno != 0 { + return jfsToObjectErr(ctx, eno, bucket, object) + } + defer func() { _ = f.Close(mctx) }() + var buf = buffPool.Get().(*[]byte) + defer buffPool.Put(buf) + _, _ = f.Seek(mctx, startOffset, 0) + for length > 0 { + l := int64(len(*buf)) + if l > length { + l = length + } + n, e := f.Read(mctx, (*buf)[:l]) + if n == 0 { + if e != io.EOF { + err = e + } + break + } + if _, err = writer.Write((*buf)[:n]); err != nil { + break + } + length -= int64(n) + } + return jfsToObjectErr(ctx, err, bucket, object) +} + +func (n *jfsObjects) GetObjectInfo(ctx context.Context, bucket, object string, opts minio.ObjectOptions) (objInfo minio.ObjectInfo, err error) { + if err = n.checkBucket(ctx, bucket); err != nil { + return + } + fi, eno := n.fs.Stat(mctx, n.path(bucket, object)) + if eno != 0 { + err = jfsToObjectErr(ctx, eno, bucket, object) + return + } + if strings.HasSuffix(object, sep) && !fi.IsDir() { + err = jfsToObjectErr(ctx, syscall.ENOENT, bucket, object) + return + } + var etag []byte + if n.keepEtag { + etag, _ = n.fs.GetXattr(mctx, n.path(bucket, object), s3Etag) + } + return minio.ObjectInfo{ + Bucket: bucket, + Name: object, + ModTime: fi.ModTime(), + Size: fi.Size(), + IsDir: fi.IsDir(), + AccTime: fi.ModTime(), + ETag: string(etag), + }, nil +} + +func (n *jfsObjects) mkdirAll(ctx context.Context, p string, 
mode os.FileMode) error { + if fi, eno := n.fs.Stat(mctx, p); eno == 0 { + if !fi.IsDir() { + return fmt.Errorf("%s is not directory", p) + } + return nil + } + eno := n.fs.Mkdir(mctx, p, uint16(mode)) + if eno != 0 && fs.IsNotExist(eno) { + if err := n.mkdirAll(ctx, path.Dir(p), 0755); err != nil { + return err + } + eno = n.fs.Mkdir(mctx, p, uint16(mode)) + } + if eno != 0 && fs.IsExist(eno) { + eno = 0 + } + if eno == 0 { + return nil + } + return eno +} + +func (n *jfsObjects) putObject(ctx context.Context, bucket, object string, r *minio.PutObjReader, opts minio.ObjectOptions) (err error) { + tmpname := n.tpath(bucket, "tmp", minio.MustGetUUID()) + _ = n.mkdirAll(ctx, path.Dir(tmpname), 0755) + f, eno := n.fs.Create(mctx, tmpname, 0644) + if eno != 0 { + logger.Errorf("create %s: %s", tmpname, eno) + err = eno + return + } + defer func() { _ = n.fs.Delete(mctx, tmpname) }() + var buf = buffPool.Get().(*[]byte) + defer buffPool.Put(buf) + for { + var n int + n, err = io.ReadFull(r, *buf) + if n == 0 { + if err == io.EOF { + err = nil + } + break + } + _, eno := f.Write(mctx, (*buf)[:n]) + if eno != 0 { + err = eno + break + } + } + if err == nil { + eno = f.Close(mctx) + if eno != 0 { + err = eno + } + } else { + _ = f.Close(mctx) + } + if err != nil { + return + } + dir := path.Dir(object) + if dir != "" { + _ = n.mkdirAll(ctx, dir, os.FileMode(0755)) + } + if eno := n.fs.Rename(mctx, tmpname, object, 0); eno != 0 { + err = jfsToObjectErr(ctx, eno, bucket, object) + return + } + return +} + +func (n *jfsObjects) PutObject(ctx context.Context, bucket string, object string, r *minio.PutObjReader, opts minio.ObjectOptions) (objInfo minio.ObjectInfo, err error) { + if err = n.checkBucket(ctx, bucket); err != nil { + return + } + + p := n.path(bucket, object) + if strings.HasSuffix(object, sep) { + if err = n.mkdirAll(ctx, p, os.FileMode(0755)); err != nil { + err = jfsToObjectErr(ctx, err, bucket, object) + return + } + if r.Size() > 0 { + err = minio.ObjectExistsAsDirectory{ + Bucket: bucket, + Object: object, + Err: syscall.EEXIST, + } + return + } + } else if err = n.putObject(ctx, bucket, p, r, opts); err != nil { + return + } + fi, eno := n.fs.Stat(mctx, p) + if eno != 0 { + return objInfo, jfsToObjectErr(ctx, eno, bucket, object) + } + etag := r.MD5CurrentHexString() + if n.keepEtag { + eno = n.fs.SetXattr(mctx, p, s3Etag, []byte(etag), 0) + if eno != 0 { + logger.Errorf("set xattr error, path: %s,xattr: %s,value: %s,flags: %d", p, s3Etag, etag, 0) + } + } + return minio.ObjectInfo{ + Bucket: bucket, + Name: object, + ETag: etag, + ModTime: fi.ModTime(), + Size: fi.Size(), + IsDir: fi.IsDir(), + AccTime: fi.ModTime(), + }, nil +} + +func (n *jfsObjects) NewMultipartUpload(ctx context.Context, bucket string, object string, opts minio.ObjectOptions) (uploadID string, err error) { + if err = n.checkBucket(ctx, bucket); err != nil { + return + } + uploadID = minio.MustGetUUID() + p := n.upath(bucket, uploadID) + err = n.mkdirAll(ctx, p, os.FileMode(0755)) + if err == nil { + eno := n.fs.SetXattr(mctx, p, uploadKeyName, []byte(object), 0) + if eno != 0 { + logger.Warnf("set object %s on upload %s: %s", object, uploadID, eno) + } + } + return +} + +const uploadKeyName = "s3-object" +const s3Etag = "s3-etag" + +func (n *jfsObjects) ListMultipartUploads(ctx context.Context, bucket string, prefix string, keyMarker string, uploadIDMarker string, delimiter string, maxUploads int) (lmi minio.ListMultipartsInfo, err error) { + if err = n.checkBucket(ctx, bucket); err != nil { + return + } + f, 
eno := n.fs.Open(mctx, n.tpath(bucket, "uploads"), 0) + if eno != 0 { + return // no found + } + defer f.Close(mctx) + entries, eno := f.ReaddirPlus(mctx, 0) + if eno != 0 { + err = jfsToObjectErr(ctx, eno, bucket) + return + } + lmi.Prefix = prefix + lmi.KeyMarker = keyMarker + lmi.UploadIDMarker = uploadIDMarker + lmi.MaxUploads = maxUploads + for _, e := range entries { + uploadID := string(e.Name) + if uploadID > uploadIDMarker { + object_, _ := n.fs.GetXattr(mctx, n.upath(bucket, uploadID), uploadKeyName) + object := string(object_) + if strings.HasPrefix(object, prefix) && object > keyMarker { + lmi.Uploads = append(lmi.Uploads, minio.MultipartInfo{ + Object: object, + UploadID: uploadID, + Initiated: time.Unix(e.Attr.Atime, int64(e.Attr.Atimensec)), + }) + } + } + } + if len(lmi.Uploads) > maxUploads { + lmi.IsTruncated = true + lmi.Uploads = lmi.Uploads[:maxUploads] + lmi.NextKeyMarker = keyMarker + lmi.NextUploadIDMarker = lmi.Uploads[maxUploads-1].UploadID + } + return lmi, jfsToObjectErr(ctx, err, bucket) +} + +func (n *jfsObjects) checkUploadIDExists(ctx context.Context, bucket, object, uploadID string) (err error) { + if err = n.checkBucket(ctx, bucket); err != nil { + return + } + _, eno := n.fs.Stat(mctx, n.upath(bucket, uploadID)) + return jfsToObjectErr(ctx, eno, bucket, object, uploadID) +} + +func (n *jfsObjects) ListObjectParts(ctx context.Context, bucket, object, uploadID string, partNumberMarker int, maxParts int, opts minio.ObjectOptions) (result minio.ListPartsInfo, err error) { + if err = n.checkUploadIDExists(ctx, bucket, object, uploadID); err != nil { + return result, err + } + f, e := n.fs.Open(mctx, n.upath(bucket, uploadID), 0) + if e != 0 { + err = jfsToObjectErr(ctx, e, bucket, object, uploadID) + return + } + defer func() { _ = f.Close(mctx) }() + entries, e := f.ReaddirPlus(mctx, 0) + if e != 0 { + err = jfsToObjectErr(ctx, e, bucket, object, uploadID) + return + } + result.Bucket = bucket + result.Object = object + result.UploadID = uploadID + result.PartNumberMarker = partNumberMarker + result.MaxParts = maxParts + for _, entry := range entries { + num, er := strconv.Atoi(string(entry.Name)) + if er == nil && num > partNumberMarker { + etag, _ := n.fs.GetXattr(mctx, n.ppath(bucket, uploadID, string(entry.Name)), s3Etag) + result.Parts = append(result.Parts, minio.PartInfo{ + PartNumber: num, + Size: int64(entry.Attr.Length), + LastModified: time.Unix(entry.Attr.Mtime, 0), + ETag: string(etag), + }) + } + } + sort.Slice(result.Parts, func(i, j int) bool { + return result.Parts[i].PartNumber < result.Parts[j].PartNumber + }) + if len(result.Parts) > maxParts { + result.IsTruncated = true + result.Parts = result.Parts[:maxParts] + result.NextPartNumberMarker = result.Parts[maxParts-1].PartNumber + } + return +} + +func (n *jfsObjects) CopyObjectPart(ctx context.Context, srcBucket, srcObject, dstBucket, dstObject, uploadID string, partID int, + startOffset int64, length int64, srcInfo minio.ObjectInfo, srcOpts, dstOpts minio.ObjectOptions) (result minio.PartInfo, err error) { + if !n.isValidBucketName(srcBucket) { + err = minio.BucketNameInvalid{Bucket: srcBucket} + return + } + if err = n.checkUploadIDExists(ctx, dstBucket, dstObject, uploadID); err != nil { + return + } + // TODO: use CopyFileRange + return n.PutObjectPart(ctx, dstBucket, dstObject, uploadID, partID, srcInfo.PutObjReader, dstOpts) +} + +func (n *jfsObjects) PutObjectPart(ctx context.Context, bucket, object, uploadID string, partID int, r *minio.PutObjReader, opts minio.ObjectOptions) 
(info minio.PartInfo, err error) { + if err = n.checkUploadIDExists(ctx, bucket, object, uploadID); err != nil { + return + } + p := n.ppath(bucket, uploadID, strconv.Itoa(partID)) + if err = n.putObject(ctx, bucket, p, r, opts); err != nil { + err = jfsToObjectErr(ctx, err, bucket, object) + return + } + etag := r.MD5CurrentHexString() + if n.fs.SetXattr(mctx, p, s3Etag, []byte(etag), 0) != 0 { + logger.Warnf("set xattr error, path: %s,xattr: %s,value: %s,flags: %d", p, s3Etag, etag, 0) + } + info.PartNumber = partID + info.ETag = etag + info.LastModified = minio.UTCNow() + info.Size = r.Reader.Size() + return +} + +func (n *jfsObjects) GetMultipartInfo(ctx context.Context, bucket, object, uploadID string, opts minio.ObjectOptions) (result minio.MultipartInfo, err error) { + if err = n.checkUploadIDExists(ctx, bucket, object, uploadID); err != nil { + return + } + result.Bucket = bucket + result.Object = object + result.UploadID = uploadID + return +} + +func (n *jfsObjects) CompleteMultipartUpload(ctx context.Context, bucket, object, uploadID string, parts []minio.CompletePart, opts minio.ObjectOptions) (objInfo minio.ObjectInfo, err error) { + if err = n.checkUploadIDExists(ctx, bucket, object, uploadID); err != nil { + return + } + + tmp := n.ppath(bucket, uploadID, "complete") + _ = n.fs.Delete(mctx, tmp) + _, eno := n.fs.Create(mctx, tmp, 0755) + if eno != 0 { + err = jfsToObjectErr(ctx, eno, bucket, object, uploadID) + logger.Errorf("create complete: %s", err) + return + } + var total uint64 + for _, part := range parts { + p := n.ppath(bucket, uploadID, strconv.Itoa(part.PartNumber)) + copied, eno := n.fs.CopyFileRange(mctx, p, 0, tmp, total, 1<<30) + if eno != 0 { + err = jfsToObjectErr(ctx, eno, bucket, object, uploadID) + logger.Errorf("merge parts: %s", err) + return + } + total += copied + } + + name := n.path(bucket, object) + dir := path.Dir(name) + if dir != "" { + if err = n.mkdirAll(ctx, dir, os.FileMode(0755)); err != nil { + _ = n.fs.Delete(mctx, tmp) + err = jfsToObjectErr(ctx, err, bucket, object, uploadID) + return + } + } + + eno = n.fs.Rename(mctx, tmp, name, 0) + if eno != 0 { + _ = n.fs.Delete(mctx, tmp) + err = jfsToObjectErr(ctx, eno, bucket, object, uploadID) + logger.Errorf("Rename %s -> %s: %s", tmp, name, err) + return + } + + fi, eno := n.fs.Stat(mctx, name) + if eno != 0 { + _ = n.fs.Delete(mctx, name) + err = jfsToObjectErr(ctx, eno, bucket, object, uploadID) + return + } + + // remove parts + _ = n.fs.Rmr(mctx, n.upath(bucket, uploadID)) + + // Calculate s3 compatible md5sum for complete multipart. + s3MD5 := minio.ComputeCompleteMultipartMD5(parts) + if n.keepEtag { + eno = n.fs.SetXattr(mctx, name, s3Etag, []byte(s3MD5), 0) + if eno != 0 { + logger.Warnf("set xattr error, path: %s,xattr: %s,value: %s,flags: %d", name, s3Etag, s3MD5, 0) + } + } + return minio.ObjectInfo{ + Bucket: bucket, + Name: object, + ETag: s3MD5, + ModTime: fi.ModTime(), + Size: fi.Size(), + IsDir: fi.IsDir(), + AccTime: fi.ModTime(), + }, nil +} + +func (n *jfsObjects) AbortMultipartUpload(ctx context.Context, bucket, object, uploadID string, option minio.ObjectOptions) (err error) { + if err = n.checkUploadIDExists(ctx, bucket, object, uploadID); err != nil { + return + } + eno := n.fs.Rmr(mctx, n.upath(bucket, uploadID)) + return jfsToObjectErr(ctx, eno, bucket, object, uploadID) +} diff --git a/pkg/meta/base.go b/pkg/meta/base.go new file mode 100644 index 0000000..7ae3457 --- /dev/null +++ b/pkg/meta/base.go @@ -0,0 +1,800 @@ +/* + * JuiceFS, Copyright 2020 Juicedata, Inc. 
+ * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package meta + +import ( + "fmt" + "runtime" + "sort" + "strings" + "sync" + "sync/atomic" + "syscall" + "time" + + "github.com/juicedata/juicefs/pkg/utils" +) + +const ( + inodeBatch = 100 + chunkIDBatch = 1000 +) + +type engine interface { + incrCounter(name string, value int64) (int64, error) + + doCleanStaleSession(sid uint64) + doDeleteSustainedInode(sid uint64, inode Ino) error + doDeleteFileData(inode Ino, length uint64) + doDeleteSlice(chunkid uint64, size uint32) error + + doGetAttr(ctx Context, inode Ino, attr *Attr) syscall.Errno + doLookup(ctx Context, parent Ino, name string, inode *Ino, attr *Attr) syscall.Errno + doMknod(ctx Context, parent Ino, name string, _type uint8, mode, cumask uint16, rdev uint32, path string, inode *Ino, attr *Attr) syscall.Errno + doLink(ctx Context, inode, parent Ino, name string, attr *Attr) syscall.Errno + doUnlink(ctx Context, parent Ino, name string) syscall.Errno + doRmdir(ctx Context, parent Ino, name string) syscall.Errno + doReadlink(ctx Context, inode Ino) ([]byte, error) + doReaddir(ctx Context, inode Ino, plus uint8, entries *[]*Entry) syscall.Errno + doRename(ctx Context, parentSrc Ino, nameSrc string, parentDst Ino, nameDst string, flags uint32, inode *Ino, attr *Attr) syscall.Errno + GetXattr(ctx Context, inode Ino, name string, vbuff *[]byte) syscall.Errno + SetXattr(ctx Context, inode Ino, name string, value []byte, flags uint32) syscall.Errno +} + +type baseMeta struct { + sync.Mutex + conf *Config + fmt Format + + root Ino + subTrash internalNode + sid uint64 + of *openfiles + removedFiles map[Ino]bool + compacting map[uint64]bool + deleting chan int + symlinks *sync.Map + msgCallbacks *msgCallbacks + newSpace int64 + newInodes int64 + usedSpace int64 + usedInodes int64 + umounting bool + + freeMu sync.Mutex + freeInodes freeID + freeChunks freeID + + en engine +} + +func newBaseMeta(conf *Config) baseMeta { + if conf.Retries == 0 { + conf.Retries = 30 + } + return baseMeta{ + conf: conf, + root: 1, + of: newOpenFiles(conf.OpenCache), + removedFiles: make(map[Ino]bool), + compacting: make(map[uint64]bool), + deleting: make(chan int, conf.MaxDeletes), + symlinks: &sync.Map{}, + msgCallbacks: &msgCallbacks{ + callbacks: make(map[uint32]MsgCallback), + }, + } +} + +func (m *baseMeta) checkRoot(inode Ino) Ino { + if inode == 1 { + return m.root + } + return inode +} + +func (r *baseMeta) OnMsg(mtype uint32, cb MsgCallback) { + r.msgCallbacks.Lock() + defer r.msgCallbacks.Unlock() + r.msgCallbacks.callbacks[mtype] = cb +} + +func (r *baseMeta) newMsg(mid uint32, args ...interface{}) error { + r.msgCallbacks.Lock() + cb, ok := r.msgCallbacks.callbacks[mid] + r.msgCallbacks.Unlock() + if ok { + return cb(args...) 
+ } + return fmt.Errorf("message %d is not supported", mid) +} + +func (m *baseMeta) CloseSession() error { + if m.conf.ReadOnly { + return nil + } + m.Lock() + m.umounting = true + m.Unlock() + m.en.doCleanStaleSession(m.sid) + return nil +} + +func (m *baseMeta) refreshUsage() { + for { + if v, err := m.en.incrCounter(usedSpace, 0); err == nil { + atomic.StoreInt64(&m.usedSpace, v) + } + if v, err := m.en.incrCounter(totalInodes, 0); err == nil { + atomic.StoreInt64(&m.usedInodes, v) + } + time.Sleep(time.Second * 10) + } +} + +func (m *baseMeta) checkQuota(size, inodes int64) bool { + if size > 0 && m.fmt.Capacity > 0 && atomic.LoadInt64(&m.usedSpace)+atomic.LoadInt64(&m.newSpace)+size > int64(m.fmt.Capacity) { + return true + } + return inodes > 0 && m.fmt.Inodes > 0 && atomic.LoadInt64(&m.usedInodes)+atomic.LoadInt64(&m.newInodes)+inodes > int64(m.fmt.Inodes) +} + +func (m *baseMeta) updateStats(space int64, inodes int64) { + atomic.AddInt64(&m.newSpace, space) + atomic.AddInt64(&m.newInodes, inodes) +} + +func (m *baseMeta) flushStats() { + for { + newSpace := atomic.SwapInt64(&m.newSpace, 0) + if newSpace != 0 { + if _, err := m.en.incrCounter(usedSpace, newSpace); err != nil { + logger.Warnf("update space stats: %s", err) + m.updateStats(newSpace, 0) + } + } + newInodes := atomic.SwapInt64(&m.newInodes, 0) + if newInodes != 0 { + if _, err := m.en.incrCounter(totalInodes, newInodes); err != nil { + logger.Warnf("update inodes stats: %s", err) + m.updateStats(0, newInodes) + } + } + time.Sleep(time.Second) + } +} + +func (m *baseMeta) StatFS(ctx Context, totalspace, availspace, iused, iavail *uint64) syscall.Errno { + defer timeit(time.Now()) + var used, inodes int64 + var err error + err = utils.WithTimeout(func() error { + used, err = m.en.incrCounter(usedSpace, 0) + return err + }, time.Millisecond*150) + if err != nil { + used = atomic.LoadInt64(&m.usedSpace) + } + err = utils.WithTimeout(func() error { + inodes, err = m.en.incrCounter(totalInodes, 0) + return err + }, time.Millisecond*150) + if err != nil { + inodes = atomic.LoadInt64(&m.usedInodes) + } + used += atomic.LoadInt64(&m.newSpace) + inodes += atomic.LoadInt64(&m.newInodes) + if used < 0 { + used = 0 + } + if m.fmt.Capacity > 0 { + *totalspace = m.fmt.Capacity + if *totalspace < uint64(used) { + *totalspace = uint64(used) + } + } else { + *totalspace = 1 << 50 + for *totalspace*8 < uint64(used)*10 { + *totalspace *= 2 + } + } + *availspace = *totalspace - uint64(used) + if inodes < 0 { + inodes = 0 + } + *iused = uint64(inodes) + if m.fmt.Inodes > 0 { + if *iused > m.fmt.Inodes { + *iavail = 0 + } else { + *iavail = m.fmt.Inodes - *iused + } + } else { + *iavail = 10 << 20 + for *iused*10 > (*iused+*iavail)*8 { + *iavail *= 2 + } + } + return 0 +} + +func (m *baseMeta) resolveCase(ctx Context, parent Ino, name string) *Entry { + var entries []*Entry + _ = m.en.doReaddir(ctx, parent, 0, &entries) + for _, e := range entries { + n := string(e.Name) + if strings.EqualFold(name, n) { + return e + } + } + return nil +} + +func (m *baseMeta) Lookup(ctx Context, parent Ino, name string, inode *Ino, attr *Attr) syscall.Errno { + if inode == nil || attr == nil { + return syscall.EINVAL // bad request + } + defer timeit(time.Now()) + parent = m.checkRoot(parent) + if name == ".." { + if parent == m.root { + name = "." 
+ } else { + if st := m.GetAttr(ctx, parent, attr); st != 0 { + return st + } + if attr.Typ != TypeDirectory { + return syscall.ENOTDIR + } + *inode = attr.Parent + return m.GetAttr(ctx, *inode, attr) + } + } + if name == "." { + if st := m.GetAttr(ctx, parent, attr); st != 0 { + return st + } + if attr.Typ != TypeDirectory { + return syscall.ENOTDIR + } + *inode = parent + return 0 + } + if parent == 1 && name == TrashName { + if st := m.GetAttr(ctx, TrashInode, attr); st != 0 { + return st + } + *inode = TrashInode + return 0 + } + st := m.en.doLookup(ctx, parent, name, inode, attr) + if st == syscall.ENOENT && m.conf.CaseInsensi { + if e := m.resolveCase(ctx, parent, name); e != nil { + *inode = e.Inode + if st = m.GetAttr(ctx, *inode, attr); st == syscall.ENOENT { + logger.Warnf("no attribute for inode %d (%d, %s)", e.Inode, parent, e.Name) + *attr = *e.Attr + st = 0 + } + } + } + return st +} + +func (m *baseMeta) parseAttr(buf []byte, attr *Attr) { + if attr == nil { + return + } + rb := utils.FromBuffer(buf) + attr.Flags = rb.Get8() + attr.Mode = rb.Get16() + attr.Typ = uint8(attr.Mode >> 12) + attr.Mode &= 0xfff + attr.Uid = rb.Get32() + attr.Gid = rb.Get32() + attr.Atime = int64(rb.Get64()) + attr.Atimensec = rb.Get32() + attr.Mtime = int64(rb.Get64()) + attr.Mtimensec = rb.Get32() + attr.Ctime = int64(rb.Get64()) + attr.Ctimensec = rb.Get32() + attr.Nlink = rb.Get32() + attr.Length = rb.Get64() + attr.Rdev = rb.Get32() + if rb.Left() >= 8 { + attr.Parent = Ino(rb.Get64()) + } + attr.Full = true + logger.Tracef("attr: %+v -> %+v", buf, attr) +} + +func (m *baseMeta) marshal(attr *Attr) []byte { + w := utils.NewBuffer(36 + 24 + 4 + 8) + w.Put8(attr.Flags) + w.Put16((uint16(attr.Typ) << 12) | (attr.Mode & 0xfff)) + w.Put32(attr.Uid) + w.Put32(attr.Gid) + w.Put64(uint64(attr.Atime)) + w.Put32(attr.Atimensec) + w.Put64(uint64(attr.Mtime)) + w.Put32(attr.Mtimensec) + w.Put64(uint64(attr.Ctime)) + w.Put32(attr.Ctimensec) + w.Put32(attr.Nlink) + w.Put64(attr.Length) + w.Put32(attr.Rdev) + w.Put64(uint64(attr.Parent)) + logger.Tracef("attr: %+v -> %+v", attr, w.Bytes()) + return w.Bytes() +} + +func clearSUGID(ctx Context, cur *Attr, set *Attr) { + switch runtime.GOOS { + case "darwin": + if ctx.Uid() != 0 { + // clear SUID and SGID + cur.Mode &= 01777 + set.Mode &= 01777 + } + case "linux": + // same as ext + if cur.Typ != TypeDirectory { + if ctx.Uid() != 0 || (cur.Mode>>3)&1 != 0 { + // clear SUID and SGID + cur.Mode &= 01777 + set.Mode &= 01777 + } else { + // keep SGID if the file is non-group-executable + cur.Mode &= 03777 + set.Mode &= 03777 + } + } + } +} + +func (r *baseMeta) Resolve(ctx Context, parent Ino, path string, inode *Ino, attr *Attr) syscall.Errno { + return syscall.ENOTSUP +} + +func (m *baseMeta) Access(ctx Context, inode Ino, mmask uint8, attr *Attr) syscall.Errno { + if ctx.Uid() == 0 { + return 0 + } + if attr == nil || !attr.Full { + if attr == nil { + attr = &Attr{} + } + err := m.GetAttr(ctx, inode, attr) + if err != 0 { + return err + } + } + mode := accessMode(attr, ctx.Uid(), ctx.Gids()) + if mode&mmask != mmask { + logger.Debugf("Access inode %d %o, mode %o, request mode %o", inode, attr.Mode, mode, mmask) + return syscall.EACCES + } + return 0 +} + +func (m *baseMeta) GetAttr(ctx Context, inode Ino, attr *Attr) syscall.Errno { + inode = m.checkRoot(inode) + if m.conf.OpenCache > 0 && m.of.Check(inode, attr) { + return 0 + } + defer timeit(time.Now()) + var err syscall.Errno + if inode == 1 { + e := utils.WithTimeout(func() error { + err = 
m.en.doGetAttr(ctx, inode, attr) + return nil + }, time.Millisecond*300) + if e != nil || err != 0 { + err = 0 + attr.Typ = TypeDirectory + attr.Mode = 0777 + attr.Nlink = 2 + attr.Length = 4 << 10 + } + } else { + err = m.en.doGetAttr(ctx, inode, attr) + } + if err == 0 { + m.of.Update(inode, attr) + } + return err +} + +func (m *baseMeta) nextInode() (Ino, error) { + m.freeMu.Lock() + defer m.freeMu.Unlock() + if m.freeInodes.next >= m.freeInodes.maxid { + v, err := m.en.incrCounter("nextInode", inodeBatch) + if err != nil { + return 0, err + } + m.freeInodes.next = uint64(v) - inodeBatch + m.freeInodes.maxid = uint64(v) + } + n := m.freeInodes.next + m.freeInodes.next++ + for n <= 1 { + n = m.freeInodes.next + m.freeInodes.next++ + } + return Ino(n), nil +} + +func (m *baseMeta) Mknod(ctx Context, parent Ino, name string, _type uint8, mode, cumask uint16, rdev uint32, inode *Ino, attr *Attr) syscall.Errno { + if isTrash(parent) { + return syscall.EPERM + } + if parent == 1 && name == TrashName { + return syscall.EPERM + } + defer timeit(time.Now()) + return m.en.doMknod(ctx, parent, name, _type, mode, cumask, rdev, "", inode, attr) +} + +func (m *baseMeta) Create(ctx Context, parent Ino, name string, mode uint16, cumask uint16, flags uint32, inode *Ino, attr *Attr) syscall.Errno { + if isTrash(parent) { + return syscall.EPERM + } + if parent == 1 && name == TrashName { + return syscall.EPERM + } + defer timeit(time.Now()) + if attr == nil { + attr = &Attr{} + } + err := m.en.doMknod(ctx, parent, name, TypeFile, mode, cumask, 0, "", inode, attr) + if err == syscall.EEXIST && (flags&syscall.O_EXCL) == 0 && attr.Typ == TypeFile { + err = 0 + } + if err == 0 && inode != nil { + m.of.Open(*inode, attr) + } + return err +} + +func (m *baseMeta) Mkdir(ctx Context, parent Ino, name string, mode uint16, cumask uint16, copysgid uint8, inode *Ino, attr *Attr) syscall.Errno { + if isTrash(parent) { + return syscall.EPERM + } + if parent == 1 && name == TrashName { + return syscall.EPERM + } + defer timeit(time.Now()) + return m.en.doMknod(ctx, parent, name, TypeDirectory, mode, cumask, 0, "", inode, attr) +} + +func (m *baseMeta) Symlink(ctx Context, parent Ino, name string, path string, inode *Ino, attr *Attr) syscall.Errno { + if isTrash(parent) { + return syscall.EPERM + } + if parent == 1 && name == TrashName { + return syscall.EPERM + } + defer timeit(time.Now()) + return m.en.doMknod(ctx, parent, name, TypeSymlink, 0644, 022, 0, path, inode, attr) +} + +func (m *baseMeta) Link(ctx Context, inode, parent Ino, name string, attr *Attr) syscall.Errno { + if isTrash(parent) { + return syscall.EPERM + } + if parent == 1 && name == TrashName { + return syscall.EPERM + } + defer timeit(time.Now()) + parent = m.checkRoot(parent) + defer func() { m.of.InvalidateChunk(inode, 0xFFFFFFFE) }() + return m.en.doLink(ctx, inode, parent, name, attr) +} + +func (m *baseMeta) ReadLink(ctx Context, inode Ino, path *[]byte) syscall.Errno { + if target, ok := m.symlinks.Load(inode); ok { + *path = target.([]byte) + return 0 + } + defer timeit(time.Now()) + target, err := m.en.doReadlink(ctx, inode) + if err != nil { + return errno(err) + } + if len(target) == 0 { + return syscall.ENOENT + } + *path = target + m.symlinks.Store(inode, target) + return 0 +} + +func (m *baseMeta) Unlink(ctx Context, parent Ino, name string) syscall.Errno { + if parent == 1 && name == TrashName || isTrash(parent) && ctx.Uid() != 0 { + return syscall.EPERM + } + defer timeit(time.Now()) + parent = m.checkRoot(parent) + return 
m.en.doUnlink(ctx, parent, name) +} + +func (m *baseMeta) Rmdir(ctx Context, parent Ino, name string) syscall.Errno { + if name == "." { + return syscall.EINVAL + } + if name == ".." { + return syscall.ENOTEMPTY + } + if parent == 1 && name == TrashName || parent == TrashInode || isTrash(parent) && ctx.Uid() != 0 { + return syscall.EPERM + } + defer timeit(time.Now()) + parent = m.checkRoot(parent) + return m.en.doRmdir(ctx, parent, name) +} + +func (m *baseMeta) Rename(ctx Context, parentSrc Ino, nameSrc string, parentDst Ino, nameDst string, flags uint32, inode *Ino, attr *Attr) syscall.Errno { + if parentSrc == 1 && nameSrc == TrashName || parentDst == 1 && nameDst == TrashName { + return syscall.EPERM + } + if isTrash(parentDst) || isTrash(parentSrc) && ctx.Uid() != 0 { + return syscall.EPERM + } + switch flags { + case 0, RenameNoReplace, RenameExchange: + case RenameWhiteout, RenameNoReplace | RenameWhiteout: + return syscall.ENOTSUP + default: + return syscall.EINVAL + } + defer timeit(time.Now()) + parentSrc = m.checkRoot(parentSrc) + parentDst = m.checkRoot(parentDst) + return m.en.doRename(ctx, parentSrc, nameSrc, parentDst, nameDst, flags, inode, attr) +} + +func (m *baseMeta) Open(ctx Context, inode Ino, flags uint32, attr *Attr) syscall.Errno { + if m.conf.ReadOnly && flags&(syscall.O_WRONLY|syscall.O_RDWR|syscall.O_TRUNC|syscall.O_APPEND) != 0 { + return syscall.EROFS + } + if m.conf.OpenCache > 0 && m.of.OpenCheck(inode, attr) { + return 0 + } + var err syscall.Errno + // attr may be valid, see fs.Open() + if attr != nil && !attr.Full { + err = m.GetAttr(ctx, inode, attr) + } + if err == 0 { + m.of.Open(inode, attr) + } + return err +} + +func (m *baseMeta) InvalidateChunkCache(ctx Context, inode Ino, indx uint32) syscall.Errno { + m.of.InvalidateChunk(inode, indx) + return 0 +} + +func (m *baseMeta) NewChunk(ctx Context, chunkid *uint64) syscall.Errno { + m.freeMu.Lock() + defer m.freeMu.Unlock() + if m.freeChunks.next >= m.freeChunks.maxid { + v, err := m.en.incrCounter("nextChunk", chunkIDBatch) + if err != nil { + return errno(err) + } + m.freeChunks.next = uint64(v) - chunkIDBatch + m.freeChunks.maxid = uint64(v) + } + *chunkid = m.freeChunks.next + m.freeChunks.next++ + return 0 +} + +func (m *baseMeta) Close(ctx Context, inode Ino) syscall.Errno { + if m.of.Close(inode) { + m.Lock() + defer m.Unlock() + if m.removedFiles[inode] { + delete(m.removedFiles, inode) + go func() { + _ = m.en.doDeleteSustainedInode(m.sid, inode) + }() + } + } + return 0 +} + +func (m *baseMeta) Readdir(ctx Context, inode Ino, plus uint8, entries *[]*Entry) syscall.Errno { + inode = m.checkRoot(inode) + var attr Attr + if err := m.GetAttr(ctx, inode, &attr); err != 0 { + return err + } + defer timeit(time.Now()) + if inode == m.root { + attr.Parent = m.root + } + *entries = []*Entry{ + { + Inode: inode, + Name: []byte("."), + Attr: &Attr{Typ: TypeDirectory}, + }, + } + *entries = append(*entries, &Entry{ + Inode: attr.Parent, + Name: []byte(".."), + Attr: &Attr{Typ: TypeDirectory}, + }) + return m.en.doReaddir(ctx, inode, plus, entries) +} + +func (m *baseMeta) fileDeleted(opened bool, inode Ino, length uint64) { + if opened { + m.Lock() + m.removedFiles[inode] = true + m.Unlock() + } else { + go m.en.doDeleteFileData(inode, length) + } +} + +func (m *baseMeta) deleteSlice(chunkid uint64, size uint32) { + if m.conf.MaxDeletes == 0 { + return + } + m.deleting <- 1 + defer func() { <-m.deleting }() + err := m.newMsg(DeleteChunk, chunkid, size) + if err != nil { + logger.Warnf("delete chunk %d 
(%d bytes): %s", chunkid, size, err) + } else { + err := m.en.doDeleteSlice(chunkid, size) + if err != nil { + logger.Errorf("delete slice %d: %s", chunkid, err) + } + } +} + +func (m *baseMeta) toTrash(parent Ino) bool { + return m.fmt.TrashDays > 0 && !isTrash(parent) +} + +func (m *baseMeta) checkTrash(parent Ino, trash *Ino) syscall.Errno { + if !m.toTrash(parent) { + return 0 + } + name := time.Now().UTC().Format("2006-01-02-15") + m.Lock() + defer m.Unlock() + if name == m.subTrash.name { + *trash = m.subTrash.inode + return 0 + } + m.Unlock() + + st := m.en.doLookup(Background, TrashInode, name, trash, nil) + if st == syscall.ENOENT { + st = m.en.doMknod(Background, TrashInode, name, TypeDirectory, 0555, 0, 0, "", trash, nil) + } + + m.Lock() + if st != 0 && st != syscall.EEXIST { + logger.Warnf("create subTrash %s: %s", name, st) + } else if *trash <= TrashInode { + logger.Warnf("invalid trash inode: %d", *trash) + st = syscall.EBADF + } else { + m.subTrash.inode = *trash + m.subTrash.name = name + st = 0 + } + return st +} + +func (m *baseMeta) cleanupTrash() { + ctx := Background + key := "lastCleanup" + for { + time.Sleep(time.Hour) + var value []byte + if st := m.en.GetXattr(ctx, TrashInode, key, &value); st != 0 && st != ENOATTR { + logger.Warnf("getxattr inode %d key %s: %s", TrashInode, key, st) + continue + } + + var last time.Time + var err error + if len(value) > 0 { + last, err = time.Parse(time.RFC3339, string(value)) + } + if err != nil { + logger.Warnf("parse time value %s: %s", value, err) + continue + } + if now := time.Now(); now.Sub(last) >= time.Hour { + if st := m.en.SetXattr(ctx, TrashInode, key, []byte(now.Format(time.RFC3339)), XattrCreateOrReplace); st != 0 { + logger.Warnf("setxattr inode %d key %s: %s", TrashInode, key, st) + continue + } + go m.doCleanupTrash(false) + } + } +} + +func (m *baseMeta) doCleanupTrash(force bool) { + logger.Debugf("cleanup trash: started") + ctx := Background + now := time.Now() + var st syscall.Errno + var entries []*Entry + if st = m.en.doReaddir(ctx, TrashInode, 0, &entries); st != 0 { + logger.Warnf("readdir trash %d: %s", TrashInode, st) + return + } + sort.Slice(entries, func(i, j int) bool { return entries[i].Inode < entries[j].Inode }) + var count int + defer func() { + if count > 0 { + logger.Infof("cleanup trash: deleted %d files in %v", count, time.Since(now)) + } + }() + + edge := now.Add(-time.Duration(24*m.fmt.TrashDays+1) * time.Hour) + for _, e := range entries { + ts, err := time.Parse("2006-01-02-15", string(e.Name)) + if err != nil { + logger.Warnf("bad entry as a subTrash: %s", e.Name) + continue + } + if ts.Before(edge) || force { + var subEntries []*Entry + if st = m.en.doReaddir(ctx, e.Inode, 0, &subEntries); st != 0 { + logger.Warnf("readdir subTrash %d: %s", e.Inode, st) + continue + } + rmdir := true + for _, se := range subEntries { + if se.Attr.Typ == TypeDirectory { + st = m.en.doRmdir(ctx, e.Inode, string(se.Name)) + } else { + st = m.en.doUnlink(ctx, e.Inode, string(se.Name)) + } + if st == 0 { + count++ + } else { + logger.Warnf("delete from trash %s/%s: %s", e.Name, se.Name, st) + rmdir = false + continue + } + if count%10000 == 0 && time.Since(now) > 50*time.Minute { + return + } + } + if rmdir { + if st = m.en.doRmdir(ctx, TrashInode, string(e.Name)); st != 0 { + logger.Warnf("rmdir subTrash %s: %s", e.Name, st) + } + } + } else { + break + } + } +} diff --git a/pkg/meta/benchmarks_test.go b/pkg/meta/benchmarks_test.go new file mode 100644 index 0000000..69e8f0a --- /dev/null +++ 
b/pkg/meta/benchmarks_test.go @@ -0,0 +1,654 @@ +/* + * JuiceFS, Copyright 2021 Juicedata, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package meta + +import ( + "fmt" + "syscall" + "testing" + + "github.com/juicedata/juicefs/pkg/utils" + "github.com/sirupsen/logrus" +) + +const ( + redisAddr = "redis://127.0.0.1/1" + sqlAddr = "sqlite3://juicefs.db" + // sqlAddr = "mysql://root:@/juicefs" // MySQL + // sqlAddr = "mysql://root:@tcp(127.0.0.1:4000)/juicefs" // TiDB + tkvAddr = "badger://test_db" + // tkvAddr = "tikv://127.0.0.1:2379/juicefs" +) + +func init() { + utils.SetLogLevel(logrus.InfoLevel) + // utils.SetOutFile("bench-test.log") +} + +func encodeSlices(size int) []string { + w := utils.NewBuffer(24) + w.Put32(0) + w.Put64(1014) + w.Put32(122) + w.Put32(0) + w.Put32(122) + v := string(w.Bytes()) + vals := make([]string, size) + for i := range vals { + vals[i] = v + } + return vals +} + +func encodeSlicesAsBuf(nSlices uint32) []byte { + w := utils.NewBuffer(nSlices * sliceBytes) + for i := uint32(0); i < nSlices; i++ { + w.Put32(0) + w.Put64(1014) + w.Put32(122) + w.Put32(0) + w.Put32(122) + } + return w.Bytes() +} + +func BenchmarkReadSlices(b *testing.B) { + cases := []struct { + desc string + size int + }{ + {"small", 4}, + {"mid", 64}, + {"large", 1024}, + } + for _, c := range cases { + b.Run(c.desc, func(b *testing.B) { + vals := encodeSlices(c.size) + b.ResetTimer() + var slices []*slice + for i := 0; i < b.N; i++ { + slices = readSlices(vals) + } + if len(slices) != len(vals) { + b.Fail() + } + }) + } +} + +func BenchmarkReadSliceBuf(b *testing.B) { + cases := []struct { + desc string + size uint32 + }{ + {"small", 4}, + {"mid", 64}, + {"large", 1024}, + } + for _, c := range cases { + b.Run(c.desc, func(b *testing.B) { + buf := encodeSlicesAsBuf(c.size) + b.ResetTimer() + var slices []*slice + for i := 0; i < b.N; i++ { + slices = readSliceBuf(buf) + } + if len(slices) != int(c.size) { + b.Fail() + } + }) + } +} + +func prepareParent(m Meta, name string, inode *Ino) error { + ctx := Background + if err := Remove(m, ctx, 1, name); err != 0 && err != syscall.ENOENT { + return fmt.Errorf("remove: %s", err) + } + if err := m.Mkdir(ctx, 1, name, 0755, 0, 0, inode, nil); err != 0 { + return fmt.Errorf("mkdir: %s", err) + } + return nil +} + +func benchMkdir(b *testing.B, m Meta) { + var parent Ino + if err := prepareParent(m, "benchMkdir", &parent); err != nil { + b.Fatal(err) + } + ctx := Background + b.ResetTimer() + for i := 0; i < b.N; i++ { + if err := m.Mkdir(ctx, parent, fmt.Sprintf("d%d", i), 0755, 0, 0, nil, nil); err != 0 { + b.Fatalf("mkdir: %s", err) + } + } +} + +func benchMvdir(b *testing.B, m Meta) { // rename dir + var parent Ino + if err := prepareParent(m, "benchMvdir", &parent); err != nil { + b.Fatal(err) + } + ctx := Background + if err := m.Mkdir(ctx, parent, "d0", 0755, 0, 0, nil, nil); err != 0 { + b.Fatalf("mkdir: %s", err) + } + b.ResetTimer() + for i := 0; i < b.N; i++ { + if err := m.Rename(ctx, parent, fmt.Sprintf("d%d", i), 
parent, fmt.Sprintf("d%d", i+1), 0, nil, nil); err != 0 { + b.Fatalf("rename dir: %s", err) + } + } +} + +func benchRmdir(b *testing.B, m Meta) { + var parent Ino + if err := prepareParent(m, "benchRmdir", &parent); err != nil { + b.Fatal(err) + } + ctx := Background + b.ResetTimer() + for i := 0; i < b.N; i++ { + b.StopTimer() + if err := m.Mkdir(ctx, parent, "dir", 0755, 0, 0, nil, nil); err != 0 { + b.Fatalf("mkdir: %s", err) + } + b.StartTimer() + if err := m.Rmdir(ctx, parent, "dir"); err != 0 { + b.Fatalf("rmdir: %s", err) + } + } +} + +func benchResolve(b *testing.B, m Meta) { + var parent Ino + if err := prepareParent(m, "benchResolve", &parent); err != nil { + b.Fatal(err) + } + ctx := Background + var child Ino = parent + for i := 0; i < 5; i++ { + if err := m.Mkdir(ctx, child, "d", 0755, 0, 0, &child, nil); err != 0 { + b.Fatalf("mkdir: %s", err) + } + } + b.ResetTimer() + for i := 0; i < b.N; i++ { + if err := m.Resolve(ctx, parent, "d/d/d/d/d", nil, nil); err != 0 { + if err == syscall.ENOTSUP { + b.SkipNow() + return + } + b.Fatalf("resolve: %s", err) + } + } +} + +func benchReaddir(b *testing.B, m Meta, n int) { + var parent Ino + if err := prepareParent(m, "benchReaddir", &parent); err != nil { + b.Fatal(err) + } + ctx := Background + for j := 0; j < n; j++ { + if err := m.Create(ctx, parent, fmt.Sprintf("f%d", j), 0644, 022, 0, nil, nil); err != 0 { + b.Fatalf("create: %s", err) + } + } + var entries []*Entry + b.ResetTimer() + for i := 0; i < b.N; i++ { + entries = entries[:0] + if err := m.Readdir(ctx, parent, 1, &entries); err != 0 { + b.Fatalf("readdir: %s", err) + } + if len(entries) != n+2 { + b.Fatalf("files: %d != %d", len(entries), n+2) + } + } +} + +func benchMknod(b *testing.B, m Meta) { + var parent Ino + if err := prepareParent(m, "benchMknod", &parent); err != nil { + b.Fatal(err) + } + ctx := Background + b.ResetTimer() + for i := 0; i < b.N; i++ { + if err := m.Mknod(ctx, parent, fmt.Sprintf("f%d", i), TypeFile, 0644, 022, 0, nil, nil); err != 0 { + b.Fatalf("mknod: %s", err) + } + } +} + +func benchCreate(b *testing.B, m Meta) { + var parent Ino + if err := prepareParent(m, "benchCreate", &parent); err != nil { + b.Fatal(err) + } + ctx := Background + b.ResetTimer() + for i := 0; i < b.N; i++ { + if err := m.Create(ctx, parent, fmt.Sprintf("f%d", i), 0644, 022, 0, nil, nil); err != 0 { + b.Fatalf("create: %s", err) + } + } +} + +func benchRename(b *testing.B, m Meta) { + var parent Ino + if err := prepareParent(m, "benchRename", &parent); err != nil { + b.Fatal(err) + } + ctx := Background + if err := m.Create(ctx, parent, "f0", 0644, 022, 0, nil, nil); err != 0 { + b.Fatalf("create: %s", err) + } + b.ResetTimer() + for i := 0; i < b.N; i++ { + if err := m.Rename(ctx, parent, fmt.Sprintf("f%d", i), parent, fmt.Sprintf("f%d", i+1), 0, nil, nil); err != 0 { + b.Fatalf("rename file: %s", err) + } + } +} + +func benchUnlink(b *testing.B, m Meta) { + var parent Ino + if err := prepareParent(m, "benchUnlink", &parent); err != nil { + b.Fatal(err) + } + ctx := Background + b.ResetTimer() + for i := 0; i < b.N; i++ { + b.StopTimer() + if err := m.Create(ctx, parent, "file", 0644, 022, 0, nil, nil); err != 0 { + b.Fatalf("create: %s", err) + } + b.StartTimer() + if err := m.Unlink(ctx, parent, "file"); err != 0 { + b.Fatalf("unlink: %s", err) + } + } +} + +func benchLookup(b *testing.B, m Meta) { + var parent Ino + if err := prepareParent(m, "benchLookup", &parent); err != nil { + b.Fatal(err) + } + ctx := Background + if err := m.Create(ctx, parent, "file", 0644, 
022, 0, nil, nil); err != 0 { + b.Fatalf("create: %s", err) + } + var inode Ino + var attr Attr + b.ResetTimer() + for i := 0; i < b.N; i++ { + if err := m.Lookup(ctx, parent, "file", &inode, &attr); err != 0 { + b.Fatalf("lookup: %s", err) + } + } +} + +func benchGetAttr(b *testing.B, m Meta) { + var parent, inode Ino + if err := prepareParent(m, "benchGetAttr", &parent); err != nil { + b.Fatal(err) + } + ctx := Background + if err := m.Create(ctx, parent, "file", 0644, 022, 0, &inode, nil); err != 0 { + b.Fatalf("create: %s", err) + } + var attr Attr + b.ResetTimer() + for i := 0; i < b.N; i++ { + if err := m.GetAttr(ctx, inode, &attr); err != 0 { + b.Fatalf("getattr: %s", err) + } + } +} + +func benchSetAttr(b *testing.B, m Meta) { + var parent, inode Ino + if err := prepareParent(m, "benchSetAttr", &parent); err != nil { + b.Fatal(err) + } + ctx := Background + if err := m.Create(ctx, parent, "file", 0644, 022, 0, &inode, nil); err != 0 { + b.Fatalf("create: %s", err) + } + var attr = Attr{Mode: 0644} + b.ResetTimer() + for i := 0; i < b.N; i++ { + attr.Mode ^= 1 + if err := m.SetAttr(ctx, inode, SetAttrMode, 0, &attr); err != 0 { + b.Fatalf("setattr: %s", err) + } + } +} + +func benchAccess(b *testing.B, m Meta) { // contains a Getattr + var parent, inode Ino + if err := prepareParent(m, "benchAccess", &parent); err != nil { + b.Fatal(err) + } + ctx := Background + if err := m.Create(ctx, parent, "file", 0644, 022, 0, &inode, nil); err != 0 { + b.Fatalf("create: %s", err) + } + myCtx := NewContext(100, 1, []uint32{1}) + b.ResetTimer() + for i := 0; i < b.N; i++ { + if err := m.Access(myCtx, inode, 4, nil); err != 0 && err != syscall.EACCES { + b.Fatalf("access: %s", err) + } + } +} + +func benchSetXattr(b *testing.B, m Meta) { + var parent, inode Ino + if err := prepareParent(m, "benchSetXattr", &parent); err != nil { + b.Fatal(err) + } + ctx := Background + if err := m.Create(ctx, parent, "fxattr", 0644, 022, 0, &inode, nil); err != 0 { + b.Fatalf("create: %s", err) + } + b.ResetTimer() + value := []byte("value0") + for i := 0; i < b.N; i++ { + value[5] = byte(i%10 + 48) + if err := m.SetXattr(ctx, inode, "key", value, 0); err != 0 { + b.Fatalf("setxattr: %s", err) + } + } +} + +func benchGetXattr(b *testing.B, m Meta) { + var parent, inode Ino + if err := prepareParent(m, "benchGetXattr", &parent); err != nil { + b.Fatal(err) + } + ctx := Background + if err := m.Create(ctx, parent, "fxattr", 0644, 022, 0, &inode, nil); err != 0 { + b.Fatalf("create: %s", err) + } + if err := m.SetXattr(ctx, inode, "key", []byte("value"), 0); err != 0 { + b.Fatalf("setxattr: %s", err) + } + var buf []byte + b.ResetTimer() + for i := 0; i < b.N; i++ { + if err := m.GetXattr(ctx, inode, "key", &buf); err != 0 { + b.Fatalf("getxattr: %s", err) + } + } +} + +func benchRemoveXattr(b *testing.B, m Meta) { + var parent, inode Ino + if err := prepareParent(m, "benchRemoveXattr", &parent); err != nil { + b.Fatal(err) + } + ctx := Background + if err := m.Create(ctx, parent, "fxattr", 0644, 022, 0, &inode, nil); err != 0 { + b.Fatalf("create: %s", err) + } + b.ResetTimer() + for i := 0; i < b.N; i++ { + b.StopTimer() + if err := m.SetXattr(ctx, inode, "key", []byte("value"), 0); err != 0 { + b.Fatalf("setxattr: %s", err) + } + b.StartTimer() + if err := m.RemoveXattr(ctx, inode, "key"); err != 0 { + b.Fatalf("removexattr: %s", err) + } + } +} + +func benchListXattr(b *testing.B, m Meta, n int) { + var parent, inode Ino + if err := prepareParent(m, "benchListXattr", &parent); err != nil { + b.Fatal(err) + } + 
ctx := Background + if err := m.Create(ctx, parent, "fxattr", 0644, 022, 0, &inode, nil); err != 0 { + b.Fatalf("create: %s", err) + } + for j := 0; j < n; j++ { + if err := m.SetXattr(ctx, inode, fmt.Sprintf("key%d", j), []byte("value"), 0); err != 0 { + b.Fatalf("setxattr: %s", err) + } + } + var buf []byte + b.ResetTimer() + for i := 0; i < b.N; i++ { + if err := m.ListXattr(ctx, inode, &buf); err != 0 { + b.Fatalf("removexattr: %s", err) + } + } +} + +func benchLink(b *testing.B, m Meta) { + var parent Ino + if err := prepareParent(m, "benchLink", &parent); err != nil { + b.Fatal(err) + } + ctx := Background + var inode Ino + if err := m.Create(ctx, parent, "source", 0644, 022, 0, &inode, nil); err != 0 { + b.Fatalf("create: %s", err) + } + b.ResetTimer() + for i := 0; i < b.N; i++ { + if err := m.Link(ctx, inode, parent, fmt.Sprintf("l%d", i), nil); err != 0 { + b.Fatalf("link: %s", err) + } + } +} + +func benchSymlink(b *testing.B, m Meta) { + var parent Ino + if err := prepareParent(m, "benchSymlink", &parent); err != nil { + b.Fatal(err) + } + ctx := Background + var inode Ino + if err := m.Create(ctx, parent, "source", 0644, 022, 0, &inode, nil); err != 0 { + b.Fatalf("create: %s", err) + } + b.ResetTimer() + for i := 0; i < b.N; i++ { + if err := m.Symlink(ctx, parent, fmt.Sprintf("s%d", i), "/benchSymlink/source", nil, nil); err != 0 { + b.Fatalf("symlink: %s", err) + } + } +} + +/* +func benchReadlink(b *testing.B, m Meta) { + var parent Ino + if err := prepareParent(m, "benchReadlink", &parent); err != nil { + b.Fatal(err) + } + ctx := Background + var inode Ino + if err := m.Create(ctx, parent, "source", 0644, 022, &inode, nil); err != 0 { + b.Fatalf("create: %s", err) + } + if err := m.Symlink(ctx, parent, "slink", "/benchReadlink/source", &inode, nil); err != 0 { + b.Fatalf("symlink: %s", err) + } + var buf []byte + b.ResetTimer() + for i := 0; i < b.N; i++ { + if err := m.ReadLink(ctx, inode, &buf); err != 0 { + b.Fatalf("readlink: %s", err) + } + } +} +*/ + +func benchNewChunk(b *testing.B, m Meta) { + ctx := Background + var chunkid uint64 + for i := 0; i < b.N; i++ { + if err := m.NewChunk(ctx, &chunkid); err != 0 { + b.Fatalf("newchunk: %s", err) + } + } +} + +func benchWrite(b *testing.B, m Meta) { + var parent Ino + if err := prepareParent(m, "benchWrite", &parent); err != nil { + b.Fatal(err) + } + ctx := Background + var inode Ino + if err := m.Create(ctx, parent, "file", 0644, 022, 0, &inode, nil); err != 0 { + b.Fatalf("create: %s", err) + } + var ( + chunkid uint64 + offset uint32 + step uint32 = 1024 + ) + b.ResetTimer() + for i := 0; i < b.N; i++ { + if err := m.NewChunk(ctx, &chunkid); err != 0 { + b.Fatalf("newchunk: %s", err) + } + if err := m.Write(ctx, inode, 0, offset, Slice{Chunkid: chunkid, Size: step, Len: step}); err != 0 { + b.Fatalf("write: %s", err) + } + offset += step + if offset+step > ChunkSize { + offset = 0 + } + } +} + +func benchRead(b *testing.B, m Meta, n int) { + var parent Ino + if err := prepareParent(m, "benchRead", &parent); err != nil { + b.Fatal(err) + } + ctx := Background + var inode Ino + if err := m.Create(ctx, parent, "file", 0644, 022, 0, &inode, nil); err != 0 { + b.Fatalf("create: %s", err) + } + var chunkid uint64 + var step uint32 = 1024 + for j := 0; j < n; j++ { + if err := m.NewChunk(ctx, &chunkid); err != 0 { + b.Fatalf("newchunk: %s", err) + } + if err := m.Write(ctx, inode, 0, uint32(j)*step, Slice{Chunkid: chunkid, Size: step, Len: step}); err != 0 { + b.Fatalf("write: %s", err) + } + } + var slices []Slice + 
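+	// The n slices above all land in chunk 0, so each timed Read call fetches that chunk's slice list.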
b.ResetTimer() + for i := 0; i < b.N; i++ { + if err := m.Read(ctx, inode, 0, &slices); err != 0 { + b.Fatalf("read: %s", err) + } + } +} + +func benchmarkDir(b *testing.B, m Meta) { // mkdir, rename dir, rmdir, readdir + _ = m.Init(Format{Name: "benchmarkDir"}, true) + _ = m.NewSession() + b.Run("mkdir", func(b *testing.B) { benchMkdir(b, m) }) + b.Run("mvdir", func(b *testing.B) { benchMvdir(b, m) }) + b.Run("rmdir", func(b *testing.B) { benchRmdir(b, m) }) + b.Run("resolve", func(b *testing.B) { benchResolve(b, m) }) + b.Run("readdir_10", func(b *testing.B) { benchReaddir(b, m, 10) }) + b.Run("readdir_1k", func(b *testing.B) { benchReaddir(b, m, 1000) }) + // b.Run("readdir_100k", func(b *testing.B) { benchReaddir(b, m, 100000) }) +} + +func benchmarkFile(b *testing.B, m Meta) { + _ = m.Init(Format{Name: "benchmarkFile"}, true) + _ = m.NewSession() + b.Run("mknod", func(b *testing.B) { benchMknod(b, m) }) + b.Run("create", func(b *testing.B) { benchCreate(b, m) }) + b.Run("rename", func(b *testing.B) { benchRename(b, m) }) + b.Run("unlink", func(b *testing.B) { benchUnlink(b, m) }) + b.Run("lookup", func(b *testing.B) { benchLookup(b, m) }) + b.Run("getattr", func(b *testing.B) { benchGetAttr(b, m) }) + b.Run("setattr", func(b *testing.B) { benchSetAttr(b, m) }) + b.Run("access", func(b *testing.B) { benchAccess(b, m) }) +} + +func benchmarkXattr(b *testing.B, m Meta) { + _ = m.Init(Format{Name: "benchmarkXattr"}, true) + _ = m.NewSession() + b.Run("setxattr", func(b *testing.B) { benchSetXattr(b, m) }) + b.Run("getxattr", func(b *testing.B) { benchGetXattr(b, m) }) + b.Run("removexattr", func(b *testing.B) { benchRemoveXattr(b, m) }) + b.Run("listxattr_1", func(b *testing.B) { benchListXattr(b, m, 1) }) + b.Run("listxattr_10", func(b *testing.B) { benchListXattr(b, m, 10) }) +} + +func benchmarkLink(b *testing.B, m Meta) { + _ = m.Init(Format{Name: "benchmarkLink"}, true) + _ = m.NewSession() + b.Run("link", func(b *testing.B) { benchLink(b, m) }) + b.Run("symlink", func(b *testing.B) { benchSymlink(b, m) }) + // maybe meaningless since symlink would be cached + // b.Run("readlink", func(b *testing.B) { benchReadlink(b, m) }) +} + +func benchmarkData(b *testing.B, m Meta) { + _ = m.Init(Format{Name: "benchmarkData"}, true) + m.OnMsg(DeleteChunk, func(args ...interface{}) error { return nil }) + m.OnMsg(CompactChunk, func(args ...interface{}) error { return nil }) + _ = m.NewSession() + b.Run("newchunk", func(b *testing.B) { benchNewChunk(b, m) }) + b.Run("write", func(b *testing.B) { benchWrite(b, m) }) + b.Run("read_1", func(b *testing.B) { benchRead(b, m, 1) }) + b.Run("read_10", func(b *testing.B) { benchRead(b, m, 10) }) +} + +func benchmarkAll(b *testing.B, m Meta) { + benchmarkDir(b, m) + benchmarkFile(b, m) + benchmarkXattr(b, m) + benchmarkLink(b, m) + benchmarkData(b, m) +} + +func BenchmarkRedis(b *testing.B) { + m := NewClient(redisAddr, &Config{}) + benchmarkAll(b, m) +} + +func BenchmarkSQL(b *testing.B) { + m := NewClient(sqlAddr, &Config{}) + benchmarkAll(b, m) +} + +func BenchmarkTKV(b *testing.B) { + m := NewClient(tkvAddr, &Config{}) + benchmarkAll(b, m) +} diff --git a/pkg/meta/config.go b/pkg/meta/config.go new file mode 100644 index 0000000..63fdf97 --- /dev/null +++ b/pkg/meta/config.go @@ -0,0 +1,57 @@ +/* + * JuiceFS, Copyright 2020 Juicedata, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package meta + +import "time" + +// Config for clients. +type Config struct { + Strict bool // update ctime + Retries int + CaseInsensi bool + ReadOnly bool + OpenCache time.Duration + MountPoint string + Subdir string + MaxDeletes int +} + +type Format struct { + Name string + UUID string + Storage string + Bucket string + AccessKey string + SecretKey string `json:",omitempty"` + BlockSize int + Compression string + Shards int + Partitions int + Capacity uint64 + Inodes uint64 + EncryptKey string `json:",omitempty"` + TrashDays int +} + +func (f *Format) RemoveSecret() { + if f.SecretKey != "" { + f.SecretKey = "removed" + } + if f.EncryptKey != "" { + f.EncryptKey = "removed" + } +} diff --git a/pkg/meta/config_test.go b/pkg/meta/config_test.go new file mode 100644 index 0000000..3bdf72f --- /dev/null +++ b/pkg/meta/config_test.go @@ -0,0 +1,28 @@ +/* + * JuiceFS, Copyright 2021 Juicedata, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package meta + +import "testing" + +func TestRemoveSecret(t *testing.T) { + format := Format{Name: "test", SecretKey: "testSecret", EncryptKey: "testEncrypt"} + + format.RemoveSecret() + if format.SecretKey != "removed" || format.EncryptKey != "removed" { + t.Fatalf("invalid format: %+v", format) + } +} diff --git a/pkg/meta/context.go b/pkg/meta/context.go new file mode 100644 index 0000000..b7d1a25 --- /dev/null +++ b/pkg/meta/context.go @@ -0,0 +1,94 @@ +/* + * JuiceFS, Copyright 2020 Juicedata, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package meta + +import ( + "context" + "strconv" +) + +type Ino uint64 + +func (i Ino) String() string { + return strconv.FormatUint(uint64(i), 10) +} + +type CtxKey string + +type Context interface { + context.Context + Gid() uint32 + Gids() []uint32 + Uid() uint32 + Pid() uint32 + WithValue(k, v interface{}) + Cancel() + Canceled() bool +} + +type emptyContext struct { + context.Context +} + +func (ctx *emptyContext) Gid() uint32 { return 0 } +func (ctx *emptyContext) Gids() []uint32 { return []uint32{0} } +func (ctx *emptyContext) Uid() uint32 { return 0 } +func (ctx *emptyContext) Pid() uint32 { return 1 } +func (ctx *emptyContext) Cancel() {} +func (ctx *emptyContext) Canceled() bool { return false } +func (ctx *emptyContext) WithValue(k, v interface{}) { + ctx.Context = context.WithValue(ctx.Context, k, v) +} + +var Background Context = &emptyContext{context.Background()} + +type myContext struct { + context.Context + pid uint32 + uid uint32 + gids []uint32 +} + +func (c *myContext) Uid() uint32 { + return c.uid +} + +func (c *myContext) Gid() uint32 { + return c.gids[0] +} + +func (c *myContext) Gids() []uint32 { + return c.gids +} + +func (c *myContext) Pid() uint32 { + return c.pid +} + +func (c *myContext) Cancel() {} + +func (c *myContext) Canceled() bool { + return false +} + +func (c *myContext) WithValue(k, v interface{}) { + c.Context = context.WithValue(c.Context, k, v) +} + +func NewContext(pid, uid uint32, gids []uint32) Context { + return &myContext{context.Background(), pid, uid, gids} +} diff --git a/pkg/meta/dump.go b/pkg/meta/dump.go new file mode 100644 index 0000000..440254a --- /dev/null +++ b/pkg/meta/dump.go @@ -0,0 +1,278 @@ +/* + * JuiceFS, Copyright 2021 Juicedata, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package meta + +import ( + "bufio" + "encoding/json" + "fmt" + "io" + "strings" +) + +const ( + jsonIndent = " " + jsonWriteSize = 64 << 10 +) + +type DumpedCounters struct { + UsedSpace int64 `json:"usedSpace"` + UsedInodes int64 `json:"usedInodes"` + NextInode int64 `json:"nextInodes"` + NextChunk int64 `json:"nextChunk"` + NextSession int64 `json:"nextSession"` + NextTrash int64 `json:"nextTrash"` + NextCleanupSlices int64 `json:"nextCleanupSlices"` // deprecated, always 0 +} + +type DumpedDelFile struct { + Inode Ino `json:"inode"` + Length uint64 `json:"length"` + Expire int64 `json:"expire"` +} + +type DumpedSustained struct { + Sid uint64 `json:"sid"` + Inodes []Ino `json:"inodes"` +} + +type DumpedAttr struct { + Inode Ino `json:"inode"` + Type string `json:"type"` + Mode uint16 `json:"mode"` + Uid uint32 `json:"uid"` + Gid uint32 `json:"gid"` + Atime int64 `json:"atime"` + Mtime int64 `json:"mtime"` + Ctime int64 `json:"ctime"` + Atimensec uint32 `json:"atimensec"` + Mtimensec uint32 `json:"mtimensec"` + Ctimensec uint32 `json:"ctimensec"` + Nlink uint32 `json:"nlink"` + Length uint64 `json:"length"` + Rdev uint32 `json:"rdev,omitempty"` +} + +type DumpedSlice struct { + Chunkid uint64 `json:"chunkid"` + Pos uint32 `json:"pos"` + Size uint32 `json:"size"` + Off uint32 `json:"off"` + Len uint32 `json:"len"` +} + +type DumpedChunk struct { + Index uint32 `json:"index"` + Slices []*DumpedSlice `json:"slices"` +} + +type DumpedXattr struct { + Name string `json:"name"` + Value string `json:"value"` +} + +type DumpedEntry struct { + Name string `json:"-"` + Parent Ino `json:"-"` + Attr *DumpedAttr `json:"attr"` + Symlink string `json:"symlink,omitempty"` + Xattrs []*DumpedXattr `json:"xattrs,omitempty"` + Chunks []*DumpedChunk `json:"chunks,omitempty"` + Entries map[string]*DumpedEntry `json:"entries,omitempty"` +} + +func (de *DumpedEntry) writeJSON(bw *bufio.Writer, depth int) error { + prefix := strings.Repeat(jsonIndent, depth) + fieldPrefix := prefix + jsonIndent + write := func(s string) { + if _, err := bw.WriteString(s); err != nil { + panic(err) + } + } + write(fmt.Sprintf("\n%s\"%s\": {", prefix, de.Name)) + data, err := json.Marshal(de.Attr) + if err != nil { + return err + } + write(fmt.Sprintf("\n%s\"attr\": %s", fieldPrefix, data)) + if len(de.Symlink) > 0 { + write(fmt.Sprintf(",\n%s\"symlink\": \"%s\"", fieldPrefix, de.Symlink)) + } + if len(de.Xattrs) > 0 { + if data, err = json.Marshal(de.Xattrs); err != nil { + return err + } + write(fmt.Sprintf(",\n%s\"xattrs\": %s", fieldPrefix, data)) + } + if len(de.Chunks) == 1 { + if data, err = json.Marshal(de.Chunks); err != nil { + return err + } + write(fmt.Sprintf(",\n%s\"chunks\": %s", fieldPrefix, data)) + } else if len(de.Chunks) > 1 { + chunkPrefix := fieldPrefix + jsonIndent + write(fmt.Sprintf(",\n%s\"chunks\": [", fieldPrefix)) + for i, c := range de.Chunks { + if data, err = json.Marshal(c); err != nil { + return err + } + write(fmt.Sprintf("\n%s%s", chunkPrefix, data)) + if i != len(de.Chunks)-1 { + write(",") + } + } + write(fmt.Sprintf("\n%s]", fieldPrefix)) + } + write(fmt.Sprintf("\n%s}", prefix)) + return nil +} +func (de *DumpedEntry) writeJsonWithOutEntry(bw *bufio.Writer, depth int) error { + prefix := strings.Repeat(jsonIndent, depth) + fieldPrefix := prefix + jsonIndent + write := func(s string) { + if _, err := bw.WriteString(s); err != nil { + panic(err) + } + } + write(fmt.Sprintf("\n%s\"%s\": {", prefix, de.Name)) + data, err := json.Marshal(de.Attr) + if err != nil { + return err + } + 
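+	// Emit the attr (and optional xattrs) fields, then leave the "entries" object open;
+	// the caller is expected to write the child entries and the closing braces itself.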
write(fmt.Sprintf("\n%s\"attr\": %s", fieldPrefix, data)) + if len(de.Xattrs) > 0 { + if data, err = json.Marshal(de.Xattrs); err != nil { + return err + } + write(fmt.Sprintf(",\n%s\"xattrs\": %s", fieldPrefix, data)) + } + write(fmt.Sprintf(",\n%s\"entries\": {", fieldPrefix)) + return nil +} + +type DumpedMeta struct { + Setting *Format + Counters *DumpedCounters + Sustained []*DumpedSustained + DelFiles []*DumpedDelFile + FSTree *DumpedEntry `json:",omitempty"` + Trash *DumpedEntry `json:",omitempty"` +} + +func (dm *DumpedMeta) writeJsonWithOutTree(w io.Writer) (*bufio.Writer, error) { + if dm.FSTree != nil || dm.Trash != nil { + return nil, fmt.Errorf("invalid dumped meta") + } + data, err := json.MarshalIndent(dm, "", jsonIndent) + if err != nil { + return nil, err + } + bw := bufio.NewWriterSize(w, jsonWriteSize) + if _, err = bw.Write(append(data[:len(data)-2], ',')); err != nil { // delete \n} + return nil, err + } + return bw, nil +} + +func dumpAttr(a *Attr) *DumpedAttr { + d := &DumpedAttr{ + Type: typeToString(a.Typ), + Mode: a.Mode, + Uid: a.Uid, + Gid: a.Gid, + Atime: a.Atime, + Mtime: a.Mtime, + Ctime: a.Ctime, + Atimensec: a.Atimensec, + Mtimensec: a.Mtimensec, + Ctimensec: a.Ctimensec, + Nlink: a.Nlink, + Rdev: a.Rdev, + } + if a.Typ == TypeFile { + d.Length = a.Length + } + return d +} + +func loadAttr(d *DumpedAttr) *Attr { + return &Attr{ + // Flags: 0, + Typ: typeFromString(d.Type), + Mode: d.Mode, + Uid: d.Uid, + Gid: d.Gid, + Atime: d.Atime, + Mtime: d.Mtime, + Ctime: d.Ctime, + Atimensec: d.Atimensec, + Mtimensec: d.Mtimensec, + Ctimensec: d.Ctimensec, + Nlink: d.Nlink, + Rdev: d.Rdev, + Full: true, + } // Length and Parent not set +} + +func collectEntry(e *DumpedEntry, entries map[Ino]*DumpedEntry, showProgress func(totalIncr, currentIncr int64)) error { + typ := typeFromString(e.Attr.Type) + inode := e.Attr.Inode + if showProgress != nil { + if typ == TypeDirectory { + showProgress(int64(len(e.Entries)), 1) + } else { + showProgress(0, 1) + } + } + + if exist, ok := entries[inode]; ok { + attr := e.Attr + eattr := exist.Attr + if typ != TypeFile || typeFromString(eattr.Type) != TypeFile { + return fmt.Errorf("inode conflict: %d", inode) + } + eattr.Nlink++ + if eattr.Ctime*1e9+int64(eattr.Ctimensec) < attr.Ctime*1e9+int64(attr.Ctimensec) { + attr.Nlink = eattr.Nlink + entries[inode] = e + } + return nil + } + entries[inode] = e + + if typ == TypeFile { + e.Attr.Nlink = 1 // reset + } else if typ == TypeDirectory { + if inode == 1 || inode == TrashInode { // root or trash inode + e.Parent = 1 + } + e.Attr.Nlink = 2 + for name, child := range e.Entries { + child.Name = name + child.Parent = inode + if typeFromString(child.Attr.Type) == TypeDirectory { + e.Attr.Nlink++ + } + if err := collectEntry(child, entries, showProgress); err != nil { + return err + } + } + } else if e.Attr.Nlink != 1 { // nlink should be 1 for other types + return fmt.Errorf("invalid nlink %d for inode %d type %s", e.Attr.Nlink, inode, e.Attr.Type) + } + return nil +} diff --git a/pkg/meta/info.go b/pkg/meta/info.go new file mode 100644 index 0000000..7498a78 --- /dev/null +++ b/pkg/meta/info.go @@ -0,0 +1,109 @@ +/* + * JuiceFS, Copyright 2020 Juicedata, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package meta + +import ( + "fmt" + "strconv" + "strings" +) + +type redisVersion struct { + ver string + major, minor int +} + +var oldestSupportedVer = redisVersion{"4.0.x", 4, 0} + +func parseRedisVersion(v string) (ver redisVersion, err error) { + parts := strings.Split(v, ".") + if len(parts) < 2 { + err = fmt.Errorf("invalid redisVersion: %v", v) + return + } + ver.ver = v + ver.major, err = strconv.Atoi(parts[0]) + if err != nil { + return + } + ver.minor, err = strconv.Atoi(parts[1]) + return +} + +func (ver redisVersion) olderThan(v2 redisVersion) bool { + if ver.major < v2.major { + return true + } + if ver.major > v2.major { + return false + } + return ver.minor < v2.minor +} + +func (ver redisVersion) String() string { + return ver.ver +} + +type redisInfo struct { + aofEnabled bool + clusterEnabled bool + maxMemoryPolicy string + redisVersion string +} + +func checkRedisInfo(rawInfo string) (info redisInfo, err error) { + lines := strings.Split(strings.TrimSpace(rawInfo), "\n") + for _, l := range lines { + l = strings.TrimSpace(l) + if l == "" || strings.HasPrefix(l, "#") { + continue + } + kvPair := strings.SplitN(l, ":", 2) + if len(kvPair) < 2 { + continue + } + key, val := kvPair[0], kvPair[1] + switch key { + case "aof_enabled": + info.aofEnabled = val == "1" + if val == "0" { + logger.Warnf("AOF is not enabled, you may lose data if Redis is not shutdown properly.") + } + case "cluster_enabled": + info.clusterEnabled = val == "1" + if val != "0" { + logger.Warnf("Redis cluster is not supported, some operation may fail unexpected.") + } + case "maxmemory_policy": + info.maxMemoryPolicy = val + if val != "noeviction" { + logger.Warnf("maxmemory_policy is %q, please set it to 'noeviction'.", val) + } + case "redis_version": + info.redisVersion = val + ver, err := parseRedisVersion(val) + if err != nil { + logger.Warnf("Failed to parse Redis server version %q: %s", ver, err) + } else { + if ver.olderThan(oldestSupportedVer) { + logger.Warnf("Redis version should not be older than %s", oldestSupportedVer) + } + } + } + } + return +} diff --git a/pkg/meta/info_test.go b/pkg/meta/info_test.go new file mode 100644 index 0000000..0c535ba --- /dev/null +++ b/pkg/meta/info_test.go @@ -0,0 +1,264 @@ +/* + * JuiceFS, Copyright 2020 Juicedata, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package meta + +import "testing" + +func TestOlderThan(t *testing.T) { + v := redisVersion{"2.2.10", 2, 2} + if !v.olderThan(redisVersion{"6.2", 6, 2}) { + t.Fatal("Expect true, got false.") + } + if !v.olderThan(redisVersion{"2.3", 2, 3}) { + t.Fatal("Expect true, got false.") + } + if v.olderThan(redisVersion{"2.2", 2, 2}) { + t.Fatal("Expect false, got true.") + } + if v.olderThan(redisVersion{"2.1", 2, 1}) { + t.Fatal("Expect false, got true.") + } + if v.olderThan(v) { + t.Fatal("Expect false, got true.") + } + if v.olderThan(redisVersion{}) { + t.Fatal("Expect false, got true.") + } +} + +func TestParseRedisVersion(t *testing.T) { + t.Run("Should return error for invalid redisVersion", func(t *testing.T) { + invalidVers := []string{"", "2.sadf.1", "3", "t.3.4"} + for _, v := range invalidVers { + _, err := parseRedisVersion(v) + if err == nil { + t.Fail() + } + } + }) + t.Run("Should parse redisVersion", func(t *testing.T) { + ver, err := parseRedisVersion("6.2.19") + if err != nil { + t.Fatalf("Failed to parse a valid redisVersion: %s", err) + } + if !(ver.major == 6 && ver.minor == 2) { + t.Fatalf("Expect %s, got %s", "6.2", ver) + } + if ver.String() != "6.2.19" { + t.Fatalf("Expect %s, got %s", "6.2.19", ver) + } + }) +} + +func TestParseRedisInfo(t *testing.T) { + t.Run("Should parse the fields we are interested in", func(t *testing.T) { + input := `# Server + redis_version:6.1.240 + redis_git_sha1:00000000 + redis_git_dirty:0 + redis_build_id:a26db646ea64a07c + redis_mode:standalone + os:Linux 5.4.0-1017-aws x86_64 + arch_bits:64 + multiplexing_api:epoll + atomicvar_api:c11-builtin + gcc_version:9.3.0 + process_id:2755423 + process_supervised:no + run_id:d04b36ea49704b152d8ce82bf563d26bcd52e741 + tcp_port:6379 + server_time_usec:1610404734862725 + uptime_in_seconds:2430194 + uptime_in_days:28 + hz:10 + configured_hz:10 + lru_clock:16569214 + executable:/usr/local/bin/redis-server + config_file:/etc/redis/redis.conf + io_threads_active:0 + + # Clients + connected_clients:2 + cluster_connections:0 + maxclients:10000 + client_recent_max_input_buffer:24 + client_recent_max_output_buffer:0 + blocked_clients:0 + tracking_clients:0 + clients_in_timeout_table:0 + + # Memory + used_memory:200001664 + used_memory_human:190.74M + used_memory_rss:210456576 + used_memory_rss_human:200.71M + used_memory_peak:200060312 + used_memory_peak_human:190.79M + used_memory_peak_perc:99.97% + used_memory_overhead:54246680 + used_memory_startup:803648 + used_memory_dataset:145754984 + used_memory_dataset_perc:73.17% + allocator_allocated:199994624 + allocator_active:200847360 + allocator_resident:209551360 + total_system_memory:16596942848 + total_system_memory_human:15.46G + used_memory_lua:37888 + used_memory_lua_human:37.00K + used_memory_scripts:0 + used_memory_scripts_human:0B + number_of_cached_scripts:0 + maxmemory:200000000 + maxmemory_human:190.73M + maxmemory_policy:allkeys-lru + allocator_frag_ratio:1.00 + allocator_frag_bytes:852736 + allocator_rss_ratio:1.04 + allocator_rss_bytes:8704000 + rss_overhead_ratio:1.00 + rss_overhead_bytes:905216 + mem_fragmentation_ratio:1.05 + mem_fragmentation_bytes:10538760 + mem_not_counted_for_evict:0 + mem_replication_backlog:0 + mem_clients_slaves:0 + mem_clients_normal:41008 + mem_aof_buffer:0 + mem_allocator:jemalloc-5.1.0 + active_defrag_running:0 + lazyfree_pending_objects:0 + lazyfreed_objects:0 + + # Persistence + loading:0 + rdb_changes_since_last_save:6407091 + rdb_bgsave_in_progress:0 + rdb_last_save_time:1607974540 + 
rdb_last_bgsave_status:ok + rdb_last_bgsave_time_sec:-1 + rdb_current_bgsave_time_sec:-1 + rdb_last_cow_size:0 + aof_enabled:0 + aof_rewrite_in_progress:0 + aof_rewrite_scheduled:0 + aof_last_rewrite_time_sec:-1 + aof_current_rewrite_time_sec:-1 + aof_last_bgrewrite_status:ok + aof_last_write_status:ok + aof_last_cow_size:0 + module_fork_in_progress:0 + module_fork_last_cow_size:0 + + # Stats + total_connections_received:127469 + total_commands_processed:15725530 + instantaneous_ops_per_sec:8 + total_net_input_bytes:1305500885 + total_net_output_bytes:237264322 + instantaneous_input_kbps:0.74 + instantaneous_output_kbps:0.10 + rejected_connections:0 + sync_full:0 + sync_partial_ok:0 + sync_partial_err:0 + expired_keys:41809 + expired_stale_perc:0.00 + expired_time_cap_reached_count:0 + expire_cycle_cpu_milliseconds:75107 + evicted_keys:182417 + keyspace_hits:3627925 + keyspace_misses:1661042 + pubsub_channels:0 + pubsub_patterns:0 + latest_fork_usec:0 + total_forks:0 + migrate_cached_sockets:0 + slave_expires_tracked_keys:0 + active_defrag_hits:0 + active_defrag_misses:0 + active_defrag_key_hits:0 + active_defrag_key_misses:0 + tracking_total_keys:0 + tracking_total_items:0 + tracking_total_prefixes:0 + unexpected_error_replies:0 + dump_payload_sanitizations:0 + total_reads_processed:15835400 + total_writes_processed:15835323 + io_threaded_reads_processed:0 + io_threaded_writes_processed:0 + + # Replication + role:master + connected_slaves:0 + master_replid:d4fc9b96fa0c5d3eb4c4444a394ba6e4e40cc0d5 + master_replid2:0000000000000000000000000000000000000000 + master_repl_offset:0 + second_repl_offset:-1 + repl_backlog_active:0 + repl_backlog_size:1048576 + repl_backlog_first_byte_offset:0 + repl_backlog_histlen:0 + + # CPU + used_cpu_sys:3574.527853 + used_cpu_user:13274.227145 + used_cpu_sys_children:0.000000 + used_cpu_user_children:0.000000 + used_cpu_sys_main_thread:3553.579738 + used_cpu_user_main_thread:13249.100447 + + # Modules + + # Cluster + cluster_enabled:0 + + # Keyspace + db0:keys=1125326,expires=5,avg_ttl=321749445601195` + info, err := checkRedisInfo(input) + if err != nil { + t.Fatalf("Failed to parse redis info: %s", err) + } + if info.redisVersion != "6.1.240" { + t.Fatalf("Expect %s, got %q", "6.1.240", info.redisVersion) + } + if info.aofEnabled { + t.Fatalf("Expect %t, got %t", false, true) + } + if info.clusterEnabled { + t.Fatalf("Expect %t, got %t", false, true) + } + if info.maxMemoryPolicy != "allkeys-lru" { + t.Fatalf("Expect %s, got %s", "allkeys-lru", info.maxMemoryPolicy) + } + }) + t.Run("Test fields that may emit warnings", func(t *testing.T) { + input := `# Server + redis_version:2.1.0 + + # Cluster + cluster_enabled:1` + info, err := checkRedisInfo(input) + if err != nil { + t.Fatalf("Failed to parse redis info: %s", err) + } + if !info.clusterEnabled { + t.Fatalf("Expect %t, got %t", true, false) + } + }) +} diff --git a/pkg/meta/interface.go b/pkg/meta/interface.go new file mode 100644 index 0000000..0c7da6a --- /dev/null +++ b/pkg/meta/interface.go @@ -0,0 +1,421 @@ +/* + * JuiceFS, Copyright 2020 Juicedata, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package meta + +import ( + "io" + "os" + "strings" + "syscall" + "time" + + "github.com/juicedata/juicefs/pkg/utils" + "github.com/juicedata/juicefs/pkg/version" +) + +const ( + // ChunkSize is size of a chunk + ChunkSize = 1 << 26 // 64M + // DeleteChunk is a message to delete a chunk from object store. + DeleteChunk = 1000 + // CompactChunk is a message to compact a chunk in object store. + CompactChunk = 1001 + // Rmr is a message to remove a directory recursively. + Rmr = 1002 + // Info is a message to get the internal info for file or directory. + Info = 1003 + // FillCache is a message to build cache for target directories/files + FillCache = 1004 +) + +const ( + TypeFile = 1 // type for regular file + TypeDirectory = 2 // type for directory + TypeSymlink = 3 // type for symlink + TypeFIFO = 4 // type for FIFO node + TypeBlockDev = 5 // type for block device + TypeCharDev = 6 // type for character device + TypeSocket = 7 // type for socket +) + +const ( + RenameNoReplace = 1 << iota + RenameExchange + RenameWhiteout +) + +const ( + // SetAttrMode is a mask to update a attribute of node + SetAttrMode = 1 << iota + SetAttrUID + SetAttrGID + SetAttrSize + SetAttrAtime + SetAttrMtime + SetAttrCtime + SetAttrAtimeNow + SetAttrMtimeNow +) + +const TrashInode = 0x7FFFFFFF10000000 // larger than vfs.minInternalNode +const TrashName = ".trash" + +func isTrash(ino Ino) bool { + return ino >= TrashInode +} + +type internalNode struct { + inode Ino + name string +} + +// MsgCallback is a callback for messages from meta service. +type MsgCallback func(...interface{}) error + +// Attr represents attributes of a node. 
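+// It carries the usual stat fields (type, mode, ownership, timestamps, nlink, length)
+// together with client-side hints such as Full and KeepCache.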
+type Attr struct { + Flags uint8 // reserved flags + Typ uint8 // type of a node + Mode uint16 // permission mode + Uid uint32 // owner id + Gid uint32 // group id of owner + Rdev uint32 // device number + Atime int64 // last access time + Mtime int64 // last modified time + Ctime int64 // last change time for meta + Atimensec uint32 // nanosecond part of atime + Mtimensec uint32 // nanosecond part of mtime + Ctimensec uint32 // nanosecond part of ctime + Nlink uint32 // number of links (sub-directories or hardlinks) + Length uint64 // length of regular file + + Parent Ino // inode of parent, only for Directory + Full bool // the attributes are completed or not + KeepCache bool // whether to keep the cached page or not +} + +func typeToStatType(_type uint8) uint32 { + switch _type & 0x7F { + case TypeDirectory: + return syscall.S_IFDIR + case TypeSymlink: + return syscall.S_IFLNK + case TypeFile: + return syscall.S_IFREG + case TypeFIFO: + return syscall.S_IFIFO + case TypeSocket: + return syscall.S_IFSOCK + case TypeBlockDev: + return syscall.S_IFBLK + case TypeCharDev: + return syscall.S_IFCHR + default: + panic(_type) + } +} + +func typeToString(_type uint8) string { + switch _type { + case TypeFile: + return "regular" + case TypeDirectory: + return "directory" + case TypeSymlink: + return "symlink" + case TypeFIFO: + return "fifo" + case TypeBlockDev: + return "blockdev" + case TypeCharDev: + return "chardev" + case TypeSocket: + return "socket" + default: + return "unknown" + } +} + +func typeFromString(s string) uint8 { + switch s { + case "regular": + return TypeFile + case "directory": + return TypeDirectory + case "symlink": + return TypeSymlink + case "fifo": + return TypeFIFO + case "blockdev": + return TypeBlockDev + case "chardev": + return TypeCharDev + case "socket": + return TypeSocket + default: + panic(s) + } +} + +// SMode is the file mode including type and unix permission. +func (a Attr) SMode() uint32 { + return typeToStatType(a.Typ) | uint32(a.Mode) +} + +// Entry is an entry inside a directory. +type Entry struct { + Inode Ino + Name []byte + Attr *Attr +} + +// Slice is a slice of a chunk. +// Multiple slices could be combined together as a chunk. +type Slice struct { + Chunkid uint64 + Size uint32 + Off uint32 + Len uint32 +} + +// Summary represents the total number of files/directories and +// total length of all files inside a directory. +type Summary struct { + Length uint64 + Size uint64 + Files uint64 + Dirs uint64 +} + +type SessionInfo struct { + Version string + Hostname string + MountPoint string + ProcessID int +} + +type Flock struct { + Inode Ino + Owner uint64 + Ltype string +} + +type Plock struct { + Inode Ino + Owner uint64 + Records []byte // FIXME: loadLocks +} + +// Session contains detailed information of a client session +type Session struct { + Sid uint64 + Heartbeat time.Time + SessionInfo + Sustained []Ino `json:",omitempty"` + Flocks []Flock `json:",omitempty"` + Plocks []Plock `json:",omitempty"` +} + +// Meta is a interface for a meta service for file system. +type Meta interface { + // Name of database + Name() string + // Init is used to initialize a meta service. + Init(format Format, force bool) error + // Shutdown close current database connections. + Shutdown() error + // Reset cleans up all metadata, VERY DANGEROUS! + Reset() error + // Load loads the existing setting of a formatted volume from meta service. + Load() (*Format, error) + // NewSession creates a new client session. 
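+	// The session is tracked by the meta service (see Session above), so its sustained inodes
+	// and file locks can be cleaned up if the client stops heartbeating.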
+ NewSession() error + // CloseSession does cleanup and close the session. + CloseSession() error + // GetSession retrieves information of session with sid + GetSession(sid uint64) (*Session, error) + // ListSessions returns all client sessions. + ListSessions() ([]*Session, error) + // CleanStaleSessions cleans up sessions not active for more than 5 minutes + CleanStaleSessions() + + // StatFS returns summary statistics of a volume. + StatFS(ctx Context, totalspace, availspace, iused, iavail *uint64) syscall.Errno + // Access checks the access permission on given inode. + Access(ctx Context, inode Ino, modemask uint8, attr *Attr) syscall.Errno + // Lookup returns the inode and attributes for the given entry in a directory. + Lookup(ctx Context, parent Ino, name string, inode *Ino, attr *Attr) syscall.Errno + // Resolve fetches the inode and attributes for an entry identified by the given path. + // ENOTSUP will be returned if there's no natural implementation for this operation or + // if there are any symlink following involved. + Resolve(ctx Context, parent Ino, path string, inode *Ino, attr *Attr) syscall.Errno + // GetAttr returns the attributes for given node. + GetAttr(ctx Context, inode Ino, attr *Attr) syscall.Errno + // SetAttr updates the attributes for given node. + SetAttr(ctx Context, inode Ino, set uint16, sggidclearmode uint8, attr *Attr) syscall.Errno + // Truncate changes the length for given file. + Truncate(ctx Context, inode Ino, flags uint8, attrlength uint64, attr *Attr) syscall.Errno + // Fallocate preallocate given space for given file. + Fallocate(ctx Context, inode Ino, mode uint8, off uint64, size uint64) syscall.Errno + // ReadLink returns the target of a symlink. + ReadLink(ctx Context, inode Ino, path *[]byte) syscall.Errno + // Symlink creates a symlink in a directory with given name. + Symlink(ctx Context, parent Ino, name string, path string, inode *Ino, attr *Attr) syscall.Errno + // Mknod creates a node in a directory with given name, type and permissions. + Mknod(ctx Context, parent Ino, name string, _type uint8, mode uint16, cumask uint16, rdev uint32, inode *Ino, attr *Attr) syscall.Errno + // Mkdir creates a sub-directory with given name and mode. + Mkdir(ctx Context, parent Ino, name string, mode uint16, cumask uint16, copysgid uint8, inode *Ino, attr *Attr) syscall.Errno + // Unlink removes a file entry from a directory. + // The file will be deleted if it's not linked by any entries and not open by any sessions. + Unlink(ctx Context, parent Ino, name string) syscall.Errno + // Rmdir removes an empty sub-directory. + Rmdir(ctx Context, parent Ino, name string) syscall.Errno + // Rename move an entry from a source directory to another with given name. + // The targeted entry will be overwrited if it's a file or empty directory. + // For Hadoop, the target should not be overwritten. + Rename(ctx Context, parentSrc Ino, nameSrc string, parentDst Ino, nameDst string, flags uint32, inode *Ino, attr *Attr) syscall.Errno + // Link creates an entry for node. + Link(ctx Context, inodeSrc, parent Ino, name string, attr *Attr) syscall.Errno + // Readdir returns all entries for given directory, which include attributes if plus is true. + Readdir(ctx Context, inode Ino, wantattr uint8, entries *[]*Entry) syscall.Errno + // Create creates a file in a directory with given name. + Create(ctx Context, parent Ino, name string, mode uint16, cumask uint16, flags uint32, inode *Ino, attr *Attr) syscall.Errno + // Open checks permission on a node and track it as open. 
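+	// Files opened through this call are kept even if they get unlinked, until the last Close.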
+ Open(ctx Context, inode Ino, flags uint32, attr *Attr) syscall.Errno + // Close a file. + Close(ctx Context, inode Ino) syscall.Errno + // Read returns the list of slices on the given chunk. + Read(ctx Context, inode Ino, indx uint32, chunks *[]Slice) syscall.Errno + // NewChunk returns a new id for new data. + NewChunk(ctx Context, chunkid *uint64) syscall.Errno + // Write put a slice of data on top of the given chunk. + Write(ctx Context, inode Ino, indx uint32, off uint32, slice Slice) syscall.Errno + // InvalidateChunkCache invalidate chunk cache + InvalidateChunkCache(ctx Context, inode Ino, indx uint32) syscall.Errno + // CopyFileRange copies part of a file to another one. + CopyFileRange(ctx Context, fin Ino, offIn uint64, fout Ino, offOut uint64, size uint64, flags uint32, copied *uint64) syscall.Errno + + // GetXattr returns the value of extended attribute for given name. + GetXattr(ctx Context, inode Ino, name string, vbuff *[]byte) syscall.Errno + // ListXattr returns all extended attributes of a node. + ListXattr(ctx Context, inode Ino, dbuff *[]byte) syscall.Errno + // SetXattr update the extended attribute of a node. + SetXattr(ctx Context, inode Ino, name string, value []byte, flags uint32) syscall.Errno + // RemoveXattr removes the extended attribute of a node. + RemoveXattr(ctx Context, inode Ino, name string) syscall.Errno + // Flock tries to put a lock on given file. + Flock(ctx Context, inode Ino, owner uint64, ltype uint32, block bool) syscall.Errno + // Getlk returns the current lock owner for a range on a file. + Getlk(ctx Context, inode Ino, owner uint64, ltype *uint32, start, end *uint64, pid *uint32) syscall.Errno + // Setlk sets a file range lock on given file. + Setlk(ctx Context, inode Ino, owner uint64, block bool, ltype uint32, start, end uint64, pid uint32) syscall.Errno + + // Compact all the chunks by merge small slices together + CompactAll(ctx Context, bar *utils.Bar) syscall.Errno + // ListSlices returns all slices used by all files. + ListSlices(ctx Context, slices map[Ino][]Slice, delete bool, showProgress func()) syscall.Errno + + // OnMsg add a callback for the given message type. + OnMsg(mtype uint32, cb MsgCallback) + + // Dump the tree under root; 0 means using root of the current metadata engine + DumpMeta(w io.Writer, root Ino) error + LoadMeta(r io.Reader) error +} + +type Creator func(driver, addr string, conf *Config) (Meta, error) + +var metaDrivers = make(map[string]Creator) + +func Register(name string, register Creator) { + metaDrivers[name] = register +} + +// NewClient creates a Meta client for given uri. 
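+// The scheme of the uri selects a registered driver (e.g. "redis://", "sqlite3://", "memkv://");
+// when no scheme is given, "redis://" is assumed. The process exits if the driver is unknown or
+// the meta service cannot be reached.
+//
+// A minimal usage sketch (the Redis address and file name below are only illustrative):
+//
+//	m := NewClient("redis://127.0.0.1:6379/1", &Config{Retries: 10})
+//	if err := m.NewSession(); err != nil {
+//		// handle the error
+//	}
+//	var inode Ino
+//	var attr Attr
+//	if st := m.Lookup(Background, 1, "hello.txt", &inode, &attr); st != 0 {
+//		// st is a syscall.Errno, e.g. syscall.ENOENT if the entry does not exist
+//	}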
+func NewClient(uri string, conf *Config) Meta { + if !strings.Contains(uri, "://") { + uri = "redis://" + uri + } + logger.Infof("Meta address: %s", utils.RemovePassword(uri)) + if os.Getenv("META_PASSWORD") != "" { + p := strings.Index(uri, ":@") + if p > 0 { + uri = uri[:p+1] + os.Getenv("META_PASSWORD") + uri[p+1:] + } + } + p := strings.Index(uri, "://") + if p < 0 { + logger.Fatalf("invalid uri: %s", uri) + } + driver := uri[:p] + f, ok := metaDrivers[driver] + if !ok { + logger.Fatalf("Invalid meta driver: %s", driver) + } + m, err := f(driver, uri[p+3:], conf) + if err != nil { + logger.Fatalf("Meta is not available: %s", err) + } + return m +} + +func newSessionInfo() *SessionInfo { + host, err := os.Hostname() + if err != nil { + logger.Warnf("Failed to get hostname: %s", err) + host = "" + } + return &SessionInfo{Version: version.Version(), Hostname: host, ProcessID: os.Getpid()} +} + +func timeit(start time.Time) { + opDist.Observe(time.Since(start).Seconds()) +} + +// Get full path of an inode; a random one is picked if it has multiple hard links +func GetPath(m Meta, ctx Context, inode Ino) (string, syscall.Errno) { + var names []string + var attr Attr + for inode != 1 { + if st := m.GetAttr(ctx, inode, &attr); st != 0 { + logger.Debugf("getattr inode %d: %s", inode, st) + return "", st + } + + var entries []*Entry + if st := m.Readdir(ctx, attr.Parent, 0, &entries); st != 0 { + return "", st + } + var name string + for _, e := range entries { + if e.Inode == inode { + name = string(e.Name) + break + } + } + if name == "" { + return "", syscall.ENOENT + } + names = append(names, name) + inode = attr.Parent + } + + for i, j := 0, len(names)-1; i < j; i, j = i+1, j-1 { // reverse + names[i], names[j] = names[j], names[i] + } + return "/" + strings.Join(names, "/"), 0 +} diff --git a/pkg/meta/load_dump_test.go b/pkg/meta/load_dump_test.go new file mode 100644 index 0000000..8b62e67 --- /dev/null +++ b/pkg/meta/load_dump_test.go @@ -0,0 +1,135 @@ +/* + * JuiceFS, Copyright 2021 Juicedata, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package meta + +import ( + "os" + "os/exec" + "path" + "testing" +) + +const sampleFile = "metadata.sample" +const subSampleFile = "metadata-sub.sample" + +func testLoad(t *testing.T, uri, fname string) Meta { + m := NewClient(uri, &Config{Retries: 10, Strict: true}) + if err := m.Reset(); err != nil { + t.Fatalf("reset meta: %s", err) + } + fp, err := os.Open(fname) + if err != nil { + t.Fatalf("open file: %s", fname) + } + defer fp.Close() + if err = m.LoadMeta(fp); err != nil { + t.Fatalf("load meta: %s", err) + } + + ctx := Background + var entries []*Entry + if st := m.Readdir(ctx, 1, 0, &entries); st != 0 { + t.Fatalf("readdir: %s", st) + } else if len(entries) != 8 { + t.Fatalf("entries: %d", len(entries)) + } + attr := &Attr{} + if st := m.GetAttr(ctx, 2, attr); st != 0 { + t.Fatalf("getattr: %s", st) + } + if attr.Nlink != 1 || attr.Length != 24 { + t.Fatalf("nlink: %d, length: %d", attr.Nlink, attr.Length) + } + var chunks []Slice + if st := m.Read(ctx, 2, 0, &chunks); st != 0 { + t.Fatalf("read chunk: %s", st) + } + if len(chunks) != 1 || chunks[0].Chunkid != 4 || chunks[0].Size != 24 { + t.Fatalf("chunks: %v", chunks) + } + if st := m.GetAttr(ctx, 4, attr); st != 0 || attr.Nlink != 2 { // hard link + t.Fatalf("getattr: %s, %d", st, attr.Nlink) + } + var target []byte + if st := m.ReadLink(ctx, 5, &target); st != 0 || string(target) != "d1/f11" { // symlink + t.Fatalf("readlink: %s, %s", st, target) + } + var value []byte + if st := m.GetXattr(ctx, 2, "k", &value); st != 0 || string(value) != "v" { + t.Fatalf("getxattr: %s %v", st, value) + } + + return m +} + +func testDump(t *testing.T, m Meta, root Ino, expect, result string) { + fp, err := os.OpenFile(result, os.O_CREATE|os.O_WRONLY|os.O_TRUNC, 0644) + if err != nil { + t.Fatalf("open file %s: %s", result, err) + } + defer fp.Close() + if err = m.DumpMeta(fp, root); err != nil { + t.Fatalf("dump meta: %s", err) + } + cmd := exec.Command("diff", expect, result) + if out, err := cmd.Output(); err != nil { + t.Fatalf("diff %s %s: %s", expect, result, out) + } +} + +func TestLoadDump(t *testing.T) { + t.Run("Metadata Engine: Redis", func(t *testing.T) { + m := testLoad(t, "redis://127.0.0.1/10", sampleFile) + testDump(t, m, 0, sampleFile, "redis.dump") + }) + t.Run("Metadata Engine: Redis; --SubDir d1 ", func(t *testing.T) { + _ = testLoad(t, "redis://127.0.0.1/10", sampleFile) + m := NewClient("redis://127.0.0.1/10", &Config{Retries: 10, Strict: true, Subdir: "d1"}) + testDump(t, m, 0, subSampleFile, "redis_subdir.dump") + testDump(t, m, 1, sampleFile, "redis.dump") + }) + + sqluri := "sqlite3://" + path.Join(t.TempDir(), "jfs-load-dump-test.db") + t.Run("Metadata Engine: SQLite", func(t *testing.T) { + m := testLoad(t, sqluri, sampleFile) + testDump(t, m, 0, sampleFile, "sqlite3.dump") + }) + t.Run("Metadata Engine: SQLite --SubDir d1", func(t *testing.T) { + _ = testLoad(t, sqluri, sampleFile) + m := NewClient(sqluri, &Config{Retries: 10, Strict: true, Subdir: "d1"}) + testDump(t, m, 0, subSampleFile, "sqlite3_subdir.dump") + testDump(t, m, 1, sampleFile, "sqlite3.dump") + }) + + t.Run("Metadata Engine: TKV", func(t *testing.T) { + _ = os.Remove(settingPath) + m := testLoad(t, "memkv://test/jfs", sampleFile) + testDump(t, m, 0, sampleFile, "tkv.dump") + }) + t.Run("Metadata Engine: TKV --SubDir d1 ", func(t *testing.T) { + _ = os.Remove(settingPath) + m := testLoad(t, "memkv://user:passwd@test/jfs", sampleFile) + if kvm, ok := m.(*kvMeta); ok { // memkv will be empty if created again + var err error + if kvm.root, err 
= lookupSubdir(kvm, "d1"); err != nil { + t.Fatalf("lookup subdir d1: %s", err) + } + } + testDump(t, m, 0, subSampleFile, "tkv_subdir.dump") + testDump(t, m, 1, sampleFile, "tkv.dump") + }) +} diff --git a/pkg/meta/lua_scripts.go b/pkg/meta/lua_scripts.go new file mode 100644 index 0000000..7028091 --- /dev/null +++ b/pkg/meta/lua_scripts.go @@ -0,0 +1,95 @@ +/* + * JuiceFS, Copyright 2020 Juicedata, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +//nolint +package meta + +const scriptLookup = ` +local buf = redis.call('HGET', KEYS[1], KEYS[2]) +if not buf then + error("ENOENT") +end +local ino = struct.unpack(">I8", string.sub(buf, 2)) +-- double float has 52 significant bits +if ino > 4503599627370495 then + error("ENOTSUP") +end +return {ino, redis.call('GET', "i" .. string.format("%.f", ino))} +` + +const scriptResolve = ` +local function unpack_attr(buf) + local x = {} + x.flags, x.mode, x.uid, x.gid = struct.unpack(">BHI4I4", string.sub(buf, 0, 11)) + x.type = math.floor(x.mode / 4096) % 8 + x.mode = x.mode % 4096 + return x +end + +local function get_attr(ino) + local encoded_attr = redis.call('GET', "i" .. string.format("%.f", ino)) + if not encoded_attr then + error("ENOENT") + end + return unpack_attr(encoded_attr) +end + +local function lookup(parent, name) + local buf = redis.call('HGET', "d" .. string.format("%.f", parent), name) + if not buf then + error("ENOENT") + end + return struct.unpack(">BI8", buf) +end + +local function can_access(ino, uid, gid) + if uid == 0 then + return true + end + + local attr = get_attr(ino) + local mode = 0 + if attr.uid == uid then + mode = math.floor(attr.mode / 64) % 8 + elseif attr.gid == gid then + mode = math.floor(attr.mode / 8) % 8 + else + mode = attr.mode % 8 + end + return mode % 2 == 1 +end + +local function resolve(parent, path, uid, gid) + local _maxIno = 4503599627370495 + local _type = 2 + for name in string.gmatch(path, "[^/]+") do + if _type == 3 or parent > _maxIno then + error("ENOTSUP") + elseif _type ~= 2 then + error("ENOTDIR") + elseif parent > 1 and not can_access(parent, uid, gid) then + error("EACCESS") + end + _type, parent = lookup(parent, name) + end + if parent > _maxIno then + error("ENOTSUP") + end + return {parent, redis.call('GET', "i" .. 
string.format("%.f", parent))} +end + +return resolve(tonumber(KEYS[1]), KEYS[2], tonumber(KEYS[3]), tonumber(KEYS[4])) +` diff --git a/pkg/meta/metadata-sub.sample b/pkg/meta/metadata-sub.sample new file mode 100644 index 0000000..f415ed3 --- /dev/null +++ b/pkg/meta/metadata-sub.sample @@ -0,0 +1,50 @@ +{ + "Setting": { + "Name": "load-dump-test", + "UUID": "faa27c8f-edab-4791-a4e0-1620b732b343", + "Storage": "file", + "Bucket": "/Users/juicefs/.juicefs/local/", + "AccessKey": "", + "BlockSize": 4096, + "Compression": "none", + "Shards": 0, + "Partitions": 0, + "Capacity": 0, + "Inodes": 0, + "TrashDays": 1 + }, + "Counters": { + "usedSpace": 115380224, + "usedInodes": 11, + "nextInodes": 30, + "nextChunk": 9, + "nextSession": 0, + "nextTrash": 1, + "nextCleanupSlices": 0 + }, + "Sustained": [], + "DelFiles": [ + { + "inode": 23, + "length": 0, + "expire": 1637664458 + } + ], + "FSTree": { + "attr": {"inode":3,"type":"directory","mode":493,"uid":501,"gid":20,"atime":1623746591,"mtime":1623746610,"ctime":1623746610,"atimensec":959224000,"mtimensec":959224000,"ctimensec":959224000,"nlink":2,"length":0}, + "xattrs": [{"name":"dk","value":"dv"}], + "entries": { + "big": { + "attr": {"inode":6,"type":"regular","mode":420,"uid":501,"gid":0,"atime":1637150857,"mtime":1637150858,"ctime":1637150878,"atimensec":961503000,"mtimensec":961503000,"ctimensec":961503000,"nlink":1,"length":104857600}, + "chunks": [ + {"index":0,"slices":[{"chunkid":5,"pos":0,"size":67108864,"off":0,"len":67108864}]}, + {"index":1,"slices":[{"chunkid":6,"pos":0,"size":37748736,"off":0,"len":37748736}]} + ] + }, + "f11": { + "attr": {"inode":4,"type":"regular","mode":420,"uid":501,"gid":20,"atime":1623746610,"mtime":1623746610,"ctime":1623746639,"atimensec":591590000,"mtimensec":591590000,"ctimensec":591590000,"nlink":2,"length":12}, + "chunks": [{"index":0,"slices":[{"chunkid":2,"pos":0,"size":12,"off":0,"len":12}]}] + } + } + } +} diff --git a/pkg/meta/metadata.sample b/pkg/meta/metadata.sample new file mode 100644 index 0000000..e12943e --- /dev/null +++ b/pkg/meta/metadata.sample @@ -0,0 +1,101 @@ +{ + "Setting": { + "Name": "load-dump-test", + "UUID": "faa27c8f-edab-4791-a4e0-1620b732b343", + "Storage": "file", + "Bucket": "/Users/juicefs/.juicefs/local/", + "AccessKey": "", + "BlockSize": 4096, + "Compression": "none", + "Shards": 0, + "Partitions": 0, + "Capacity": 0, + "Inodes": 0, + "TrashDays": 1 + }, + "Counters": { + "usedSpace": 115380224, + "usedInodes": 11, + "nextInodes": 30, + "nextChunk": 9, + "nextSession": 0, + "nextTrash": 1, + "nextCleanupSlices": 0 + }, + "Sustained": [], + "DelFiles": [ + { + "inode": 23, + "length": 0, + "expire": 1637664458 + } + ], + "FSTree": { + "attr": {"inode":1,"type":"directory","mode":511,"uid":0,"gid":0,"atime":1623745101,"mtime":1638437879,"ctime":1638437879,"atimensec":0,"mtimensec":0,"ctimensec":0,"nlink":4,"length":0}, + "xattrs": [{"name":"lastBackup","value":"2021-11-23T18:29:54+08:00"}], + "entries": { + "d": { + "attr": {"inode":25,"type":"directory","mode":493,"uid":1,"gid":0,"atime":1637664458,"mtime":1637664458,"ctime":1637664458,"atimensec":862381000,"mtimensec":862381000,"ctimensec":862381000,"nlink":2,"length":0}, + "entries": { + } + }, + "d1": { + "attr": {"inode":3,"type":"directory","mode":493,"uid":501,"gid":20,"atime":1623746591,"mtime":1623746610,"ctime":1623746610,"atimensec":959224000,"mtimensec":959224000,"ctimensec":959224000,"nlink":2,"length":0}, + "xattrs": [{"name":"dk","value":"dv"}], + "entries": { + "big": { + "attr": 
{"inode":6,"type":"regular","mode":420,"uid":501,"gid":0,"atime":1637150857,"mtime":1637150858,"ctime":1637150878,"atimensec":961503000,"mtimensec":961503000,"ctimensec":961503000,"nlink":1,"length":104857600}, + "chunks": [ + {"index":0,"slices":[{"chunkid":5,"pos":0,"size":67108864,"off":0,"len":67108864}]}, + {"index":1,"slices":[{"chunkid":6,"pos":0,"size":37748736,"off":0,"len":37748736}]} + ] + }, + "f11": { + "attr": {"inode":4,"type":"regular","mode":420,"uid":501,"gid":20,"atime":1623746610,"mtime":1623746610,"ctime":1623746639,"atimensec":591590000,"mtimensec":591590000,"ctimensec":591590000,"nlink":2,"length":12}, + "chunks": [{"index":0,"slices":[{"chunkid":2,"pos":0,"size":12,"off":0,"len":12}]}] + } + } + }, + "f1": { + "attr": {"inode":2,"type":"regular","mode":420,"uid":501,"gid":20,"atime":1623746580,"mtime":1623746661,"ctime":1623746661,"atimensec":219686000,"mtimensec":219686000,"ctimensec":219686000,"nlink":1,"length":24}, + "xattrs": [{"name":"k","value":"v"}], + "chunks": [{"index":0,"slices":[{"chunkid":1,"pos":0,"size":6,"off":0,"len":6},{"chunkid":3,"pos":0,"size":12,"off":0,"len":12},{"chunkid":4,"pos":0,"size":24,"off":0,"len":24}]}] + }, + "l1": { + "attr": {"inode":4,"type":"regular","mode":420,"uid":501,"gid":20,"atime":1623746610,"mtime":1623746610,"ctime":1623746639,"atimensec":591590000,"mtimensec":591590000,"ctimensec":591590000,"nlink":2,"length":12}, + "chunks": [{"index":0,"slices":[{"chunkid":2,"pos":0,"size":12,"off":0,"len":12}]}] + }, + "s1": { + "attr": {"inode":5,"type":"symlink","mode":420,"uid":501,"gid":20,"atime":1623746645,"mtime":1623746645,"ctime":1623746645,"atimensec":984144000,"mtimensec":984144000,"ctimensec":984144000,"nlink":1,"length":0}, + "symlink": "d1/f11" + }, + "sd": { + "attr": {"inode":26,"type":"symlink","mode":420,"uid":1,"gid":0,"atime":1637664458,"mtime":1637664458,"ctime":1637664458,"atimensec":873647000,"mtimensec":873647000,"ctimensec":873647000,"nlink":1,"length":0}, + "symlink": "d" + } + } + }, + "Trash": { + "attr": {"inode":9223372032828243968,"type":"directory","mode":365,"uid":0,"gid":0,"atime":1623745101,"mtime":1638437877,"ctime":1638437877,"atimensec":0,"mtimensec":0,"ctimensec":0,"nlink":3,"length":0}, + "entries": { + "2021-12-02-09": { + "attr": {"inode":9223372032828243969,"type":"directory","mode":365,"uid":0,"gid":0,"atime":1638437877,"mtime":1638437877,"ctime":1638437877,"atimensec":598277000,"mtimensec":598277000,"ctimensec":598277000,"nlink":3,"length":0}, + "entries": { + "1-27-tf1": { + "attr": {"inode":27,"type":"regular","mode":420,"uid":501,"gid":0,"atime":1638437852,"mtime":1638437852,"ctime":1638437877,"atimensec":28186000,"mtimensec":28186000,"ctimensec":28186000,"nlink":1,"length":11}, + "chunks": [{"index":0,"slices":[{"chunkid":7,"pos":0,"size":11,"off":0,"len":11}]}] + }, + "1-28-td1": { + "attr": {"inode":28,"type":"directory","mode":493,"uid":501,"gid":0,"atime":1638437856,"mtime":1638437879,"ctime":1638437879,"atimensec":59246000,"mtimensec":59246000,"ctimensec":59246000,"nlink":2,"length":0}, + "entries": { + } + }, + "28-29-tdf1": { + "attr": {"inode":29,"type":"regular","mode":420,"uid":501,"gid":0,"atime":1638437873,"mtime":1638437873,"ctime":1638437879,"atimensec":449880000,"mtimensec":449880000,"ctimensec":449880000,"nlink":1,"length":10485760}, + "chunks": [{"index":0,"slices":[{"chunkid":8,"pos":0,"size":10485760,"off":0,"len":10485760}]}] + } + } + } + } + } +} diff --git a/pkg/meta/metrics.go b/pkg/meta/metrics.go new file mode 100644 index 0000000..f26a2b1 --- /dev/null +++ 
b/pkg/meta/metrics.go @@ -0,0 +1,42 @@ +/* + * JuiceFS, Copyright 2021 Juicedata, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package meta + +import "github.com/prometheus/client_golang/prometheus" + +var ( + txDist = prometheus.NewHistogram(prometheus.HistogramOpts{ + Name: "transaction_durations_histogram_seconds", + Help: "Transactions latency distributions.", + Buckets: prometheus.ExponentialBuckets(0.0001, 1.5, 30), + }) + txRestart = prometheus.NewCounter(prometheus.CounterOpts{ + Name: "transaction_restart", + Help: "The number of times a transaction is restarted.", + }) + opDist = prometheus.NewHistogram(prometheus.HistogramOpts{ + Name: "meta_ops_durations_histogram_seconds", + Help: "Operation latency distributions.", + Buckets: prometheus.ExponentialBuckets(0.0001, 1.5, 30), + }) +) + +func InitMetrics() { + prometheus.MustRegister(txDist) + prometheus.MustRegister(txRestart) + prometheus.MustRegister(opDist) +} diff --git a/pkg/meta/metrics_test.go b/pkg/meta/metrics_test.go new file mode 100644 index 0000000..ceca3d2 --- /dev/null +++ b/pkg/meta/metrics_test.go @@ -0,0 +1,32 @@ +/* + * JuiceFS, Copyright 2021 Juicedata, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package meta + +import ( + "testing" + + "github.com/prometheus/client_golang/prometheus" +) + +func TestInitMetrics(t *testing.T) { + InitMetrics() + for _, collector := range []prometheus.Collector{txDist, txRestart, opDist} { + if _, ok := prometheus.Register(collector).(prometheus.AlreadyRegisteredError); !ok { + t.Fatalf("TestInitMetrics Failed") + } + } +} diff --git a/pkg/meta/openfile.go b/pkg/meta/openfile.go new file mode 100644 index 0000000..a24b639 --- /dev/null +++ b/pkg/meta/openfile.go @@ -0,0 +1,171 @@ +package meta + +import ( + "sync" + "time" +) + +type openFile struct { + sync.RWMutex + attr Attr + refs int + lastCheck time.Time + chunks map[uint32][]Slice +} + +type openfiles struct { + sync.Mutex + expire time.Duration + files map[Ino]*openFile +} + +func newOpenFiles(expire time.Duration) *openfiles { + of := &openfiles{ + expire: expire, + files: make(map[Ino]*openFile), + } + go of.cleanup() + return of +} + +func (o *openfiles) cleanup() { + for { + o.Lock() + cutoff := time.Now().Add(-time.Hour) + for ino, of := range o.files { + if of.refs <= 0 && of.lastCheck.Before(cutoff) { + delete(o.files, ino) + } + } + o.Unlock() + time.Sleep(time.Second) + } +} + +func (o *openfiles) OpenCheck(ino Ino, attr *Attr) bool { + o.Lock() + defer o.Unlock() + of, ok := o.files[ino] + if ok && time.Since(of.lastCheck) < o.expire { + if attr != nil { + *attr = of.attr + } + of.refs++ + return true + } + return false +} + +func (o *openfiles) Open(ino Ino, attr *Attr) { + o.Lock() + defer o.Unlock() + of, ok := o.files[ino] + if !ok { + of = &openFile{} + of.chunks = make(map[uint32][]Slice) + o.files[ino] = of + } else if attr != nil && attr.Mtime == of.attr.Mtime && attr.Mtimensec == of.attr.Mtimensec { + attr.KeepCache = of.attr.KeepCache + } else { + of.chunks = make(map[uint32][]Slice) + } + if attr != nil { + of.attr = *attr + } + // next open can keep cache if not modified + of.attr.KeepCache = true + of.refs++ + of.lastCheck = time.Now() +} + +func (o *openfiles) Close(ino Ino) bool { + o.Lock() + defer o.Unlock() + of, ok := o.files[ino] + if ok { + of.refs-- + return of.refs <= 0 + } + return true +} + +func (o *openfiles) Check(ino Ino, attr *Attr) bool { + if attr == nil { + panic("attr is nil") + } + o.Lock() + defer o.Unlock() + of, ok := o.files[ino] + if ok && time.Since(of.lastCheck) < o.expire { + *attr = of.attr + return true + } + return false +} + +func (o *openfiles) Update(ino Ino, attr *Attr) bool { + if attr == nil { + panic("attr is nil") + } + o.Lock() + defer o.Unlock() + of, ok := o.files[ino] + if ok { + if attr.Mtime != of.attr.Mtime || attr.Mtimensec != of.attr.Mtimensec { + of.chunks = make(map[uint32][]Slice) + } else { + attr.KeepCache = of.attr.KeepCache + } + of.attr = *attr + of.lastCheck = time.Now() + return true + } + return false +} + +func (o *openfiles) IsOpen(ino Ino) bool { + o.Lock() + defer o.Unlock() + of, ok := o.files[ino] + return ok && of.refs > 0 +} + +func (o *openfiles) ReadChunk(ino Ino, indx uint32) ([]Slice, bool) { + o.Lock() + defer o.Unlock() + of, ok := o.files[ino] + if !ok { + return nil, false + } + cs, ok := of.chunks[indx] + return cs, ok +} + +func (o *openfiles) CacheChunk(ino Ino, indx uint32, cs []Slice) { + o.Lock() + defer o.Unlock() + of, ok := o.files[ino] + if ok { + of.chunks[indx] = cs + } +} + +func (o *openfiles) InvalidateChunk(ino Ino, indx uint32) { + o.Lock() + defer o.Unlock() + of, ok := o.files[ino] + if ok { + if indx == 0xFFFFFFFF { + of.chunks = make(map[uint32][]Slice) + } else { + 
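+ // otherwise invalidate only the cached slices of the given chunk index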
delete(of.chunks, indx) + } + of.lastCheck = time.Unix(0, 0) + } +} + +func (o *openfiles) find(ino Ino) *openFile { + o.Lock() + defer o.Unlock() + return o.files[ino] +} diff --git a/pkg/meta/redis.go b/pkg/meta/redis.go new file mode 100644 index 0000000..fbb423c --- /dev/null +++ b/pkg/meta/redis.go @@ -0,0 +1,3061 @@ +//go:build !noredis +// +build !noredis + +/* + * JuiceFS, Copyright 2020 Juicedata, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package meta + +import ( + "bufio" + "context" + "encoding/binary" + "encoding/json" + "fmt" + "hash/fnv" + "io" + "math/rand" + "net" + "os" + "runtime" + "sort" + "strconv" + "strings" + "sync" + "syscall" + "time" + + "github.com/pkg/errors" + + "github.com/go-redis/redis/v8" + "github.com/juicedata/juicefs/pkg/utils" +) + +/* + Node: i$inode -> Attribute{type,mode,uid,gid,atime,mtime,ctime,nlink,length,rdev} + Dir: d$inode -> {name -> {inode,type}} + File: c$inode_$indx -> [Slice{pos,id,length,off,len}] + Symlink: s$inode -> target + Xattr: x$inode -> {name -> value} + Flock: lockf$inode -> { $sid_$owner -> ltype } + POSIX lock: lockp$inode -> { $sid_$owner -> Plock(pid,ltype,start,end) } + Sessions: sessions -> [ $sid -> heartbeat ] + sustained: session$sid -> [$inode] + locked: locked$sid -> { lockf$inode or lockp$inode } + + Removed files: delfiles -> [$inode:$length -> seconds] + Slices refs: k$chunkid_$size -> refcount + + Redis features: + Sorted Set: 1.2+ + Hash Set: 4.0+ + Transaction: 2.2+ + Scripting: 2.6+ + Scan: 2.8+ +*/ + +type redisMeta struct { + baseMeta + rdb *redis.Client + txlocks [1024]sync.Mutex // Pessimistic locks to reduce conflict on Redis + shaLookup string // The SHA returned by Redis for the loaded `scriptLookup` + shaResolve string // The SHA returned by Redis for the loaded `scriptResolve` + snap *redisSnap +} + +var _ Meta = &redisMeta{} + +func init() { + Register("redis", newRedisMeta) + Register("rediss", newRedisMeta) +} + +// newRedisMeta return a meta store using Redis. 
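+// The address may be a single server, e.g. "user:password@host:6379/1", or a comma-separated
+// "master-name,sentinel-host:port,sentinel-host" list for a Sentinel (failover) deployment; sentinel
+// ports default to 26379, and passwords may also come from REDIS_PASSWORD / SENTINEL_PASSWORD.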
+func newRedisMeta(driver, addr string, conf *Config) (Meta, error) { + url := driver + "://" + addr + opt, err := redis.ParseURL(url) + if err != nil { + return nil, fmt.Errorf("parse %s: %s", url, err) + } + var rdb *redis.Client + if strings.Contains(opt.Addr, ",") { + var fopt redis.FailoverOptions + ps := strings.Split(opt.Addr, ",") + fopt.MasterName = ps[0] + fopt.SentinelAddrs = ps[1:] + _, port, _ := net.SplitHostPort(fopt.SentinelAddrs[len(fopt.SentinelAddrs)-1]) + if port == "" { + port = "26379" + } + for i, addr := range fopt.SentinelAddrs { + h, p, e := net.SplitHostPort(addr) + if e != nil { + fopt.SentinelAddrs[i] = net.JoinHostPort(addr, port) + } else if p == "" { + fopt.SentinelAddrs[i] = net.JoinHostPort(h, port) + } + } + fopt.Username = opt.Username + fopt.Password = opt.Password + if fopt.Password == "" && os.Getenv("REDIS_PASSWORD") != "" { + fopt.Password = os.Getenv("REDIS_PASSWORD") + } + fopt.SentinelPassword = os.Getenv("SENTINEL_PASSWORD") + fopt.DB = opt.DB + fopt.TLSConfig = opt.TLSConfig + fopt.MaxRetries = conf.Retries + fopt.MinRetryBackoff = time.Millisecond * 100 + fopt.MaxRetryBackoff = time.Minute * 1 + fopt.ReadTimeout = time.Second * 30 + fopt.WriteTimeout = time.Second * 5 + rdb = redis.NewFailoverClient(&fopt) + } else { + if opt.Password == "" && os.Getenv("REDIS_PASSWORD") != "" { + opt.Password = os.Getenv("REDIS_PASSWORD") + } + opt.MaxRetries = conf.Retries + opt.MinRetryBackoff = time.Millisecond * 100 + opt.MaxRetryBackoff = time.Minute * 1 + opt.ReadTimeout = time.Second * 30 + opt.WriteTimeout = time.Second * 5 + rdb = redis.NewClient(opt) + } + + m := &redisMeta{ + baseMeta: newBaseMeta(conf), + rdb: rdb, + } + m.en = m + m.checkServerConfig() + m.root, err = lookupSubdir(m, conf.Subdir) + return m, err +} + +func (r *redisMeta) Shutdown() error { + return r.rdb.Close() +} + +func (m *redisMeta) doDeleteSlice(chunkid uint64, size uint32) error { + return m.rdb.HDel(Background, sliceRefs, m.sliceKey(chunkid, size)).Err() +} + +func (r *redisMeta) Name() string { + return "redis" +} + +func (r *redisMeta) Init(format Format, force bool) error { + ctx := Background + body, err := r.rdb.Get(ctx, "setting").Bytes() + if err != nil && err != redis.Nil { + return err + } + if err == nil { + var old Format + err = json.Unmarshal(body, &old) + if err != nil { + logger.Fatalf("existing format is broken: %s", err) + } + if force { + old.SecretKey = "removed" + logger.Warnf("Existing volume will be overwrited: %+v", old) + } else { + format.UUID = old.UUID + // these can be safely updated. 
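+ // (bucket, access/secret key, capacity, inode limit, trash days); any other difference is rejected below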
+ old.Bucket = format.Bucket + old.AccessKey = format.AccessKey + old.SecretKey = format.SecretKey + old.Capacity = format.Capacity + old.Inodes = format.Inodes + old.TrashDays = format.TrashDays + if format != old { + old.SecretKey = "" + format.SecretKey = "" + return fmt.Errorf("cannot update format from %+v to %+v", old, format) + } + } + } + + data, err := json.MarshalIndent(format, "", "") + if err != nil { + logger.Fatalf("json: %s", err) + } + ts := time.Now().Unix() + attr := &Attr{ + Typ: TypeDirectory, + Atime: ts, + Mtime: ts, + Ctime: ts, + Nlink: 2, + Length: 4 << 10, + Parent: 1, + } + if format.TrashDays > 0 { + attr.Mode = 0555 + if err = r.rdb.SetNX(ctx, r.inodeKey(TrashInode), r.marshal(attr), 0).Err(); err != nil { + return err + } + } + if err = r.rdb.Set(ctx, "setting", data, 0).Err(); err != nil { + return err + } + r.fmt = format + if body != nil { + return nil + } + + // root inode + attr.Mode = 0777 + return r.rdb.Set(ctx, r.inodeKey(1), r.marshal(attr), 0).Err() +} + +func (r *redisMeta) Reset() error { + return r.rdb.FlushDB(Background).Err() +} + +func (r *redisMeta) Load() (*Format, error) { + body, err := r.rdb.Get(Background, "setting").Bytes() + if err == redis.Nil { + return nil, fmt.Errorf("database is not formatted") + } + if err != nil { + return nil, err + } + err = json.Unmarshal(body, &r.fmt) + if err != nil { + return nil, fmt.Errorf("json: %s", err) + } + return &r.fmt, nil +} + +func (r *redisMeta) NewSession() error { + go r.refreshUsage() + if r.conf.ReadOnly { + return nil + } + sid, err := r.incrCounter("nextsession", 1) + if err != nil { + return fmt.Errorf("create session: %s", err) + } + r.sid = uint64(sid) + logger.Debugf("session is %d", r.sid) + r.rdb.ZAdd(Background, allSessions, &redis.Z{Score: float64(time.Now().Unix()), Member: strconv.Itoa(int(r.sid))}) + info := newSessionInfo() + info.MountPoint = r.conf.MountPoint + data, err := json.Marshal(info) + if err != nil { + return fmt.Errorf("json: %s", err) + } + r.rdb.HSet(Background, sessionInfos, r.sid, data) + + r.shaLookup, err = r.rdb.ScriptLoad(Background, scriptLookup).Result() + if err != nil { + logger.Warnf("load scriptLookup: %v", err) + r.shaLookup = "" + } + r.shaResolve, err = r.rdb.ScriptLoad(Background, scriptResolve).Result() + if err != nil { + logger.Warnf("load scriptResolve: %v", err) + r.shaResolve = "" + } + + go r.refreshSession() + go r.cleanupDeletedFiles() + go r.cleanupSlices() + go r.cleanupTrash() + return nil +} + +func (r *redisMeta) incrCounter(name string, v int64) (int64, error) { + if name == "nextInode" || name == "nextChunk" { + // for nextinode, nextchunk + // the current one is already used + v, err := r.rdb.IncrBy(Background, strings.ToLower(name), v).Result() + return v + 1, err + } + return r.rdb.IncrBy(Background, name, v).Result() +} + +func (r *redisMeta) getSession(sid string, detail bool) (*Session, error) { + ctx := Background + info, err := r.rdb.HGet(ctx, sessionInfos, sid).Bytes() + if err == redis.Nil { // legacy client has no info + info = []byte("{}") + } else if err != nil { + return nil, fmt.Errorf("HGet %s %s: %s", sessionInfos, sid, err) + } + var s Session + if err := json.Unmarshal(info, &s); err != nil { + return nil, fmt.Errorf("corrupted session info; json error: %s", err) + } + s.Sid, _ = strconv.ParseUint(sid, 10, 64) + if detail { + inodes, err := r.rdb.SMembers(ctx, r.sustained(s.Sid)).Result() + if err != nil { + return nil, fmt.Errorf("SMembers %s: %s", sid, err) + } + s.Sustained = make([]Ino, 0, len(inodes)) + for 
_, sinode := range inodes { + inode, _ := strconv.ParseUint(sinode, 10, 64) + s.Sustained = append(s.Sustained, Ino(inode)) + } + + locks, err := r.rdb.SMembers(ctx, r.lockedKey(s.Sid)).Result() + if err != nil { + return nil, fmt.Errorf("SMembers %s: %s", sid, err) + } + s.Flocks = make([]Flock, 0, len(locks)) // greedy + s.Plocks = make([]Plock, 0, len(locks)) + for _, lock := range locks { + owners, err := r.rdb.HGetAll(ctx, lock).Result() + if err != nil { + return nil, fmt.Errorf("HGetAll %s: %s", lock, err) + } + isFlock := strings.HasPrefix(lock, "lockf") + inode, _ := strconv.ParseUint(lock[5:], 10, 64) + for k, v := range owners { + parts := strings.Split(k, "_") + if parts[0] != sid { + continue + } + owner, _ := strconv.ParseUint(parts[1], 16, 64) + if isFlock { + s.Flocks = append(s.Flocks, Flock{Ino(inode), owner, v}) + } else { + s.Plocks = append(s.Plocks, Plock{Ino(inode), owner, []byte(v)}) + } + } + } + } + return &s, nil +} + +func (r *redisMeta) GetSession(sid uint64) (*Session, error) { + key := strconv.FormatUint(sid, 10) + score, err := r.rdb.ZScore(Background, allSessions, key).Result() + if err != nil { + return nil, err + } + s, err := r.getSession(key, true) + if err != nil { + return nil, err + } + s.Heartbeat = time.Unix(int64(score), 0) + return s, nil +} + +func (r *redisMeta) ListSessions() ([]*Session, error) { + keys, err := r.rdb.ZRangeWithScores(Background, allSessions, 0, -1).Result() + if err != nil { + return nil, err + } + sessions := make([]*Session, 0, len(keys)) + for _, k := range keys { + s, err := r.getSession(k.Member.(string), false) + if err != nil { + logger.Errorf("get session: %s", err) + continue + } + s.Heartbeat = time.Unix(int64(k.Score), 0) + sessions = append(sessions, s) + } + return sessions, nil +} + +func (r *redisMeta) sustained(sid uint64) string { + return "session" + strconv.FormatUint(sid, 10) +} + +func (r *redisMeta) lockedKey(sid uint64) string { + return "locked" + strconv.FormatUint(sid, 10) +} + +func (r *redisMeta) symKey(inode Ino) string { + return "s" + inode.String() +} + +func (r *redisMeta) inodeKey(inode Ino) string { + return "i" + inode.String() +} + +func (r *redisMeta) entryKey(parent Ino) string { + return "d" + parent.String() +} + +func (r *redisMeta) chunkKey(inode Ino, indx uint32) string { + return "c" + inode.String() + "_" + strconv.FormatInt(int64(indx), 10) +} + +func (r *redisMeta) sliceKey(chunkid uint64, size uint32) string { + return "k" + strconv.FormatUint(chunkid, 10) + "_" + strconv.FormatUint(uint64(size), 10) +} + +func (r *redisMeta) xattrKey(inode Ino) string { + return "x" + inode.String() +} + +func (r *redisMeta) flockKey(inode Ino) string { + return "lockf" + inode.String() +} + +func (r *redisMeta) ownerKey(owner uint64) string { + return fmt.Sprintf("%d_%016X", r.sid, owner) +} + +func (r *redisMeta) plockKey(inode Ino) string { + return "lockp" + inode.String() +} + +func (r *redisMeta) packEntry(_type uint8, inode Ino) []byte { + wb := utils.NewBuffer(9) + wb.Put8(_type) + wb.Put64(uint64(inode)) + return wb.Bytes() +} + +func (r *redisMeta) parseEntry(buf []byte) (uint8, Ino) { + if len(buf) != 9 { + panic("invalid entry") + } + return buf[0], Ino(binary.BigEndian.Uint64(buf[1:])) +} + +func (r *redisMeta) handleLuaResult(op string, res interface{}, err error, returnedIno *int64, returnedAttr *string) syscall.Errno { + if err != nil { + msg := err.Error() + if strings.Contains(msg, "NOSCRIPT") { + var err2 error + switch op { + case "lookup": + r.shaLookup, err2 = 
r.rdb.ScriptLoad(Background, scriptLookup).Result() + case "resolve": + r.shaResolve, err2 = r.rdb.ScriptLoad(Background, scriptResolve).Result() + default: + return syscall.ENOTSUP + } + if err2 == nil { + logger.Infof("loaded script succeed for %s", op) + return syscall.EAGAIN + } else { + logger.Warnf("load script %s: %s", op, err2) + return syscall.ENOTSUP + } + } + + fields := strings.Fields(msg) + lastError := fields[len(fields)-1] + switch lastError { + case "ENOENT": + return syscall.ENOENT + case "EACCESS": + return syscall.EACCES + case "ENOTDIR": + return syscall.ENOTDIR + case "ENOTSUP": + return syscall.ENOTSUP + default: + logger.Warnf("unexpected error for %s: %s", op, msg) + switch op { + case "lookup": + r.shaLookup = "" + case "resolve": + r.shaResolve = "" + } + return syscall.ENOTSUP + } + } + vals, ok := res.([]interface{}) + if !ok { + logger.Errorf("invalid script result: %v", res) + return syscall.ENOTSUP + } + *returnedIno, ok = vals[0].(int64) + if !ok { + logger.Errorf("invalid script result: %v", res) + return syscall.ENOTSUP + } + if vals[1] == nil { + return syscall.ENOTSUP + } + *returnedAttr, ok = vals[1].(string) + if !ok { + logger.Errorf("invalid script result: %v", res) + return syscall.ENOTSUP + } + return 0 +} + +func (r *redisMeta) doLookup(ctx Context, parent Ino, name string, inode *Ino, attr *Attr) syscall.Errno { + var foundIno Ino + var foundType uint8 + var encodedAttr []byte + var err error + entryKey := r.entryKey(parent) + if len(r.shaLookup) > 0 && attr != nil && !r.conf.CaseInsensi { + var res interface{} + var returnedIno int64 + var returnedAttr string + res, err = r.rdb.EvalSha(ctx, r.shaLookup, []string{entryKey, name}).Result() + if st := r.handleLuaResult("lookup", res, err, &returnedIno, &returnedAttr); st == 0 { + foundIno = Ino(returnedIno) + encodedAttr = []byte(returnedAttr) + } else if st == syscall.EAGAIN { + return r.doLookup(ctx, parent, name, inode, attr) + } else if st != syscall.ENOTSUP { + return st + } + } + if foundIno == 0 || len(encodedAttr) == 0 { + var buf []byte + buf, err = r.rdb.HGet(ctx, entryKey, name).Bytes() + if err != nil { + return errno(err) + } + foundType, foundIno = r.parseEntry(buf) + encodedAttr, err = r.rdb.Get(ctx, r.inodeKey(foundIno)).Bytes() + } + + if err == nil { + r.parseAttr(encodedAttr, attr) + } else if err == redis.Nil { // corrupt entry + logger.Warnf("no attribute for inode %d (%d, %s)", foundIno, parent, name) + *attr = Attr{Typ: foundType} + err = nil + } + *inode = foundIno + return errno(err) +} + +func (r *redisMeta) Resolve(ctx Context, parent Ino, path string, inode *Ino, attr *Attr) syscall.Errno { + if len(r.shaResolve) == 0 || r.conf.CaseInsensi { + return syscall.ENOTSUP + } + defer timeit(time.Now()) + parent = r.checkRoot(parent) + args := []string{parent.String(), path, + strconv.FormatUint(uint64(ctx.Uid()), 10), + strconv.FormatUint(uint64(ctx.Gid()), 10)} + res, err := r.rdb.EvalSha(ctx, r.shaResolve, args).Result() + var returnedIno int64 + var returnedAttr string + st := r.handleLuaResult("resolve", res, err, &returnedIno, &returnedAttr) + if st == 0 { + if inode != nil { + *inode = Ino(returnedIno) + } + r.parseAttr([]byte(returnedAttr), attr) + } else if st == syscall.EAGAIN { + return r.Resolve(ctx, parent, path, inode, attr) + } + return st +} + +func (r *redisMeta) doGetAttr(ctx Context, inode Ino, attr *Attr) syscall.Errno { + a, err := r.rdb.Get(ctx, r.inodeKey(inode)).Bytes() + if err == nil { + r.parseAttr(a, attr) + } + return errno(err) +} + +type 
timeoutError interface { + Timeout() bool +} + +func shouldRetry(err error, retryOnFailure bool) bool { + switch err { + case redis.TxFailedErr: + return true + case io.EOF, io.ErrUnexpectedEOF: + return retryOnFailure + case nil, context.Canceled, context.DeadlineExceeded: + return false + } + + if v, ok := err.(timeoutError); ok && v.Timeout() { + return retryOnFailure + } + + s := err.Error() + if s == "ERR max number of clients reached" { + return true + } + ps := strings.SplitN(s, " ", 3) + switch ps[0] { + case "LOADING": + case "READONLY": + case "CLUSTERDOWN": + case "TRYAGAIN": + case "MOVED": + case "ASK": + case "ERR": + if len(ps) > 1 { + switch ps[1] { + case "DISABLE": + fallthrough + case "NOWRITE": + fallthrough + case "NOREAD": + return true + } + } + return false + default: + return false + } + return true +} + +func (r *redisMeta) txn(ctx Context, txf func(tx *redis.Tx) error, keys ...string) syscall.Errno { + if r.conf.ReadOnly { + return syscall.EROFS + } + var err error + var khash = fnv.New32() + _, _ = khash.Write([]byte(keys[0])) + l := &r.txlocks[int(khash.Sum32())%len(r.txlocks)] + start := time.Now() + defer func() { txDist.Observe(time.Since(start).Seconds()) }() + l.Lock() + defer l.Unlock() + // TODO: enable retry for some of idempodent transactions + var retryOnFailture = false + for i := 0; i < 50; i++ { + err = r.rdb.Watch(ctx, txf, keys...) + if shouldRetry(err, retryOnFailture) { + txRestart.Add(1) + time.Sleep(time.Millisecond * time.Duration(rand.Int()%((i+1)*(i+1)))) + continue + } + return errno(err) + } + return errno(err) +} + +func (r *redisMeta) Truncate(ctx Context, inode Ino, flags uint8, length uint64, attr *Attr) syscall.Errno { + defer timeit(time.Now()) + f := r.of.find(inode) + if f != nil { + f.Lock() + defer f.Unlock() + } + defer func() { r.of.InvalidateChunk(inode, 0xFFFFFFFF) }() + return r.txn(ctx, func(tx *redis.Tx) error { + var t Attr + a, err := tx.Get(ctx, r.inodeKey(inode)).Bytes() + if err != nil { + return err + } + r.parseAttr(a, &t) + if t.Typ != TypeFile { + return syscall.EPERM + } + if length == t.Length { + if attr != nil { + *attr = t + } + return nil + } + newSpace := align4K(length) - align4K(t.Length) + if newSpace > 0 && r.checkQuota(newSpace, 0) { + return syscall.ENOSPC + } + var zeroChunks []uint32 + var left, right = t.Length, length + if left > right { + right, left = left, right + } + if (right-left)/ChunkSize >= 100 { + // super large + var cursor uint64 + var keys []string + for { + keys, cursor, err = tx.Scan(ctx, cursor, fmt.Sprintf("c%d_*", inode), 10000).Result() + if err != nil { + return err + } + for _, key := range keys { + indx, err := strconv.Atoi(strings.Split(key, "_")[1]) + if err != nil { + logger.Errorf("parse %s: %s", key, err) + continue + } + if uint64(indx) > left/ChunkSize && uint64(indx) < right/ChunkSize { + zeroChunks = append(zeroChunks, uint32(indx)) + } + } + if cursor <= 0 { + break + } + } + } else { + for i := left/ChunkSize + 1; i < right/ChunkSize; i++ { + zeroChunks = append(zeroChunks, uint32(i)) + } + } + t.Length = length + now := time.Now() + t.Mtime = now.Unix() + t.Mtimensec = uint32(now.Nanosecond()) + t.Ctime = now.Unix() + t.Ctimensec = uint32(now.Nanosecond()) + _, err = tx.TxPipelined(ctx, func(pipe redis.Pipeliner) error { + pipe.Set(ctx, r.inodeKey(inode), r.marshal(&t), 0) + // zero out from left to right + var l = uint32(right - left) + if right > (left/ChunkSize+1)*ChunkSize { + l = ChunkSize - uint32(left%ChunkSize) + } + pipe.RPush(ctx, r.chunkKey(inode, 
uint32(left/ChunkSize)), marshalSlice(uint32(left%ChunkSize), 0, 0, 0, l)) + buf := marshalSlice(0, 0, 0, 0, ChunkSize) + for _, indx := range zeroChunks { + pipe.RPushX(ctx, r.chunkKey(inode, indx), buf) + } + if right > (left/ChunkSize+1)*ChunkSize && right%ChunkSize > 0 { + pipe.RPush(ctx, r.chunkKey(inode, uint32(right/ChunkSize)), marshalSlice(0, 0, 0, 0, uint32(right%ChunkSize))) + } + pipe.IncrBy(ctx, usedSpace, newSpace) + return nil + }) + if err == nil { + if attr != nil { + *attr = t + } + } + return err + }, r.inodeKey(inode)) +} + +func (r *redisMeta) Fallocate(ctx Context, inode Ino, mode uint8, off uint64, size uint64) syscall.Errno { + if mode&fallocCollapesRange != 0 && mode != fallocCollapesRange { + return syscall.EINVAL + } + if mode&fallocInsertRange != 0 && mode != fallocInsertRange { + return syscall.EINVAL + } + if mode == fallocInsertRange || mode == fallocCollapesRange { + return syscall.ENOTSUP + } + if mode&fallocPunchHole != 0 && mode&fallocKeepSize == 0 { + return syscall.EINVAL + } + if size == 0 { + return syscall.EINVAL + } + defer timeit(time.Now()) + f := r.of.find(inode) + if f != nil { + f.Lock() + defer f.Unlock() + } + defer func() { r.of.InvalidateChunk(inode, 0xFFFFFFFF) }() + return r.txn(ctx, func(tx *redis.Tx) error { + var t Attr + a, err := tx.Get(ctx, r.inodeKey(inode)).Bytes() + if err != nil { + return err + } + r.parseAttr(a, &t) + if t.Typ == TypeFIFO { + return syscall.EPIPE + } + if t.Typ != TypeFile { + return syscall.EPERM + } + length := t.Length + if off+size > t.Length { + if mode&fallocKeepSize == 0 { + length = off + size + } + } + + old := t.Length + if length > old && r.checkQuota(align4K(length)-align4K(old), 0) { + return syscall.ENOSPC + } + t.Length = length + now := time.Now() + t.Mtime = now.Unix() + t.Mtimensec = uint32(now.Nanosecond()) + t.Ctime = now.Unix() + t.Ctimensec = uint32(now.Nanosecond()) + _, err = tx.TxPipelined(ctx, func(pipe redis.Pipeliner) error { + pipe.Set(ctx, r.inodeKey(inode), r.marshal(&t), 0) + if mode&(fallocZeroRange|fallocPunchHole) != 0 { + if off+size > old { + size = old - off + } + for size > 0 { + indx := uint32(off / ChunkSize) + coff := off % ChunkSize + l := size + if coff+size > ChunkSize { + l = ChunkSize - coff + } + pipe.RPush(ctx, r.chunkKey(inode, indx), marshalSlice(uint32(coff), 0, 0, 0, uint32(l))) + off += l + size -= l + } + } + pipe.IncrBy(ctx, usedSpace, align4K(length)-align4K(old)) + return nil + }) + return err + }, r.inodeKey(inode)) +} + +func (r *redisMeta) SetAttr(ctx Context, inode Ino, set uint16, sugidclearmode uint8, attr *Attr) syscall.Errno { + defer timeit(time.Now()) + inode = r.checkRoot(inode) + defer func() { r.of.InvalidateChunk(inode, 0xFFFFFFFE) }() + return r.txn(ctx, func(tx *redis.Tx) error { + var cur Attr + a, err := tx.Get(ctx, r.inodeKey(inode)).Bytes() + if err != nil { + return err + } + r.parseAttr(a, &cur) + if (set&(SetAttrUID|SetAttrGID)) != 0 && (set&SetAttrMode) != 0 { + attr.Mode |= (cur.Mode & 06000) + } + var changed bool + if (cur.Mode&06000) != 0 && (set&(SetAttrUID|SetAttrGID)) != 0 { + clearSUGID(ctx, &cur, attr) + changed = true + } + if set&SetAttrUID != 0 && cur.Uid != attr.Uid { + cur.Uid = attr.Uid + changed = true + } + if set&SetAttrGID != 0 && cur.Gid != attr.Gid { + cur.Gid = attr.Gid + changed = true + } + if set&SetAttrMode != 0 { + if ctx.Uid() != 0 && (attr.Mode&02000) != 0 { + if ctx.Gid() != cur.Gid { + attr.Mode &= 05777 + } + } + if attr.Mode != cur.Mode { + cur.Mode = attr.Mode + changed = true + } + } + now := 
time.Now() + if set&SetAttrAtime != 0 && (cur.Atime != attr.Atime || cur.Atimensec != attr.Atimensec) { + cur.Atime = attr.Atime + cur.Atimensec = attr.Atimensec + changed = true + } + if set&SetAttrAtimeNow != 0 { + cur.Atime = now.Unix() + cur.Atimensec = uint32(now.Nanosecond()) + changed = true + } + if set&SetAttrMtime != 0 && (cur.Mtime != attr.Mtime || cur.Mtimensec != attr.Mtimensec) { + cur.Mtime = attr.Mtime + cur.Mtimensec = attr.Mtimensec + changed = true + } + if set&SetAttrMtimeNow != 0 { + cur.Mtime = now.Unix() + cur.Mtimensec = uint32(now.Nanosecond()) + changed = true + } + if !changed { + *attr = cur + return nil + } + cur.Ctime = now.Unix() + cur.Ctimensec = uint32(now.Nanosecond()) + _, err = tx.TxPipelined(ctx, func(pipe redis.Pipeliner) error { + pipe.Set(ctx, r.inodeKey(inode), r.marshal(&cur), 0) + return nil + }) + if err == nil { + *attr = cur + } + return err + }, r.inodeKey(inode)) +} + +func (m *redisMeta) doReadlink(ctx Context, inode Ino) ([]byte, error) { + return m.rdb.Get(ctx, m.symKey(inode)).Bytes() +} + +func (r *redisMeta) doMknod(ctx Context, parent Ino, name string, _type uint8, mode, cumask uint16, rdev uint32, path string, inode *Ino, attr *Attr) syscall.Errno { + if r.checkQuota(4<<10, 1) { + return syscall.ENOSPC + } + parent = r.checkRoot(parent) + var ino Ino + var err error + if parent == TrashInode { + var next int64 + next, err = r.incrCounter("nextTrash", 1) + ino = TrashInode + Ino(next) + } else { + ino, err = r.nextInode() + } + if err != nil { + return errno(err) + } + if attr == nil { + attr = &Attr{} + } + attr.Typ = _type + attr.Mode = mode & ^cumask + attr.Uid = ctx.Uid() + attr.Gid = ctx.Gid() + if _type == TypeDirectory { + attr.Nlink = 2 + attr.Length = 4 << 10 + } else { + attr.Nlink = 1 + if _type == TypeSymlink { + attr.Length = uint64(len(path)) + } else { + attr.Length = 0 + attr.Rdev = rdev + } + } + attr.Parent = parent + attr.Full = true + if inode != nil { + *inode = ino + } + + return r.txn(ctx, func(tx *redis.Tx) error { + var pattr Attr + a, err := tx.Get(ctx, r.inodeKey(parent)).Bytes() + if err != nil { + return err + } + r.parseAttr(a, &pattr) + if pattr.Typ != TypeDirectory { + return syscall.ENOTDIR + } + + buf, err := tx.HGet(ctx, r.entryKey(parent), name).Bytes() + if err != nil && err != redis.Nil { + return err + } + var foundIno Ino + var foundType uint8 + if err == nil { + foundType, foundIno = r.parseEntry(buf) + } else if r.conf.CaseInsensi { // err == redis.Nil + if entry := r.resolveCase(ctx, parent, name); entry != nil { + foundType, foundIno = entry.Attr.Typ, entry.Inode + } + } + if foundIno != 0 { + if _type == TypeFile || _type == TypeDirectory { // file for create, directory for subTrash + a, err = tx.Get(ctx, r.inodeKey(foundIno)).Bytes() + if err == nil { + r.parseAttr(a, attr) + } else if err == redis.Nil { + *attr = Attr{Typ: foundType, Parent: parent} // corrupt entry + } else { + return err + } + if inode != nil { + *inode = foundIno + } + } + return syscall.EEXIST + } + + now := time.Now() + if _type == TypeDirectory { + pattr.Nlink++ + } + pattr.Mtime = now.Unix() + pattr.Mtimensec = uint32(now.Nanosecond()) + pattr.Ctime = now.Unix() + pattr.Ctimensec = uint32(now.Nanosecond()) + attr.Atime = now.Unix() + attr.Atimensec = uint32(now.Nanosecond()) + attr.Mtime = now.Unix() + attr.Mtimensec = uint32(now.Nanosecond()) + attr.Ctime = now.Unix() + attr.Ctimensec = uint32(now.Nanosecond()) + if pattr.Mode&02000 != 0 || ctx.Value(CtxKey("behavior")) == "Hadoop" || runtime.GOOS == "darwin" { + 
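+ // inherit the group of the parent directory (and, for new directories on Linux, its setgid bit)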
attr.Gid = pattr.Gid + if _type == TypeDirectory && runtime.GOOS == "linux" { + attr.Mode |= pattr.Mode & 02000 + } + } + + _, err = tx.TxPipelined(ctx, func(pipe redis.Pipeliner) error { + pipe.HSet(ctx, r.entryKey(parent), name, r.packEntry(_type, ino)) + pipe.Set(ctx, r.inodeKey(parent), r.marshal(&pattr), 0) + pipe.Set(ctx, r.inodeKey(ino), r.marshal(attr), 0) + if _type == TypeSymlink { + pipe.Set(ctx, r.symKey(ino), path, 0) + } + pipe.IncrBy(ctx, usedSpace, align4K(0)) + pipe.Incr(ctx, totalInodes) + return nil + }) + return err + }, r.inodeKey(parent), r.entryKey(parent)) +} + +func (r *redisMeta) doUnlink(ctx Context, parent Ino, name string) syscall.Errno { + buf, err := r.rdb.HGet(ctx, r.entryKey(parent), name).Bytes() + if err == redis.Nil && r.conf.CaseInsensi { + if e := r.resolveCase(ctx, parent, name); e != nil { + name = string(e.Name) + buf = r.packEntry(e.Attr.Typ, e.Inode) + err = nil + } + } + if err != nil { + return errno(err) + } + _type, inode := r.parseEntry(buf) + if _type == TypeDirectory { + return syscall.EPERM + } + keys := []string{r.entryKey(parent), r.inodeKey(parent), r.inodeKey(inode)} + var trash Ino + if st := r.checkTrash(parent, &trash); st != 0 { + return st + } + if trash > 0 { + keys = append(keys, r.entryKey(trash)) + } else { + defer func() { r.of.InvalidateChunk(inode, 0xFFFFFFFE) }() + } + var opened bool + var attr Attr + eno := r.txn(ctx, func(tx *redis.Tx) error { + rs, _ := tx.MGet(ctx, r.inodeKey(parent), r.inodeKey(inode)).Result() + if rs[0] == nil { + return redis.Nil + } + var pattr Attr + r.parseAttr([]byte(rs[0].(string)), &pattr) + if pattr.Typ != TypeDirectory { + return syscall.ENOTDIR + } + now := time.Now() + pattr.Mtime = now.Unix() + pattr.Mtimensec = uint32(now.Nanosecond()) + pattr.Ctime = now.Unix() + pattr.Ctimensec = uint32(now.Nanosecond()) + attr = Attr{} + opened = false + if rs[1] != nil { + r.parseAttr([]byte(rs[1].(string)), &attr) + if ctx.Uid() != 0 && pattr.Mode&01000 != 0 && ctx.Uid() != pattr.Uid && ctx.Uid() != attr.Uid { + return syscall.EACCES + } + attr.Ctime = now.Unix() + attr.Ctimensec = uint32(now.Nanosecond()) + if trash == 0 { + attr.Nlink-- + if _type == TypeFile && attr.Nlink == 0 { + opened = r.of.IsOpen(inode) + } + } else if attr.Nlink == 1 { // don't change parent if it has hard links + attr.Parent = trash + } + } else { + logger.Warnf("no attribute for inode %d (%d, %s)", inode, parent, name) + trash = 0 + } + + buf, err := tx.HGet(ctx, r.entryKey(parent), name).Bytes() + if err != nil { + return err + } + _type2, inode2 := r.parseEntry(buf) + if _type2 != _type || inode2 != inode { + return syscall.EAGAIN + } + + _, err = tx.TxPipelined(ctx, func(pipe redis.Pipeliner) error { + pipe.HDel(ctx, r.entryKey(parent), name) + pipe.Set(ctx, r.inodeKey(parent), r.marshal(&pattr), 0) + if attr.Nlink > 0 { + pipe.Set(ctx, r.inodeKey(inode), r.marshal(&attr), 0) + if trash > 0 { + pipe.HSet(ctx, r.entryKey(trash), fmt.Sprintf("%d-%d-%s", parent, inode, name), buf) + } + } else { + switch _type { + case TypeFile: + if opened { + pipe.Set(ctx, r.inodeKey(inode), r.marshal(&attr), 0) + pipe.SAdd(ctx, r.sustained(r.sid), strconv.Itoa(int(inode))) + } else { + pipe.ZAdd(ctx, delfiles, &redis.Z{Score: float64(now.Unix()), Member: r.toDelete(inode, attr.Length)}) + pipe.Del(ctx, r.inodeKey(inode)) + pipe.IncrBy(ctx, usedSpace, -align4K(attr.Length)) + pipe.Decr(ctx, totalInodes) + } + case TypeSymlink: + pipe.Del(ctx, r.symKey(inode)) + fallthrough + default: + pipe.Del(ctx, r.inodeKey(inode)) + 
pipe.IncrBy(ctx, usedSpace, -align4K(0)) + pipe.Decr(ctx, totalInodes) + } + pipe.Del(ctx, r.xattrKey(inode)) + } + return nil + }) + + return err + }, keys...) + if eno == 0 && _type == TypeFile && attr.Nlink == 0 { + r.fileDeleted(opened, inode, attr.Length) + } + return eno +} + +func (r *redisMeta) doRmdir(ctx Context, parent Ino, name string) syscall.Errno { + buf, err := r.rdb.HGet(ctx, r.entryKey(parent), name).Bytes() + if err == redis.Nil && r.conf.CaseInsensi { + if e := r.resolveCase(ctx, parent, name); e != nil { + name = string(e.Name) + buf = r.packEntry(e.Attr.Typ, e.Inode) + err = nil + } + } + if err != nil { + return errno(err) + } + typ, inode := r.parseEntry(buf) + if typ != TypeDirectory { + return syscall.ENOTDIR + } + + keys := []string{r.inodeKey(parent), r.entryKey(parent), r.inodeKey(inode), r.entryKey(inode)} + var trash Ino + if st := r.checkTrash(parent, &trash); st != 0 { + return st + } + if trash > 0 { + keys = append(keys, r.entryKey(trash)) + } + return r.txn(ctx, func(tx *redis.Tx) error { + rs, _ := tx.MGet(ctx, r.inodeKey(parent), r.inodeKey(inode)).Result() + if rs[0] == nil { + return redis.Nil + } + var pattr, attr Attr + r.parseAttr([]byte(rs[0].(string)), &pattr) + if pattr.Typ != TypeDirectory { + return syscall.ENOTDIR + } + now := time.Now() + pattr.Nlink-- + pattr.Mtime = now.Unix() + pattr.Mtimensec = uint32(now.Nanosecond()) + pattr.Ctime = now.Unix() + pattr.Ctimensec = uint32(now.Nanosecond()) + + buf, err := tx.HGet(ctx, r.entryKey(parent), name).Bytes() + if err != nil { + return err + } + typ, inode = r.parseEntry(buf) + if typ != TypeDirectory { + return syscall.ENOTDIR + } + + cnt, err := tx.HLen(ctx, r.entryKey(inode)).Result() + if err != nil { + return err + } + if cnt > 0 { + return syscall.ENOTEMPTY + } + if rs[1] != nil { + r.parseAttr([]byte(rs[1].(string)), &attr) + if ctx.Uid() != 0 && pattr.Mode&01000 != 0 && ctx.Uid() != pattr.Uid && ctx.Uid() != attr.Uid { + return syscall.EACCES + } + if trash > 0 { + attr.Ctime = now.Unix() + attr.Ctimensec = uint32(now.Nanosecond()) + attr.Parent = trash + } + } else { + logger.Warnf("no attribute for inode %d (%d, %s)", inode, parent, name) + trash = 0 + } + + _, err = tx.TxPipelined(ctx, func(pipe redis.Pipeliner) error { + pipe.HDel(ctx, r.entryKey(parent), name) + pipe.Set(ctx, r.inodeKey(parent), r.marshal(&pattr), 0) + if trash > 0 { + pipe.Set(ctx, r.inodeKey(inode), r.marshal(&attr), 0) + pipe.HSet(ctx, r.entryKey(trash), fmt.Sprintf("%d-%d-%s", parent, inode, name), buf) + } else { + pipe.Del(ctx, r.inodeKey(inode)) + pipe.Del(ctx, r.xattrKey(inode)) + pipe.IncrBy(ctx, usedSpace, -align4K(0)) + pipe.Decr(ctx, totalInodes) + } + return nil + }) + return err + }, keys...) 
+} + +func (r *redisMeta) doRename(ctx Context, parentSrc Ino, nameSrc string, parentDst Ino, nameDst string, flags uint32, inode *Ino, attr *Attr) syscall.Errno { + exchange := flags == RenameExchange + buf, err := r.rdb.HGet(ctx, r.entryKey(parentSrc), nameSrc).Bytes() + if err == redis.Nil && r.conf.CaseInsensi { + if e := r.resolveCase(ctx, parentSrc, nameSrc); e != nil { + nameSrc = string(e.Name) + buf = r.packEntry(e.Attr.Typ, e.Inode) + err = nil + } + } + if err != nil { + return errno(err) + } + typ, ino := r.parseEntry(buf) + if parentSrc == parentDst && nameSrc == nameDst { + if inode != nil { + *inode = ino + } + return 0 + } + buf, err = r.rdb.HGet(ctx, r.entryKey(parentDst), nameDst).Bytes() + if err == redis.Nil && r.conf.CaseInsensi { + if e := r.resolveCase(ctx, parentDst, nameDst); e != nil { + nameDst = string(e.Name) + buf = r.packEntry(e.Attr.Typ, e.Inode) + err = nil + } + } + if err != nil && err != redis.Nil { + return errno(err) + } + keys := []string{r.entryKey(parentSrc), r.inodeKey(parentSrc), r.inodeKey(ino), r.entryKey(parentDst), r.inodeKey(parentDst)} + var opened bool + var trash, dino Ino + var dtyp uint8 + var tattr Attr + if err == nil { + if st := r.checkTrash(parentDst, &trash); st != 0 { + return st + } + if trash > 0 { + keys = append(keys, r.entryKey(trash)) + } + dtyp, dino = r.parseEntry(buf) + keys = append(keys, r.inodeKey(dino)) + if dtyp == TypeDirectory { + keys = append(keys, r.entryKey(dino)) + } + } + eno := r.txn(ctx, func(tx *redis.Tx) error { + rs, _ := tx.MGet(ctx, r.inodeKey(parentSrc), r.inodeKey(parentDst), r.inodeKey(ino)).Result() + if rs[0] == nil || rs[1] == nil || rs[2] == nil { + return redis.Nil + } + var sattr, dattr, iattr Attr + r.parseAttr([]byte(rs[0].(string)), &sattr) + if sattr.Typ != TypeDirectory { + return syscall.ENOTDIR + } + r.parseAttr([]byte(rs[1].(string)), &dattr) + if dattr.Typ != TypeDirectory { + return syscall.ENOTDIR + } + r.parseAttr([]byte(rs[2].(string)), &iattr) + + dbuf, err := tx.HGet(ctx, r.entryKey(parentDst), nameDst).Bytes() + if err != nil && err != redis.Nil { + return err + } + now := time.Now() + tattr = Attr{} + opened = false + if err == nil { + if flags == RenameNoReplace { + return syscall.EEXIST + } + dtyp1, dino1 := r.parseEntry(dbuf) + if dino1 != dino || dtyp1 != dtyp { + return syscall.EAGAIN + } + a, err := tx.Get(ctx, r.inodeKey(dino)).Bytes() + if err == redis.Nil { + logger.Warnf("no attribute for inode %d (%d, %s)", dino, parentDst, nameDst) + trash = 0 + } else if err != nil { + return err + } + r.parseAttr(a, &tattr) + tattr.Ctime = now.Unix() + tattr.Ctimensec = uint32(now.Nanosecond()) + if exchange { + tattr.Parent = parentSrc + if dtyp == TypeDirectory && parentSrc != parentDst { + dattr.Nlink-- + sattr.Nlink++ + } + } else { + if dtyp == TypeDirectory { + cnt, err := tx.HLen(ctx, r.entryKey(dino)).Result() + if err != nil { + return err + } + if cnt != 0 { + return syscall.ENOTEMPTY + } + dattr.Nlink-- + if trash > 0 { + tattr.Parent = trash + } + } else { + if trash == 0 { + tattr.Nlink-- + if dtyp == TypeFile && tattr.Nlink == 0 { + opened = r.of.IsOpen(dino) + } + defer func() { r.of.InvalidateChunk(dino, 0xFFFFFFFE) }() + } else if tattr.Nlink == 1 { + tattr.Parent = trash + } + } + } + if ctx.Uid() != 0 && dattr.Mode&01000 != 0 && ctx.Uid() != dattr.Uid && ctx.Uid() != tattr.Uid { + return syscall.EACCES + } + } else { + if exchange { + return syscall.ENOENT + } + dino, dtyp = 0, 0 + } + buf, err := tx.HGet(ctx, r.entryKey(parentSrc), nameSrc).Bytes() + if err != 
nil { + return err + } + typ1, ino1 := r.parseEntry(buf) + if ino1 != ino || typ1 != typ { + return syscall.EAGAIN + } + if ctx.Uid() != 0 && sattr.Mode&01000 != 0 && ctx.Uid() != sattr.Uid && ctx.Uid() != iattr.Uid { + return syscall.EACCES + } + + sattr.Mtime = now.Unix() + sattr.Mtimensec = uint32(now.Nanosecond()) + sattr.Ctime = now.Unix() + sattr.Ctimensec = uint32(now.Nanosecond()) + dattr.Mtime = now.Unix() + dattr.Mtimensec = uint32(now.Nanosecond()) + dattr.Ctime = now.Unix() + dattr.Ctimensec = uint32(now.Nanosecond()) + iattr.Parent = parentDst + iattr.Ctime = now.Unix() + iattr.Ctimensec = uint32(now.Nanosecond()) + if typ == TypeDirectory && parentSrc != parentDst { + sattr.Nlink-- + dattr.Nlink++ + } + if inode != nil { + *inode = ino + } + if attr != nil { + *attr = iattr + } + _, err = tx.TxPipelined(ctx, func(pipe redis.Pipeliner) error { + if exchange { // dbuf, tattr are valid + pipe.HSet(ctx, r.entryKey(parentSrc), nameSrc, dbuf) + pipe.Set(ctx, r.inodeKey(dino), r.marshal(&tattr), 0) + } else { + pipe.HDel(ctx, r.entryKey(parentSrc), nameSrc) + if dino > 0 { + if trash > 0 { + pipe.Set(ctx, r.inodeKey(dino), r.marshal(&tattr), 0) + pipe.HSet(ctx, r.entryKey(trash), fmt.Sprintf("%d-%d-%s", parentDst, dino, nameDst), dbuf) + } else if dtyp != TypeDirectory && tattr.Nlink > 0 { + pipe.Set(ctx, r.inodeKey(dino), r.marshal(&tattr), 0) + } else { + if dtyp == TypeFile { + if opened { + pipe.Set(ctx, r.inodeKey(dino), r.marshal(&tattr), 0) + pipe.SAdd(ctx, r.sustained(r.sid), strconv.Itoa(int(dino))) + } else { + pipe.ZAdd(ctx, delfiles, &redis.Z{Score: float64(now.Unix()), Member: r.toDelete(dino, tattr.Length)}) + pipe.Del(ctx, r.inodeKey(dino)) + pipe.IncrBy(ctx, usedSpace, -align4K(tattr.Length)) + pipe.Decr(ctx, totalInodes) + } + } else { + if dtyp == TypeSymlink { + pipe.Del(ctx, r.symKey(dino)) + } + pipe.Del(ctx, r.inodeKey(dino)) + pipe.IncrBy(ctx, usedSpace, -align4K(0)) + pipe.Decr(ctx, totalInodes) + } + pipe.Del(ctx, r.xattrKey(dino)) + } + } + } + if parentDst != parentSrc && !isTrash(parentSrc) { + pipe.Set(ctx, r.inodeKey(parentSrc), r.marshal(&sattr), 0) + } + pipe.Set(ctx, r.inodeKey(ino), r.marshal(&iattr), 0) + pipe.HSet(ctx, r.entryKey(parentDst), nameDst, buf) + pipe.Set(ctx, r.inodeKey(parentDst), r.marshal(&dattr), 0) + return nil + }) + return err + }, keys...) 
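+ // if the rename replaced a regular file whose last link was removed, reclaim its data after the transaction commits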
+ if eno == 0 && !exchange && dino > 0 && dtyp == TypeFile && tattr.Nlink == 0 { + r.fileDeleted(opened, dino, tattr.Length) + } + return eno +} + +func (r *redisMeta) doLink(ctx Context, inode, parent Ino, name string, attr *Attr) syscall.Errno { + return r.txn(ctx, func(tx *redis.Tx) error { + rs, err := tx.MGet(ctx, r.inodeKey(parent), r.inodeKey(inode)).Result() + if err != nil { + return err + } + if rs[0] == nil || rs[1] == nil { + return redis.Nil + } + var pattr, iattr Attr + r.parseAttr([]byte(rs[0].(string)), &pattr) + if pattr.Typ != TypeDirectory { + return syscall.ENOTDIR + } + now := time.Now() + pattr.Mtime = now.Unix() + pattr.Mtimensec = uint32(now.Nanosecond()) + pattr.Ctime = now.Unix() + pattr.Ctimensec = uint32(now.Nanosecond()) + r.parseAttr([]byte(rs[1].(string)), &iattr) + if iattr.Typ == TypeDirectory { + return syscall.EPERM + } + iattr.Ctime = now.Unix() + iattr.Ctimensec = uint32(now.Nanosecond()) + iattr.Nlink++ + + err = tx.HGet(ctx, r.entryKey(parent), name).Err() + if err != nil && err != redis.Nil { + return err + } else if err == nil { + return syscall.EEXIST + } else if err == redis.Nil && r.conf.CaseInsensi && r.resolveCase(ctx, parent, name) != nil { + return syscall.EEXIST + } + + _, err = tx.TxPipelined(ctx, func(pipe redis.Pipeliner) error { + pipe.HSet(ctx, r.entryKey(parent), name, r.packEntry(iattr.Typ, inode)) + pipe.Set(ctx, r.inodeKey(parent), r.marshal(&pattr), 0) + pipe.Set(ctx, r.inodeKey(inode), r.marshal(&iattr), 0) + return nil + }) + if err == nil && attr != nil { + *attr = iattr + } + return err + }, r.inodeKey(inode), r.entryKey(parent), r.inodeKey(parent)) +} + +func (r *redisMeta) doReaddir(ctx Context, inode Ino, plus uint8, entries *[]*Entry) syscall.Errno { + var keys []string + var cursor uint64 + var err error + for { + keys, cursor, err = r.rdb.HScan(ctx, r.entryKey(inode), cursor, "*", 10000).Result() + if err != nil { + return errno(err) + } + newEntries := make([]Entry, len(keys)/2) + newAttrs := make([]Attr, len(keys)/2) + for i := 0; i < len(keys); i += 2 { + typ, inode := r.parseEntry([]byte(keys[i+1])) + ent := &newEntries[i/2] + ent.Inode = inode + ent.Name = []byte(keys[i]) + ent.Attr = &newAttrs[i/2] + ent.Attr.Typ = typ + *entries = append(*entries, ent) + } + if cursor == 0 { + break + } + } + + if plus != 0 { + fillAttr := func(es []*Entry) error { + var keys = make([]string, len(es)) + for i, e := range es { + keys[i] = r.inodeKey(e.Inode) + } + rs, err := r.rdb.MGet(ctx, keys...).Result() + if err != nil { + return err + } + for j, re := range rs { + if re != nil { + if a, ok := re.(string); ok { + r.parseAttr([]byte(a), es[j].Attr) + } + } + } + return nil + } + batchSize := 4096 + nEntries := len(*entries) + if nEntries <= batchSize { + err = fillAttr(*entries) + } else { + indexCh := make(chan []*Entry, 10) + var wg sync.WaitGroup + for i := 0; i < 2; i++ { + wg.Add(1) + go func() { + defer wg.Done() + for es := range indexCh { + e := fillAttr(es) + if e != nil { + err = e + break + } + } + }() + } + for i := 0; i < nEntries; i += batchSize { + if i+batchSize > nEntries { + indexCh <- (*entries)[i:] + } else { + indexCh <- (*entries)[i : i+batchSize] + } + } + close(indexCh) + wg.Wait() + } + if err != nil { + return errno(err) + } + } + return 0 +} + +func (r *redisMeta) doCleanStaleSession(sid uint64) { + // release locks + var ctx = Background + key := r.lockedKey(sid) + inodes, err := r.rdb.SMembers(ctx, key).Result() + if err != nil { + logger.Warnf("SMembers %s: %s", key, err) + return + } + ssid := 
strconv.FormatInt(int64(sid), 10) + for _, k := range inodes { + owners, _ := r.rdb.HKeys(ctx, k).Result() + for _, o := range owners { + if strings.Split(o, "_")[0] == ssid { + err = r.rdb.HDel(ctx, k, o).Err() + logger.Infof("cleanup lock on %s from session %d: %s", k, sid, err) + } + } + r.rdb.SRem(ctx, key, k) + } + + key = r.sustained(sid) + inodes, err = r.rdb.SMembers(ctx, key).Result() + if err != nil { + logger.Warnf("SMembers %s: %s", key, err) + return + } + done := true + for _, sinode := range inodes { + inode, _ := strconv.ParseInt(sinode, 10, 0) + if err := r.doDeleteSustainedInode(sid, Ino(inode)); err != nil { + logger.Errorf("Failed to delete inode %d: %s", inode, err) + done = false + } else { + r.rdb.SRem(ctx, key, sinode) + } + } + if done { + r.rdb.HDel(ctx, sessionInfos, ssid) + r.rdb.ZRem(ctx, allSessions, ssid) + logger.Infof("cleanup session %d", sid) + } +} + +func (r *redisMeta) CleanStaleSessions() { + rng := &redis.ZRangeBy{Max: strconv.Itoa(int(time.Now().Add(time.Minute * -5).Unix())), Count: 100} + staleSessions, _ := r.rdb.ZRangeByScore(Background, allSessions, rng).Result() + for _, ssid := range staleSessions { + sid, _ := strconv.Atoi(ssid) + r.doCleanStaleSession(uint64(sid)) + } +} + +func (r *redisMeta) refreshSession() { + for { + time.Sleep(time.Minute) + r.Lock() + if r.umounting { + r.Unlock() + return + } + r.rdb.ZAdd(Background, allSessions, &redis.Z{Score: float64(time.Now().Unix()), Member: strconv.Itoa(int(r.sid))}) + r.Unlock() + if _, err := r.Load(); err != nil { + logger.Warnf("reload setting: %s", err) + } + go r.CleanStaleSessions() + } +} + +func (r *redisMeta) doDeleteSustainedInode(sid uint64, inode Ino) error { + var attr Attr + var ctx = Background + a, err := r.rdb.Get(ctx, r.inodeKey(inode)).Bytes() + if err == redis.Nil { + return nil + } + if err != nil { + return err + } + r.parseAttr(a, &attr) + _, err = r.rdb.TxPipelined(ctx, func(pipe redis.Pipeliner) error { + pipe.ZAdd(ctx, delfiles, &redis.Z{Score: float64(time.Now().Unix()), Member: r.toDelete(inode, attr.Length)}) + pipe.Del(ctx, r.inodeKey(inode)) + pipe.IncrBy(ctx, usedSpace, -align4K(attr.Length)) + pipe.Decr(ctx, totalInodes) + pipe.SRem(ctx, r.sustained(sid), strconv.Itoa(int(inode))) + return nil + }) + if err == nil { + go r.doDeleteFileData(inode, attr.Length) + } + return err +} + +func (r *redisMeta) Read(ctx Context, inode Ino, indx uint32, chunks *[]Slice) syscall.Errno { + f := r.of.find(inode) + if f != nil { + f.RLock() + defer f.RUnlock() + } + if cs, ok := r.of.ReadChunk(inode, indx); ok { + *chunks = cs + return 0 + } + defer timeit(time.Now()) + vals, err := r.rdb.LRange(ctx, r.chunkKey(inode, indx), 0, 1000000).Result() + if err != nil { + return errno(err) + } + ss := readSlices(vals) + *chunks = buildSlice(ss) + r.of.CacheChunk(inode, indx, *chunks) + if !r.conf.ReadOnly && (len(vals) >= 5 || len(*chunks) >= 5) { + go r.compactChunk(inode, indx, false) + } + return 0 +} + +func (r *redisMeta) Write(ctx Context, inode Ino, indx uint32, off uint32, slice Slice) syscall.Errno { + defer timeit(time.Now()) + f := r.of.find(inode) + if f != nil { + f.Lock() + defer f.Unlock() + } + defer func() { r.of.InvalidateChunk(inode, indx) }() + var needCompact bool + eno := r.txn(ctx, func(tx *redis.Tx) error { + var attr Attr + a, err := tx.Get(ctx, r.inodeKey(inode)).Bytes() + if err != nil { + return err + } + r.parseAttr(a, &attr) + if attr.Typ != TypeFile { + return syscall.EPERM + } + newleng := uint64(indx)*ChunkSize + uint64(off) + uint64(slice.Len) + 
var added int64 + if newleng > attr.Length { + added = align4K(newleng) - align4K(attr.Length) + attr.Length = newleng + } + if r.checkQuota(added, 0) { + return syscall.ENOSPC + } + now := time.Now() + attr.Mtime = now.Unix() + attr.Mtimensec = uint32(now.Nanosecond()) + attr.Ctime = now.Unix() + attr.Ctimensec = uint32(now.Nanosecond()) + + var rpush *redis.IntCmd + _, err = tx.TxPipelined(ctx, func(pipe redis.Pipeliner) error { + rpush = pipe.RPush(ctx, r.chunkKey(inode, indx), marshalSlice(off, slice.Chunkid, slice.Size, slice.Off, slice.Len)) + // most of chunk are used by single inode, so use that as the default (1 == not exists) + // pipe.Incr(ctx, r.sliceKey(slice.Chunkid, slice.Size)) + pipe.Set(ctx, r.inodeKey(inode), r.marshal(&attr), 0) + if added > 0 { + pipe.IncrBy(ctx, usedSpace, added) + } + return nil + }) + if err == nil { + needCompact = rpush.Val()%100 == 99 + } + return err + }, r.inodeKey(inode)) + if eno == 0 && needCompact { + go r.compactChunk(inode, indx, false) + } + return eno +} + +func (r *redisMeta) CopyFileRange(ctx Context, fin Ino, offIn uint64, fout Ino, offOut uint64, size uint64, flags uint32, copied *uint64) syscall.Errno { + defer timeit(time.Now()) + f := r.of.find(fout) + if f != nil { + f.Lock() + defer f.Unlock() + } + defer func() { r.of.InvalidateChunk(fout, 0xFFFFFFFF) }() + return r.txn(ctx, func(tx *redis.Tx) error { + rs, err := tx.MGet(ctx, r.inodeKey(fin), r.inodeKey(fout)).Result() + if err != nil { + return err + } + if rs[0] == nil || rs[1] == nil { + return redis.Nil + } + var sattr Attr + r.parseAttr([]byte(rs[0].(string)), &sattr) + if sattr.Typ != TypeFile { + return syscall.EINVAL + } + if offIn >= sattr.Length { + *copied = 0 + return nil + } + if offIn+size > sattr.Length { + size = sattr.Length - offIn + } + var attr Attr + r.parseAttr([]byte(rs[1].(string)), &attr) + if attr.Typ != TypeFile { + return syscall.EINVAL + } + + newleng := offOut + size + var added int64 + if newleng > attr.Length { + added = align4K(newleng) - align4K(attr.Length) + attr.Length = newleng + } + if r.checkQuota(added, 0) { + return syscall.ENOSPC + } + now := time.Now() + attr.Mtime = now.Unix() + attr.Mtimensec = uint32(now.Nanosecond()) + attr.Ctime = now.Unix() + attr.Ctimensec = uint32(now.Nanosecond()) + + p := tx.Pipeline() + for i := offIn / ChunkSize; i <= (offIn+size)/ChunkSize; i++ { + p.LRange(ctx, r.chunkKey(fin, uint32(i)), 0, 1000000) + } + vals, err := p.Exec(ctx) + if err != nil { + return err + } + + _, err = tx.Pipelined(ctx, func(pipe redis.Pipeliner) error { + coff := offIn / ChunkSize * ChunkSize + for _, v := range vals { + sv := v.(*redis.StringSliceCmd).Val() + // Add a zero chunk for hole + ss := append([]*slice{{len: ChunkSize}}, readSlices(sv)...) 
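+ // flatten the overlapping slices into the chunk's current content, then copy the pieces inside [offIn, offIn+size)
+ // into fout, splitting at chunk boundaries and increasing the refcount of every referenced slice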
+ cs := buildSlice(ss) + tpos := coff + for _, s := range cs { + pos := tpos + tpos += uint64(s.Len) + if pos < offIn+size && pos+uint64(s.Len) > offIn { + if pos < offIn { + dec := offIn - pos + s.Off += uint32(dec) + pos += dec + s.Len -= uint32(dec) + } + if pos+uint64(s.Len) > offIn+size { + dec := pos + uint64(s.Len) - (offIn + size) + s.Len -= uint32(dec) + } + doff := pos - offIn + offOut + indx := uint32(doff / ChunkSize) + dpos := uint32(doff % ChunkSize) + if dpos+s.Len > ChunkSize { + pipe.RPush(ctx, r.chunkKey(fout, indx), marshalSlice(dpos, s.Chunkid, s.Size, s.Off, ChunkSize-dpos)) + if s.Chunkid > 0 { + pipe.HIncrBy(ctx, sliceRefs, r.sliceKey(s.Chunkid, s.Size), 1) + } + + skip := ChunkSize - dpos + pipe.RPush(ctx, r.chunkKey(fout, indx+1), marshalSlice(0, s.Chunkid, s.Size, s.Off+skip, s.Len-skip)) + if s.Chunkid > 0 { + pipe.HIncrBy(ctx, sliceRefs, r.sliceKey(s.Chunkid, s.Size), 1) + } + } else { + pipe.RPush(ctx, r.chunkKey(fout, indx), marshalSlice(dpos, s.Chunkid, s.Size, s.Off, s.Len)) + if s.Chunkid > 0 { + pipe.HIncrBy(ctx, sliceRefs, r.sliceKey(s.Chunkid, s.Size), 1) + } + } + } + } + coff += ChunkSize + } + pipe.Set(ctx, r.inodeKey(fout), r.marshal(&attr), 0) + if added > 0 { + pipe.IncrBy(ctx, usedSpace, added) + } + return nil + }) + if err == nil { + *copied = size + } + return err + }, r.inodeKey(fout), r.inodeKey(fin)) +} + +func (r *redisMeta) cleanupDeletedFiles() { + for { + time.Sleep(time.Minute) + now := time.Now() + members, _ := r.rdb.ZRangeByScore(Background, delfiles, &redis.ZRangeBy{Min: strconv.Itoa(0), Max: strconv.Itoa(int(now.Add(-time.Hour).Unix())), Count: 1000}).Result() + for _, member := range members { + ps := strings.Split(member, ":") + inode, _ := strconv.ParseInt(ps[0], 10, 0) + var length int64 = 1 << 30 + if len(ps) == 2 { + length, _ = strconv.ParseInt(ps[1], 10, 0) + } else if len(ps) > 2 { + length, _ = strconv.ParseInt(ps[2], 10, 0) + } + logger.Debugf("cleanup chunks of inode %d with %d bytes (%s)", inode, length, member) + r.doDeleteFileData_(Ino(inode), uint64(length), member) + } + } +} + +func (r *redisMeta) cleanupSlices() { + for { + time.Sleep(time.Hour) + + // once per hour + var ctx = Background + last, _ := r.rdb.Get(ctx, "nextCleanupSlices").Uint64() + now := time.Now().Unix() + if last+3600 > uint64(now) { + continue + } + r.rdb.Set(ctx, "nextCleanupSlices", now, 0) + r.doCleanupSlices() + } +} + +func (r *redisMeta) doCleanupSlices() { + var ctx = Background + var ckeys []string + var cursor uint64 + var err error + for { + ckeys, cursor, err = r.rdb.HScan(ctx, sliceRefs, cursor, "*", 1000).Result() + if err != nil { + logger.Errorf("scan slices: %s", err) + break + } + if len(ckeys) > 0 { + values, err := r.rdb.HMGet(ctx, sliceRefs, ckeys...).Result() + if err != nil { + logger.Warnf("mget slices: %s", err) + break + } + for i, v := range values { + if v == nil { + continue + } + if strings.HasPrefix(v.(string), "-") { // < 0 + ps := strings.Split(ckeys[i], "_") + if len(ps) == 2 { + chunkid, _ := strconv.ParseUint(ps[0][1:], 10, 64) + size, _ := strconv.ParseUint(ps[1], 10, 32) + if chunkid > 0 && size > 0 { + r.deleteSlice(chunkid, uint32(size)) + } + } + } else if v == "0" { + r.cleanupZeroRef(ckeys[i]) + } + } + } + if cursor == 0 { + break + } + } +} + +func (r *redisMeta) cleanupZeroRef(key string) { + var ctx = Background + _ = r.txn(ctx, func(tx *redis.Tx) error { + v, err := tx.HGet(ctx, sliceRefs, key).Int() + if err != nil { + return err + } + if v != 0 { + return syscall.EINVAL + } + _, err = 
tx.Pipelined(ctx, func(p redis.Pipeliner) error { + p.HDel(ctx, sliceRefs, key) + return nil + }) + return err + }, sliceRefs) +} + +func (r *redisMeta) cleanupLeakedChunks() { + var ctx = Background + var ckeys []string + var cursor uint64 + var err error + for { + ckeys, cursor, err = r.rdb.Scan(ctx, cursor, "c*", 1000).Result() + if err != nil { + logger.Errorf("scan all chunks: %s", err) + break + } + var ikeys []string + var rs []*redis.IntCmd + p := r.rdb.Pipeline() + for _, k := range ckeys { + ps := strings.Split(k, "_") + if len(ps) != 2 { + continue + } + ino, _ := strconv.ParseInt(ps[0][1:], 10, 0) + ikeys = append(ikeys, k) + rs = append(rs, p.Exists(ctx, r.inodeKey(Ino(ino)))) + } + if len(rs) > 0 { + _, err = p.Exec(ctx) + if err != nil { + logger.Errorf("check inodes: %s", err) + return + } + for i, rr := range rs { + if rr.Val() == 0 { + key := ikeys[i] + logger.Infof("found leaked chunk %s", key) + ps := strings.Split(key, "_") + ino, _ := strconv.ParseInt(ps[0][1:], 10, 0) + indx, _ := strconv.Atoi(ps[1]) + _ = r.deleteChunk(Ino(ino), uint32(indx)) + } + } + } + if cursor == 0 { + break + } + } +} + +func (r *redisMeta) cleanupOldSliceRefs() { + var ctx = Background + var ckeys []string + var cursor uint64 + var err error + for { + ckeys, cursor, err = r.rdb.Scan(ctx, cursor, "k*", 1000).Result() + if err != nil { + logger.Errorf("scan slices: %s", err) + break + } + if len(ckeys) > 0 { + values, err := r.rdb.MGet(ctx, ckeys...).Result() + if err != nil { + logger.Warnf("mget slices: %s", err) + break + } + var todel []string + for i, v := range values { + if v == nil { + continue + } + if strings.HasPrefix(v.(string), "-") || v == "0" { // < 0 + // the objects will be deleted by gc + todel = append(todel, ckeys[i]) + } else { + vv, _ := strconv.Atoi(v.(string)) + r.rdb.HIncrBy(ctx, sliceRefs, ckeys[i], int64(vv)) + r.rdb.DecrBy(ctx, ckeys[i], int64(vv)) + logger.Infof("move refs %d for slice %s", vv, ckeys[i]) + } + } + r.rdb.Del(ctx, todel...) 
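+ // legacy per-slice counters ("k*" keys) have now been folded into the sliceRefs hash; non-positive ones are simply dropped and their objects left to gc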
+ } + if cursor == 0 { + break + } + } +} + +func (r *redisMeta) toDelete(inode Ino, length uint64) string { + return inode.String() + ":" + strconv.Itoa(int(length)) +} + +func (r *redisMeta) deleteChunk(inode Ino, indx uint32) error { + var ctx = Background + key := r.chunkKey(inode, indx) + for { + var slices []*slice + var rs []*redis.IntCmd + err := r.txn(ctx, func(tx *redis.Tx) error { + slices = nil + vals, err := tx.LRange(ctx, key, 0, 100).Result() + if err == redis.Nil { + return nil + } + _, err = tx.TxPipelined(ctx, func(pipe redis.Pipeliner) error { + for _, v := range vals { + rb := utils.ReadBuffer([]byte(v)) + _ = rb.Get32() // pos + chunkid := rb.Get64() + size := rb.Get32() + slices = append(slices, &slice{chunkid: chunkid, size: size}) + pipe.LPop(ctx, key) + rs = append(rs, pipe.HIncrBy(ctx, sliceRefs, r.sliceKey(chunkid, size), -1)) + } + return nil + }) + return err + }, key) + if err != syscall.Errno(0) { + return fmt.Errorf("delete slice from chunk %s fail: %s, retry later", key, err) + } + for i, s := range slices { + if rs[i].Val() < 0 { + r.deleteSlice(s.chunkid, s.size) + } + } + if len(slices) < 100 { + break + } + } + return nil +} + +func (r *redisMeta) doDeleteFileData(inode Ino, length uint64) { + r.doDeleteFileData_(inode, length, "") +} + +func (r *redisMeta) doDeleteFileData_(inode Ino, length uint64, tracking string) { + var ctx = Background + var indx uint32 + p := r.rdb.Pipeline() + for uint64(indx)*ChunkSize < length { + var keys []string + for i := 0; uint64(indx)*ChunkSize < length && i < 1000; i++ { + key := r.chunkKey(inode, indx) + keys = append(keys, key) + _ = p.LLen(ctx, key) + indx++ + } + cmds, err := p.Exec(ctx) + if err != nil { + logger.Warnf("delete chunks of inode %d: %s", inode, err) + return + } + for i, cmd := range cmds { + val, err := cmd.(*redis.IntCmd).Result() + if err == redis.Nil || val == 0 { + continue + } + idx, _ := strconv.Atoi(strings.Split(keys[i], "_")[1]) + err = r.deleteChunk(inode, uint32(idx)) + if err != nil { + logger.Warnf("delete chunk %s: %s", keys[i], err) + return + } + } + } + if tracking == "" { + tracking = inode.String() + ":" + strconv.FormatInt(int64(length), 10) + } + _ = r.rdb.ZRem(ctx, delfiles, tracking) +} + +func (r *redisMeta) compactChunk(inode Ino, indx uint32, force bool) { + // avoid too many or duplicated compaction + if !force { + r.Lock() + k := uint64(inode) + (uint64(indx) << 32) + if len(r.compacting) > 10 || r.compacting[k] { + r.Unlock() + return + } + r.compacting[k] = true + r.Unlock() + defer func() { + r.Lock() + delete(r.compacting, k) + r.Unlock() + }() + } + + var ctx = Background + vals, err := r.rdb.LRange(ctx, r.chunkKey(inode, indx), 0, 1000).Result() + if err != nil { + return + } + + ss := readSlices(vals) + skipped := skipSome(ss) + ss = ss[skipped:] + pos, size, chunks := compactChunk(ss) + if len(ss) < 2 || size == 0 { + return + } + + var chunkid uint64 + st := r.NewChunk(ctx, &chunkid) + if st != 0 { + return + } + logger.Debugf("compact %d:%d: skipped %d slices (%d bytes) %d slices (%d bytes)", inode, indx, skipped, pos, len(ss), size) + err = r.newMsg(CompactChunk, chunks, chunkid) + if err != nil { + if !strings.Contains(err.Error(), "not exist") && !strings.Contains(err.Error(), "not found") { + logger.Warnf("compact %d %d with %d slices: %s", inode, indx, len(ss), err) + } + return + } + var rs []*redis.IntCmd + key := r.chunkKey(inode, indx) + errno := r.txn(ctx, func(tx *redis.Tx) error { + rs = nil + vals2, err := tx.LRange(ctx, key, 0, 
int64(len(vals)-1)).Result() + if err != nil { + return err + } + if len(vals2) != len(vals) { + return syscall.EINVAL + } + for i, val := range vals2 { + if val != vals[i] { + return syscall.EINVAL + } + } + + _, err = tx.Pipelined(ctx, func(pipe redis.Pipeliner) error { + pipe.LTrim(ctx, key, int64(len(vals)), -1) + pipe.LPush(ctx, key, marshalSlice(pos, chunkid, size, 0, size)) + for i := skipped; i > 0; i-- { + pipe.LPush(ctx, key, vals[i-1]) + } + pipe.HSet(ctx, sliceRefs, r.sliceKey(chunkid, size), "0") // create the key to tracking it + for _, s := range ss { + rs = append(rs, pipe.HIncrBy(ctx, sliceRefs, r.sliceKey(s.chunkid, s.size), -1)) + } + return nil + }) + return err + }, key) + // there could be false-negative that the compaction is successful, double-check + if errno != 0 && errno != syscall.EINVAL { + if e := r.rdb.HGet(ctx, sliceRefs, r.sliceKey(chunkid, size)).Err(); e == redis.Nil { + errno = syscall.EINVAL // failed + } else if e == nil { + errno = 0 // successful + } + } + + if errno == syscall.EINVAL { + r.rdb.HIncrBy(ctx, sliceRefs, r.sliceKey(chunkid, size), -1) + logger.Infof("compaction for %d:%d is wasted, delete slice %d (%d bytes)", inode, indx, chunkid, size) + r.deleteSlice(chunkid, size) + } else if errno == 0 { + r.of.InvalidateChunk(inode, indx) + r.cleanupZeroRef(r.sliceKey(chunkid, size)) + for i, s := range ss { + if rs[i].Err() == nil && rs[i].Val() < 0 { + r.deleteSlice(s.chunkid, s.size) + } + } + if r.rdb.LLen(ctx, r.chunkKey(inode, indx)).Val() > 5 { + go func() { + // wait for the current compaction to finish + time.Sleep(time.Millisecond * 10) + r.compactChunk(inode, indx, force) + }() + } + } else { + logger.Warnf("compact %s: %s", key, errno) + } +} + +func (r *redisMeta) CompactAll(ctx Context, bar *utils.Bar) syscall.Errno { + var cursor uint64 + p := r.rdb.Pipeline() + + for { + keys, c, err := r.rdb.Scan(ctx, cursor, "c*_*", 10000).Result() + if err != nil { + logger.Warnf("scan chunks: %s", err) + return errno(err) + } + bar.IncrTotal(int64(len(keys))) + for _, key := range keys { + _ = p.LLen(ctx, key) + } + cmds, err := p.Exec(ctx) + if err != nil { + logger.Warnf("list slices: %s", err) + return errno(err) + } + for i, cmd := range cmds { + cnt := cmd.(*redis.IntCmd).Val() + if cnt > 1 { + var inode uint64 + var indx uint32 + n, err := fmt.Sscanf(keys[i], "c%d_%d", &inode, &indx) + if err == nil && n == 2 { + logger.Debugf("compact chunk %d:%d (%d slices)", inode, indx, cnt) + r.compactChunk(Ino(inode), indx, true) + } + } + bar.Increment() + } + if c == 0 { + break + } + cursor = c + } + return 0 +} + +func (r *redisMeta) cleanupLeakedInodes(delete bool) { + var ctx = Background + var keys []string + var cursor uint64 + var err error + var foundInodes = make(map[Ino]struct{}) + cutoff := time.Now().Add(time.Hour * -1) + for { + keys, cursor, err = r.rdb.Scan(ctx, cursor, "d*", 1000).Result() + if err != nil { + logger.Errorf("scan dentry: %s", err) + return + } + if len(keys) > 0 { + for _, key := range keys { + ino, _ := strconv.Atoi(key[1:]) + var entries []*Entry + eno := r.Readdir(ctx, Ino(ino), 0, &entries) + if eno != syscall.ENOENT && eno != 0 { + logger.Errorf("readdir %d: %s", ino, eno) + return + } + for _, e := range entries { + foundInodes[e.Inode] = struct{}{} + } + } + } + if cursor == 0 { + break + } + } + for { + keys, cursor, err = r.rdb.Scan(ctx, cursor, "i*", 1000).Result() + if err != nil { + logger.Errorf("scan inodes: %s", err) + break + } + if len(keys) > 0 { + values, err := r.rdb.MGet(ctx, keys...).Result() + 
if err != nil { + logger.Warnf("mget inodes: %s", err) + break + } + for i, v := range values { + if v == nil { + continue + } + var attr Attr + r.parseAttr([]byte(v.(string)), &attr) + ino, _ := strconv.Atoi(keys[i][1:]) + if _, ok := foundInodes[Ino(ino)]; !ok && time.Unix(attr.Ctime, 0).Before(cutoff) { + logger.Infof("found dangling inode: %s %+v", keys[i], attr) + if delete { + err = r.doDeleteSustainedInode(0, Ino(ino)) + if err != nil { + logger.Errorf("delete leaked inode %d : %s", ino, err) + } + } + } + } + } + if cursor == 0 { + break + } + } +} + +func (r *redisMeta) ListSlices(ctx Context, slices map[Ino][]Slice, delete bool, showProgress func()) syscall.Errno { + r.cleanupLeakedInodes(delete) + r.cleanupLeakedChunks() + r.cleanupOldSliceRefs() + if delete { + r.doCleanupSlices() + } + + var cursor uint64 + p := r.rdb.Pipeline() + for { + keys, c, err := r.rdb.Scan(ctx, cursor, "c*_*", 10000).Result() + if err != nil { + logger.Warnf("scan chunks: %s", err) + return errno(err) + } + for _, key := range keys { + _ = p.LRange(ctx, key, 0, 100000000) + } + cmds, err := p.Exec(ctx) + if err != nil { + logger.Warnf("list slices: %s", err) + return errno(err) + } + for _, cmd := range cmds { + key := cmd.(*redis.StringSliceCmd).Args()[1].(string) + inode, _ := strconv.Atoi(strings.Split(key[1:], "_")[0]) + vals := cmd.(*redis.StringSliceCmd).Val() + ss := readSlices(vals) + for _, s := range ss { + if s.chunkid > 0 { + slices[Ino(inode)] = append(slices[Ino(inode)], Slice{Chunkid: s.chunkid, Size: s.size}) + if showProgress != nil { + showProgress() + } + } + } + } + if c == 0 { + break + } + cursor = c + } + return 0 +} + +func (r *redisMeta) GetXattr(ctx Context, inode Ino, name string, vbuff *[]byte) syscall.Errno { + defer timeit(time.Now()) + inode = r.checkRoot(inode) + var err error + *vbuff, err = r.rdb.HGet(ctx, r.xattrKey(inode), name).Bytes() + if err == redis.Nil { + err = ENOATTR + } + return errno(err) +} + +func (r *redisMeta) ListXattr(ctx Context, inode Ino, names *[]byte) syscall.Errno { + defer timeit(time.Now()) + inode = r.checkRoot(inode) + vals, err := r.rdb.HKeys(ctx, r.xattrKey(inode)).Result() + if err != nil { + return errno(err) + } + *names = nil + for _, name := range vals { + *names = append(*names, []byte(name)...) 
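+ // terminate each name with a NUL byte: listxattr expects a packed, NUL-separated list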
+ *names = append(*names, 0) + } + return 0 +} + +func (r *redisMeta) SetXattr(ctx Context, inode Ino, name string, value []byte, flags uint32) syscall.Errno { + if name == "" { + return syscall.EINVAL + } + defer timeit(time.Now()) + inode = r.checkRoot(inode) + c := Background + key := r.xattrKey(inode) + return r.txn(ctx, func(tx *redis.Tx) error { + switch flags { + case XattrCreate: + ok, err := tx.HSetNX(c, key, name, value).Result() + if err != nil { + return err + } + if !ok { + return syscall.EEXIST + } + return nil + case XattrReplace: + if ok, err := tx.HExists(c, key, name).Result(); err != nil { + return err + } else if !ok { + return ENOATTR + } + _, err := r.rdb.HSet(ctx, key, name, value).Result() + return err + default: // XattrCreateOrReplace + _, err := r.rdb.HSet(ctx, key, name, value).Result() + return err + } + }, key) +} + +func (r *redisMeta) RemoveXattr(ctx Context, inode Ino, name string) syscall.Errno { + if name == "" { + return syscall.EINVAL + } + defer timeit(time.Now()) + inode = r.checkRoot(inode) + n, err := r.rdb.HDel(ctx, r.xattrKey(inode), name).Result() + if err != nil { + return errno(err) + } else if n == 0 { + return ENOATTR + } else { + return 0 + } +} + +func (r *redisMeta) checkServerConfig() { + rawInfo, err := r.rdb.Info(Background).Result() + if err != nil { + logger.Warnf("parse info: %s", err) + return + } + _, err = checkRedisInfo(rawInfo) + if err != nil { + logger.Warnf("parse info: %s", err) + } + + start := time.Now() + _ = r.rdb.Ping(Background) + logger.Infof("Ping redis: %s", time.Since(start)) +} + +func (m *redisMeta) dumpEntry(inode Ino) (*DumpedEntry, error) { + ctx := Background + e := &DumpedEntry{} + st := m.txn(ctx, func(tx *redis.Tx) error { + a, err := tx.Get(ctx, m.inodeKey(inode)).Bytes() + if err != nil { + return err + } + attr := &Attr{} + m.parseAttr(a, attr) + e.Attr = dumpAttr(attr) + e.Attr.Inode = inode + + keys, err := tx.HGetAll(ctx, m.xattrKey(inode)).Result() + if err != nil { + return err + } + if len(keys) > 0 { + xattrs := make([]*DumpedXattr, 0, len(keys)) + for k, v := range keys { + xattrs = append(xattrs, &DumpedXattr{k, v}) + } + sort.Slice(xattrs, func(i, j int) bool { return xattrs[i].Name < xattrs[j].Name }) + e.Xattrs = xattrs + } + + if attr.Typ == TypeFile { + for indx := uint32(0); uint64(indx)*ChunkSize < attr.Length; indx++ { + vals, err := tx.LRange(ctx, m.chunkKey(inode, indx), 0, 1000000).Result() + if err != nil { + return err + } + ss := readSlices(vals) + slices := make([]*DumpedSlice, 0, len(ss)) + for _, s := range ss { + slices = append(slices, &DumpedSlice{Chunkid: s.chunkid, Pos: s.pos, Size: s.size, Off: s.off, Len: s.len}) + } + e.Chunks = append(e.Chunks, &DumpedChunk{indx, slices}) + } + } else if attr.Typ == TypeSymlink { + if e.Symlink, err = tx.Get(ctx, m.symKey(inode)).Result(); err != nil { + return err + } + } + + return nil + }, m.inodeKey(inode)) + if st == 0 { + return e, nil + } else { + return nil, fmt.Errorf("dump entry error: %d", st) + } +} + +func (m *redisMeta) dumpEntryFast(inode Ino) *DumpedEntry { + e := &DumpedEntry{} + a := []byte(m.snap.stringMap[m.inodeKey(inode)]) + if len(a) == 0 { + if inode != TrashInode { + logger.Warnf("The entry of the inode was not found. 
inode: %v", inode) + } + return nil + } + attr := &Attr{} + m.parseAttr(a, attr) + e.Attr = dumpAttr(attr) + e.Attr.Inode = inode + + keys := m.snap.hashMap[m.xattrKey(inode)] + if len(keys) > 0 { + xattrs := make([]*DumpedXattr, 0, len(keys)) + for k, v := range keys { + xattrs = append(xattrs, &DumpedXattr{k, v}) + } + sort.Slice(xattrs, func(i, j int) bool { return xattrs[i].Name < xattrs[j].Name }) + e.Xattrs = xattrs + } + + if attr.Typ == TypeFile { + for indx := uint32(0); uint64(indx)*ChunkSize < attr.Length; indx++ { + vals := m.snap.listMap[m.chunkKey(inode, indx)] + ss := readSlices(vals) + slices := make([]*DumpedSlice, 0, len(ss)) + for _, s := range ss { + slices = append(slices, &DumpedSlice{Chunkid: s.chunkid, Pos: s.pos, Size: s.size, Off: s.off, Len: s.len}) + } + e.Chunks = append(e.Chunks, &DumpedChunk{indx, slices}) + } + } else if attr.Typ == TypeSymlink { + if m.snap.stringMap[m.symKey(inode)] == "" { + logger.Warnf("The symlink of inode %d is not found", inode) + } else { + e.Symlink = m.snap.stringMap[m.symKey(inode)] + } + } + return e +} + +func (m *redisMeta) dumpDir(inode Ino, tree *DumpedEntry, bw *bufio.Writer, depth int, showProgress func(totalIncr, currentIncr int64)) error { + bwWrite := func(s string) { + if _, err := bw.WriteString(s); err != nil { + panic(err) + } + } + var err error + var dirs map[string]string + if m.snap != nil { + dirs = m.snap.hashMap[m.entryKey(inode)] + } else { + dirs, err = m.rdb.HGetAll(context.Background(), m.entryKey(inode)).Result() + if err != nil { + return err + } + } + + if showProgress != nil { + showProgress(int64(len(dirs)), 0) + } + if err = tree.writeJsonWithOutEntry(bw, depth); err != nil { + return err + } + var sortedName []string + for name := range dirs { + sortedName = append(sortedName, name) + } + sort.Slice(sortedName, func(i, j int) bool { return sortedName[i] < sortedName[j] }) + for idx, name := range sortedName { + typ, inode := m.parseEntry([]byte(dirs[name])) + var entry *DumpedEntry + if m.snap != nil { + entry = m.dumpEntryFast(inode) + } else { + entry, err = m.dumpEntry(inode) + if err != nil { + return err + } + } + if entry == nil { + continue + } + + entry.Name = name + if typ == TypeDirectory { + err = m.dumpDir(inode, entry, bw, depth+2, showProgress) + } else { + err = entry.writeJSON(bw, depth+2) + } + if err != nil { + return err + } + if idx != len(sortedName)-1 { + bwWrite(",") + } + if showProgress != nil { + showProgress(0, 1) + } + } + bwWrite(fmt.Sprintf("\n%s}\n%s}", strings.Repeat(jsonIndent, depth+1), strings.Repeat(jsonIndent, depth))) + return nil +} + +type redisSnap struct { + stringMap map[string]string //i* s* + listMap map[string][]string //c* + hashMap map[string]map[string]string //d*(included delfiles) x* +} + +func (m *redisMeta) makeSnap(bar *utils.Bar) error { + m.snap = &redisSnap{ + stringMap: make(map[string]string), + listMap: make(map[string][]string), + hashMap: make(map[string]map[string]string), + } + ctx := context.Background() + + listType := func(keys []string) error { + p := m.rdb.Pipeline() + for _, key := range keys { + p.LRange(ctx, key, 0, -1) + } + cmds, err := p.Exec(ctx) + if err != nil { + return err + } + for _, cmd := range cmds { + if sliceCmd, ok := cmd.(*redis.StringSliceCmd); ok { + if key, ok := cmd.Args()[1].(string); ok { + m.snap.listMap[key] = sliceCmd.Val() + } + } + bar.Increment() + } + + return nil + } + + stringType := func(keys []string) error { + values, err := m.rdb.MGet(ctx, keys...).Result() + if err != nil { + return err + } 
+ for i := 0; i < len(keys); i++ { + if s, ok := values[i].(string); ok { + m.snap.stringMap[keys[i]] = s + } + bar.Increment() + } + return nil + } + + hashType := func(keys []string) error { + p := m.rdb.Pipeline() + for _, key := range keys { + if key == delfiles { + continue + } + p.HGetAll(ctx, key) + } + cmds, err := p.Exec(ctx) + if err != nil { + return err + } + for _, cmd := range cmds { + if stringMapCmd, ok := cmd.(*redis.StringStringMapCmd); ok { + if key, ok := cmd.Args()[1].(string); ok { + m.snap.hashMap[key] = stringMapCmd.Val() + } + } + bar.Increment() + } + return nil + } + + typeMap := map[string]func(keys []string) error{ + "c*": listType, + "i*": stringType, + "s*": stringType, + "d*": hashType, + "x*": hashType, + } + + scanner := func(match string, handlerKey func(keys []string) error) error { + var cursor uint64 + for { + keys, c, err := m.rdb.Scan(ctx, cursor, match, 10000).Result() + if err != nil { + return err + } + if len(keys) > 0 { + if err = handlerKey(keys); err != nil { + return err + } + } + if c == 0 { + break + } + cursor = c + } + return nil + } + + for match, typ := range typeMap { + if err := scanner(match, typ); err != nil { + return err + } + } + return nil +} + +func (m *redisMeta) DumpMeta(w io.Writer, root Ino) (err error) { + defer func() { + if p := recover(); p != nil { + if e, ok := p.(error); ok { + err = e + } else { + err = errors.Errorf("DumpMeta error: %v", p) + } + } + }() + ctx := Background + zs, err := m.rdb.ZRangeWithScores(ctx, delfiles, 0, -1).Result() + if err != nil { + return err + } + dels := make([]*DumpedDelFile, 0, len(zs)) + for _, z := range zs { + parts := strings.Split(z.Member.(string), ":") + if len(parts) != 2 { + return fmt.Errorf("invalid delfile string: %s", z.Member.(string)) + } + inode, _ := strconv.ParseUint(parts[0], 10, 64) + length, _ := strconv.ParseUint(parts[1], 10, 64) + dels = append(dels, &DumpedDelFile{Ino(inode), length, int64(z.Score)}) + } + + progress := utils.NewProgress(false, false) + var tree, trash *DumpedEntry + if root == 0 { + root = m.root + } + if root == 1 { + bar := progress.AddCountBar("Snapshot keys", m.rdb.DBSize(ctx).Val()) + if err = m.makeSnap(bar); err != nil { + return errors.Errorf("Fetch all metadata from Redis: %s", err) + } + bar.Done() + tree = m.dumpEntryFast(root) + trash = m.dumpEntryFast(TrashInode) + } else { + if tree, err = m.dumpEntry(root); err != nil { + return err + } + } + if tree == nil { + return errors.New("The entry of the root inode was not found") + } + tree.Name = "FSTree" + format, err := m.Load() + if err != nil { + return err + } + + rs, _ := m.rdb.MGet(ctx, []string{usedSpace, totalInodes, "nextinode", "nextchunk", "nextsession", "nextTrash"}...).Result() + cs := make([]int64, len(rs)) + for i, r := range rs { + if r != nil { + cs[i], _ = strconv.ParseInt(r.(string), 10, 64) + } + } + + keys, err := m.rdb.ZRange(ctx, allSessions, 0, -1).Result() + if err != nil { + return err + } + sessions := make([]*DumpedSustained, 0, len(keys)) + for _, k := range keys { + sid, _ := strconv.ParseUint(k, 10, 64) + var ss []string + if root == 1 { + ss = m.snap.listMap[m.sustained(sid)] + } else { + ss, err = m.rdb.SMembers(ctx, m.sustained(sid)).Result() + if err != nil { + return err + } + } + if len(ss) > 0 { + inodes := make([]Ino, 0, len(ss)) + for _, s := range ss { + inode, _ := strconv.ParseUint(s, 10, 64) + inodes = append(inodes, Ino(inode)) + } + sessions = append(sessions, &DumpedSustained{sid, inodes}) + } + } + + dm := &DumpedMeta{ + Setting: 
format, + Counters: &DumpedCounters{ + UsedSpace: cs[0], + UsedInodes: cs[1], + NextInode: cs[2] + 1, // Redis nextInode/nextChunk is 1 smaller than sql/tkv + NextChunk: cs[3] + 1, + NextSession: cs[4], + NextTrash: cs[5], + }, + Sustained: sessions, + DelFiles: dels, + } + bw, err := dm.writeJsonWithOutTree(w) + if err != nil { + return err + } + + bar := progress.AddCountBar("Dumped entries", 1) // with root + bar.Increment() + if trash != nil { + trash.Name = "Trash" + bar.IncrTotal(1) + bar.Increment() + } + showProgress := func(totalIncr, currentIncr int64) { + bar.IncrTotal(totalIncr) + bar.IncrInt64(currentIncr) + } + if err = m.dumpDir(root, tree, bw, 1, showProgress); err != nil { + return err + } + if trash != nil { + if _, err = bw.WriteString(","); err != nil { + return err + } + if err = m.dumpDir(TrashInode, trash, bw, 1, showProgress); err != nil { + return err + } + } + if _, err = bw.WriteString("\n}\n"); err != nil { + return err + } + progress.Done() + m.snap = nil + + return bw.Flush() +} + +func (m *redisMeta) loadEntry(e *DumpedEntry, cs *DumpedCounters, refs map[string]int) error { + inode := e.Attr.Inode + logger.Debugf("Loading entry inode %d name %s", inode, e.Name) + ctx := Background + attr := loadAttr(e.Attr) + attr.Parent = e.Parent + p := m.rdb.Pipeline() + if attr.Typ == TypeFile { + attr.Length = e.Attr.Length + for _, c := range e.Chunks { + if len(c.Slices) == 0 { + continue + } + slices := make([]string, 0, len(c.Slices)) + for _, s := range c.Slices { + slices = append(slices, string(marshalSlice(s.Pos, s.Chunkid, s.Size, s.Off, s.Len))) + m.Lock() + refs[m.sliceKey(s.Chunkid, s.Size)]++ + m.Unlock() + if cs.NextChunk < int64(s.Chunkid) { + cs.NextChunk = int64(s.Chunkid) + } + } + p.RPush(ctx, m.chunkKey(inode, c.Index), slices) + } + } else if attr.Typ == TypeDirectory { + attr.Length = 4 << 10 + if len(e.Entries) > 0 { + dentries := make(map[string]interface{}) + for _, c := range e.Entries { + dentries[c.Name] = m.packEntry(typeFromString(c.Attr.Type), c.Attr.Inode) + } + p.HSet(ctx, m.entryKey(inode), dentries) + } + } else if attr.Typ == TypeSymlink { + attr.Length = uint64(len(e.Symlink)) + p.Set(ctx, m.symKey(inode), e.Symlink, 0) + } + if inode > 1 && inode != TrashInode { + cs.UsedSpace += align4K(attr.Length) + cs.UsedInodes += 1 + } + if inode < TrashInode { + if cs.NextInode < int64(inode) { + cs.NextInode = int64(inode) + } + } else { + if cs.NextTrash < int64(inode)-TrashInode { + cs.NextTrash = int64(inode) - TrashInode + } + } + + if len(e.Xattrs) > 0 { + xattrs := make(map[string]interface{}) + for _, x := range e.Xattrs { + xattrs[x.Name] = x.Value + } + p.HSet(ctx, m.xattrKey(inode), xattrs) + } + p.Set(ctx, m.inodeKey(inode), m.marshal(attr), 0) + _, err := p.Exec(ctx) + return err +} + +func (m *redisMeta) LoadMeta(r io.Reader) error { + ctx := Background + dbsize, err := m.rdb.DBSize(ctx).Result() + if err != nil { + return err + } + if dbsize > 0 { + return fmt.Errorf("Database %s is not empty", m.Name()) + } + + dec := json.NewDecoder(r) + dm := &DumpedMeta{} + if err = dec.Decode(dm); err != nil { + return err + } + format, err := json.MarshalIndent(dm.Setting, "", "") + if err != nil { + return err + } + + progress := utils.NewProgress(false, false) + bar := progress.AddCountBar("Collected entries", 1) // with root + showProgress := func(totalIncr, currentIncr int64) { + bar.IncrTotal(totalIncr) + bar.IncrInt64(currentIncr) + } + dm.FSTree.Attr.Inode = 1 + entries := make(map[Ino]*DumpedEntry) + if err = 
collectEntry(dm.FSTree, entries, showProgress); err != nil { + return err + } + if dm.Trash != nil { + bar.IncrTotal(1) + if err = collectEntry(dm.Trash, entries, showProgress); err != nil { + return err + } + } + bar.Done() + + counters := &DumpedCounters{} + refs := make(map[string]int) + bar = progress.AddCountBar("Loaded entries", int64(len(entries))) + maxNum := 100 + pool := make(chan struct{}, maxNum) + errCh := make(chan error, 100) + done := make(chan struct{}, 1) + var wg sync.WaitGroup + for _, entry := range entries { + select { + case err = <-errCh: + return err + default: + } + pool <- struct{}{} + wg.Add(1) + go func(entry *DumpedEntry) { + defer func() { + wg.Done() + bar.Increment() + <-pool + }() + if err = m.loadEntry(entry, counters, refs); err != nil { + errCh <- err + } + }(entry) + } + + go func() { + wg.Wait() + close(done) + }() + + select { + case err = <-errCh: + return err + case <-done: + } + progress.Done() + logger.Infof("Dumped counters: %+v", *dm.Counters) + logger.Infof("Loaded counters: %+v", *counters) + + p := m.rdb.Pipeline() + p.Set(ctx, "setting", format, 0) + cs := make(map[string]interface{}) + cs[usedSpace] = counters.UsedSpace + cs[totalInodes] = counters.UsedInodes + cs["nextinode"] = counters.NextInode + cs["nextchunk"] = counters.NextChunk + cs["nextsession"] = counters.NextSession + cs["nextTrash"] = counters.NextTrash + p.MSet(ctx, cs) + if len(dm.DelFiles) > 0 { + zs := make([]*redis.Z, 0, len(dm.DelFiles)) + for _, d := range dm.DelFiles { + zs = append(zs, &redis.Z{ + Score: float64(d.Expire), + Member: m.toDelete(d.Inode, d.Length), + }) + } + p.ZAdd(ctx, delfiles, zs...) + } + slices := make(map[string]interface{}) + for k, v := range refs { + if v > 1 { + slices[k] = v - 1 + } + } + if len(slices) > 0 { + p.HSet(ctx, sliceRefs, slices) + } + _, err = p.Exec(ctx) + return err +} diff --git a/pkg/meta/redis_lock.go b/pkg/meta/redis_lock.go new file mode 100644 index 0000000..11508e4 --- /dev/null +++ b/pkg/meta/redis_lock.go @@ -0,0 +1,208 @@ +//go:build !noredis +// +build !noredis + +/* + * JuiceFS, Copyright 2020 Juicedata, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package meta + +import ( + "strconv" + "strings" + "syscall" + "time" + + "github.com/go-redis/redis/v8" +) + +func (r *redisMeta) Flock(ctx Context, inode Ino, owner uint64, ltype uint32, block bool) syscall.Errno { + ikey := r.flockKey(inode) + lkey := r.ownerKey(owner) + if ltype == F_UNLCK { + return r.txn(ctx, func(tx *redis.Tx) error { + lkeys, err := tx.HKeys(ctx, ikey).Result() + if err != nil { + return err + } + _, err = tx.TxPipelined(ctx, func(pipe redis.Pipeliner) error { + pipe.HDel(ctx, ikey, lkey) + if len(lkeys) == 1 && lkeys[0] == lkey { + pipe.SRem(ctx, r.lockedKey(r.sid), ikey) + } + return nil + }) + return err + }, ikey) + } + var err syscall.Errno + for { + err = r.txn(ctx, func(tx *redis.Tx) error { + owners, err := tx.HGetAll(ctx, ikey).Result() + if err != nil { + return err + } + if ltype == F_RDLCK { + for _, v := range owners { + if v == "W" { + return syscall.EAGAIN + } + } + _, err = tx.TxPipelined(ctx, func(pipe redis.Pipeliner) error { + pipe.HSet(ctx, ikey, lkey, "R") + return nil + }) + return err + } + delete(owners, lkey) + if len(owners) > 0 { + return syscall.EAGAIN + } + _, err = tx.TxPipelined(ctx, func(pipe redis.Pipeliner) error { + pipe.HSet(ctx, ikey, lkey, "W") + pipe.SAdd(ctx, r.lockedKey(r.sid), ikey) + return nil + }) + return err + }, ikey) + + if !block || err != syscall.EAGAIN { + break + } + if ltype == F_WRLCK { + time.Sleep(time.Millisecond * 1) + } else { + time.Sleep(time.Millisecond * 10) + } + if ctx.Canceled() { + return syscall.EINTR + } + } + return err +} + +func (r *redisMeta) Getlk(ctx Context, inode Ino, owner uint64, ltype *uint32, start, end *uint64, pid *uint32) syscall.Errno { + if *ltype == F_UNLCK { + *start = 0 + *end = 0 + *pid = 0 + return 0 + } + lkey := r.ownerKey(owner) + owners, err := r.rdb.HGetAll(ctx, r.plockKey(inode)).Result() + if err != nil { + return errno(err) + } + delete(owners, lkey) // exclude itself + for k, d := range owners { + ls := loadLocks([]byte(d)) + for _, l := range ls { + // find conflicted locks + if (*ltype == F_WRLCK || l.ltype == F_WRLCK) && *end >= l.start && *start <= l.end { + *ltype = l.ltype + *start = l.start + *end = l.end + sid, _ := strconv.Atoi(strings.Split(k, "_")[0]) + if uint64(sid) == r.sid { + *pid = l.pid + } else { + *pid = 0 + } + return 0 + } + } + } + *ltype = F_UNLCK + *start = 0 + *end = 0 + *pid = 0 + return 0 +} + +func (r *redisMeta) Setlk(ctx Context, inode Ino, owner uint64, block bool, ltype uint32, start, end uint64, pid uint32) syscall.Errno { + ikey := r.plockKey(inode) + lkey := r.ownerKey(owner) + var err syscall.Errno + lock := plockRecord{ltype, pid, start, end} + for { + err = r.txn(ctx, func(tx *redis.Tx) error { + if ltype == F_UNLCK { + d, err := tx.HGet(ctx, ikey, lkey).Result() + if err != nil { + return err + } + ls := loadLocks([]byte(d)) + if len(ls) == 0 { + return nil + } + ls = updateLocks(ls, lock) + var lkeys []string + if len(ls) == 0 { + lkeys, err = tx.HKeys(ctx, ikey).Result() + if err != nil { + return err + } + } + _, err = tx.TxPipelined(ctx, func(pipe redis.Pipeliner) error { + if len(ls) == 0 { + pipe.HDel(ctx, ikey, lkey) + if len(lkeys) == 1 && lkeys[0] == lkey { + pipe.SRem(ctx, r.lockedKey(r.sid), ikey) + } + } else { + pipe.HSet(ctx, ikey, lkey, dumpLocks(ls)) + } + return nil + }) + return err + } + owners, err := tx.HGetAll(ctx, ikey).Result() + if err != nil { + return err + } + ls := loadLocks([]byte(owners[lkey])) + delete(owners, lkey) + for _, d := range owners { + ls := loadLocks([]byte(d)) + for _, l := 
range ls { + // find conflicted locks + if (ltype == F_WRLCK || l.ltype == F_WRLCK) && end >= l.start && start <= l.end { + return syscall.EAGAIN + } + } + } + ls = updateLocks(ls, lock) + _, err = tx.TxPipelined(ctx, func(pipe redis.Pipeliner) error { + pipe.HSet(ctx, ikey, lkey, dumpLocks(ls)) + pipe.SAdd(ctx, r.lockedKey(r.sid), ikey) + return nil + }) + return err + }, ikey) + + if !block || err != syscall.EAGAIN { + break + } + if ltype == F_WRLCK { + time.Sleep(time.Millisecond * 1) + } else { + time.Sleep(time.Millisecond * 10) + } + if ctx.Canceled() { + return syscall.EINTR + } + } + return err +} diff --git a/pkg/meta/redis_test.go b/pkg/meta/redis_test.go new file mode 100644 index 0000000..d174c12 --- /dev/null +++ b/pkg/meta/redis_test.go @@ -0,0 +1,1245 @@ +/* + * JuiceFS, Copyright 2020 Juicedata, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +//nolint:errcheck +package meta + +import ( + "bytes" + "context" + "runtime" + "strconv" + "sync" + "syscall" + "testing" + "time" + + "github.com/juicedata/juicefs/pkg/utils" +) + +func TestRedisClient(t *testing.T) { + var conf = Config{MaxDeletes: 1} + _, err := newRedisMeta("http", "127.0.0.1:6379/10", &conf) + if err == nil { + t.Fatal("meta created with invalid url") + } + m, err := newRedisMeta("redis", "127.0.0.1:6379/10", &conf) + if err != nil || m.Name() != "redis" { + t.Fatalf("create meta: %s", err) + } + testMeta(t, m) +} + +func testMeta(t *testing.T, m Meta) { + if err := m.Reset(); err != nil { + t.Fatalf("reset meta: %s", err) + } + var base *baseMeta + switch m := m.(type) { + case *redisMeta: + base = &m.baseMeta + case *dbMeta: + base = &m.baseMeta + case *kvMeta: + base = &m.baseMeta + } + testMetaClient(t, m) + testTruncateAndDelete(t, m) + testTrash(t, m) + testRemove(t, m) + testStickyBit(t, m) + testLocks(t, m) + testConcurrentWrite(t, m) + testCompaction(t, m) + testCopyFileRange(t, m) + testCloseSession(t, m) + base.conf.CaseInsensi = true + testCaseIncensi(t, m) + base.conf.OpenCache = time.Second + base.of.expire = time.Second + testOpenCache(t, m) + base.conf.ReadOnly = true + testReadOnly(t, m) +} + +func testMetaClient(t *testing.T, m Meta) { + m.OnMsg(DeleteChunk, func(args ...interface{}) error { return nil }) + ctx := Background + var attr = &Attr{} + if st := m.GetAttr(ctx, 1, attr); st != 0 || attr.Mode != 0777 { // getattr of root always succeed + t.Fatalf("getattr root: %s", st) + } + + if err := m.Init(Format{Name: "test"}, true); err != nil { + t.Fatalf("initialize failed: %s", err) + } + if err := m.Init(Format{Name: "test2"}, false); err == nil { // not allowed + t.Fatalf("change name without --force is not allowed") + } + format, err := m.Load() + if err != nil { + t.Fatalf("load failed after initialization: %s", err) + } + if format.Name != "test" { + t.Fatalf("load got volume name %s, expected %s", format.Name, "test") + } + if err = m.NewSession(); err != nil { + t.Fatalf("new session: %s", err) + } + ses, err := m.ListSessions() + if err != nil || len(ses) != 1 { + 
t.Fatalf("list sessions %+v: %s", ses, err) + } + switch r := m.(type) { + case *redisMeta: + if r.sid != ses[0].Sid { + t.Fatalf("my sid %d != registered sid %d", r.sid, ses[0].Sid) + } + case *dbMeta: + if r.sid != ses[0].Sid { + t.Fatalf("my sid %d != registered sid %d", r.sid, ses[0].Sid) + } + case *kvMeta: + if r.sid != ses[0].Sid { + t.Fatalf("my sid %d != registered sid %d", r.sid, ses[0].Sid) + } + } + go m.CleanStaleSessions() + + var parent, inode, dummyInode Ino + if st := m.Mkdir(ctx, 1, "d", 0640, 022, 0, &parent, attr); st != 0 { + t.Fatalf("mkdir d: %s", st) + } + defer m.Rmdir(ctx, 1, "d") + if st := m.Unlink(ctx, 1, "d"); st != syscall.EPERM { + t.Fatalf("unlink d: %s", st) + } + if st := m.Rmdir(ctx, parent, "."); st != syscall.EINVAL { + t.Fatalf("unlink d.: %s", st) + } + if st := m.Rmdir(ctx, parent, ".."); st != syscall.ENOTEMPTY { + t.Fatalf("unlink d..: %s", st) + } + if st := m.Lookup(ctx, 1, "d", &parent, attr); st != 0 { + t.Fatalf("lookup d: %s", st) + } + if st := m.Lookup(ctx, 1, "d", &parent, nil); st != syscall.EINVAL { + t.Fatalf("lookup d: %s", st) + } + if st := m.Lookup(ctx, 1, "..", &inode, attr); st != 0 || inode != 1 { + t.Fatalf("lookup ..: %s", st) + } + if st := m.Lookup(ctx, parent, ".", &inode, attr); st != 0 || inode != parent { + t.Fatalf("lookup .: %s", st) + } + if st := m.Lookup(ctx, parent, "..", &inode, attr); st != 0 || inode != 1 { + t.Fatalf("lookup ..: %s", st) + } + if attr.Nlink != 3 { + t.Fatalf("nlink expect 3, but got %d", attr.Nlink) + } + if st := m.Access(ctx, parent, 4, attr); st != 0 { + t.Fatalf("access d: %s", st) + } + if st := m.Create(ctx, parent, "f", 0650, 022, 0, &inode, attr); st != 0 { + t.Fatalf("create f: %s", st) + } + _ = m.Close(ctx, inode) + var tino Ino + if st := m.Lookup(ctx, inode, ".", &tino, attr); st != syscall.ENOTDIR { + t.Fatalf("lookup /d/f/.: %s", st) + } + if st := m.Lookup(ctx, inode, "..", &tino, attr); st != syscall.ENOTDIR { + t.Fatalf("lookup /d/f/..: %s", st) + } + defer m.Unlink(ctx, parent, "f") + if st := m.Rmdir(ctx, parent, "f"); st != syscall.ENOTDIR { + t.Fatalf("rmdir f: %s", st) + } + if st := m.Rmdir(ctx, 1, "d"); st != syscall.ENOTEMPTY { + t.Fatalf("rmdir d: %s", st) + } + if st := m.Mknod(ctx, inode, "df", TypeFile, 0650, 022, 0, &dummyInode, nil); st != syscall.ENOTDIR { + t.Fatalf("create fd: %s", st) + } + if st := m.Mknod(ctx, parent, "f", TypeFile, 0650, 022, 0, &inode, attr); st != syscall.EEXIST { + t.Fatalf("create f: %s", st) + } + if st := m.Lookup(ctx, parent, "f", &inode, attr); st != 0 { + t.Fatalf("lookup f: %s", st) + } + if st := m.Resolve(ctx, 1, "d/f", &inode, attr); st != 0 && st != syscall.ENOTSUP { + t.Fatalf("resolve d/f: %s", st) + } + if st := m.Resolve(ctx, parent, "/f", &inode, attr); st != 0 && st != syscall.ENOTSUP { + t.Fatalf("resolve f: %s", st) + } + var ctx2 = NewContext(0, 1, []uint32{1}) + if st := m.Resolve(ctx2, parent, "/f", &inode, attr); st != syscall.EACCES && st != syscall.ENOTSUP { + t.Fatalf("resolve f: %s", st) + } + if st := m.Resolve(ctx, parent, "/f/c", &inode, attr); st != syscall.ENOTDIR && st != syscall.ENOTSUP { + t.Fatalf("resolve f: %s", st) + } + if st := m.Resolve(ctx, parent, "/f2", &inode, attr); st != syscall.ENOENT && st != syscall.ENOTSUP { + t.Fatalf("resolve f2: %s", st) + } + // check owner permission + var p1, c1 Ino + if st := m.Mkdir(ctx2, 1, "d1", 02755, 022, 0, &p1, attr); st != 0 { + t.Fatalf("mkdir d1: %s", st) + } + attr.Gid = 1 + m.SetAttr(ctx, p1, SetAttrGID, 0, attr) + if attr.Mode&02000 == 0 { + 
t.Fatalf("SGID is lost") + } + var ctx3 = NewContext(2, 2, []uint32{2}) + if st := m.Mkdir(ctx3, p1, "d2", 0777, 022, 0, &c1, attr); st != 0 { + t.Fatalf("mkdir d2: %s", st) + } + if attr.Gid != ctx2.Gid() { + t.Fatalf("inherit gid: %d != %d", attr.Gid, ctx2.Gid()) + } + if runtime.GOOS == "linux" && attr.Mode&02000 == 0 { + t.Fatalf("not inherit sgid") + } + if st := m.Resolve(ctx2, 1, "/d1/d2", nil, nil); st != 0 && st != syscall.ENOTSUP { + t.Fatalf("resolve /d1/d2: %s", st) + } + m.Rmdir(ctx2, p1, "d2") + m.Rmdir(ctx2, 1, "d1") + + attr.Atime = 2 + attr.Mtime = 2 + attr.Uid = 1 + attr.Gid = 1 + attr.Mode = 0640 + if st := m.SetAttr(ctx, inode, SetAttrAtime|SetAttrMtime|SetAttrUID|SetAttrGID|SetAttrMode, 0, attr); st != 0 { + t.Fatalf("setattr f: %s", st) + } + if st := m.SetAttr(ctx, inode, 0, 0, attr); st != 0 { // changes nothing + t.Fatalf("setattr f: %s", st) + } + if st := m.GetAttr(ctx, inode, attr); st != 0 { + t.Fatalf("getattr f: %s", st) + } + if attr.Atime != 2 || attr.Mtime != 2 || attr.Uid != 1 || attr.Gid != 1 || attr.Mode != 0640 { + t.Fatalf("atime:%d mtime:%d uid:%d gid:%d mode:%o", attr.Atime, attr.Mtime, attr.Uid, attr.Gid, attr.Mode) + } + if st := m.SetAttr(ctx, inode, SetAttrAtimeNow|SetAttrMtimeNow, 0, attr); st != 0 { + t.Fatalf("setattr f: %s", st) + } + fakeCtx := NewContext(100, 2, []uint32{2, 1}) + if st := m.Access(fakeCtx, parent, 2, nil); st != syscall.EACCES { + t.Fatalf("access d: %s", st) + } + if st := m.Access(fakeCtx, inode, 4, nil); st != 0 { + t.Fatalf("access f: %s", st) + } + var entries []*Entry + if st := m.Readdir(ctx, parent, 0, &entries); st != 0 { + t.Fatalf("readdir: %s", st) + } else if len(entries) != 3 { + t.Fatalf("entries: %d", len(entries)) + } else if string(entries[0].Name) != "." || string(entries[1].Name) != ".." 
|| string(entries[2].Name) != "f" { + t.Fatalf("entries: %+v", entries) + } + if st := m.Rename(ctx, parent, "f", 1, "f2", RenameWhiteout, &inode, attr); st != syscall.ENOTSUP { + t.Fatalf("rename d/f -> f2: %s", st) + } + if st := m.Rename(ctx, parent, "f", 1, "f2", 0, &inode, attr); st != 0 { + t.Fatalf("rename d/f -> f2: %s", st) + } + defer func() { + _ = m.Unlink(ctx, 1, "f2") + }() + if st := m.Rename(ctx, 1, "f2", 1, "f2", 0, &inode, attr); st != 0 { + t.Fatalf("rename f2 -> f2: %s", st) + } + if st := m.Rename(ctx, 1, "f2", 1, "f", RenameExchange, &inode, attr); st != syscall.ENOENT { + t.Fatalf("rename f2 -> f: %s", st) + } + if st := m.Create(ctx, 1, "f", 0644, 022, 0, &inode, attr); st != 0 { + t.Fatalf("create f: %s", st) + } + _ = m.Close(ctx, inode) + defer m.Unlink(ctx, 1, "f") + if st := m.Rename(ctx, 1, "f2", 1, "f", RenameNoReplace, &inode, attr); st != syscall.EEXIST { + t.Fatalf("rename f2 -> f: %s", st) + } + if st := m.Rename(ctx, 1, "f2", 1, "f", 0, &inode, attr); st != 0 { + t.Fatalf("rename f2 -> f: %s", st) + } + if st := m.Rename(ctx, 1, "f", 1, "d", RenameExchange, &inode, attr); st != 0 { + t.Fatalf("rename f <-> d: %s", st) + } + if st := m.Rename(ctx, 1, "d", 1, "f", 0, &inode, attr); st != 0 { + t.Fatalf("rename d -> f: %s", st) + } + if st := m.GetAttr(ctx, 1, attr); st != 0 { + t.Fatalf("getattr f: %s", st) + } + if attr.Nlink != 2 { + t.Fatalf("nlink expect 2, but got %d", attr.Nlink) + } + if st := m.Mkdir(ctx, 1, "d", 0640, 022, 0, &parent, attr); st != 0 { + t.Fatalf("mkdir d: %s", st) + } + // Test rename with parent change + var parent2 Ino + if st := m.Mkdir(ctx, 1, "d4", 0777, 0, 0, &parent2, attr); st != 0 { + t.Fatalf("create dir d4: %s", st) + } + if st := m.Mkdir(ctx, parent2, "d5", 0777, 0, 0, &inode, attr); st != 0 { + t.Fatalf("create dir d4/d5: %s", st) + } + if st := m.Rename(ctx, parent2, "d5", 1, "d5", RenameNoReplace, &inode, attr); st != 0 { + t.Fatalf("rename d4/d5 <-> d5: %s", st) + } else if attr.Parent != 1 { + t.Fatalf("after rename d4/d5 <-> d5 parent %d expect 1", attr.Parent) + } + if st := m.Mknod(ctx, parent2, "f6", TypeFile, 0650, 022, 0, &inode, attr); st != 0 { + t.Fatalf("create dir d4/f6: %s", st) + } + if st := m.Rename(ctx, 1, "d5", parent2, "f6", RenameExchange, &inode, attr); st != 0 { + t.Fatalf("rename d5 <-> d4/d6: %s", st) + } else if attr.Parent != parent2 { + t.Fatalf("after exchange d5 <-> d4/f6 parent %d expect %d", attr.Parent, parent2) + } else if attr.Typ != TypeDirectory { + t.Fatalf("after exchange d5 <-> d4/f6 type %d expect %d", attr.Typ, TypeDirectory) + } + if st := m.Lookup(ctx, 1, "d5", &inode, attr); st != 0 || attr.Parent != 1 { + t.Fatalf("lookup d5 after exchange: %s; parent %d expect 1", st, attr.Parent) + } else if attr.Typ != TypeFile { + t.Fatalf("after exchange d5 <-> d4/f6 type %d expect %d", attr.Typ, TypeFile) + } + if st := m.Rmdir(ctx, parent2, "f6"); st != 0 { + t.Fatalf("rmdir d4/f6 : %s", st) + } + if st := m.Rmdir(ctx, 1, "d4"); st != 0 { + t.Fatalf("rmdir d4 first : %s", st) + } + if st := m.Unlink(ctx, 1, "d5"); st != 0 { + t.Fatalf("rmdir d6 : %s", st) + } + if st := m.Lookup(ctx, 1, "f", &inode, attr); st != 0 { + t.Fatalf("lookup f: %s", st) + } + if st := m.Link(ctx, inode, 1, "f3", attr); st != 0 { + t.Fatalf("link f3 -> f: %s", st) + } + defer m.Unlink(ctx, 1, "f3") + if st := m.Link(ctx, inode, 1, "F3", attr); st != 0 { // CaseInsensi = false + t.Fatalf("link F3 -> f: %s", st) + } + if st := m.Link(ctx, parent, 1, "d2", attr); st != syscall.EPERM { + t.Fatalf("link d2 -> 
d: %s", st) + } + if st := m.Symlink(ctx, 1, "s", "/f", &inode, attr); st != 0 { + t.Fatalf("symlink s -> /f: %s", st) + } + defer m.Unlink(ctx, 1, "s") + var target1, target2 []byte + if st := m.ReadLink(ctx, inode, &target1); st != 0 { + t.Fatalf("readlink s: %s", st) + } + if st := m.ReadLink(ctx, inode, &target2); st != 0 { // cached + t.Fatalf("readlink s: %s", st) + } + if !bytes.Equal(target1, target2) || !bytes.Equal(target1, []byte("/f")) { + t.Fatalf("readlink got %s %s, expected %s", target1, target2, "/f") + } + if st := m.ReadLink(ctx, parent, &target1); st != syscall.ENOENT { + t.Fatalf("readlink d: %s", st) + } + if st := m.Lookup(ctx, 1, "f", &inode, attr); st != 0 { + t.Fatalf("lookup f: %s", st) + } + + // data + var chunkid uint64 + // try to open a file that does not exist + if st := m.Open(ctx, 99999, syscall.O_RDWR, &Attr{}); st != syscall.ENOENT { + t.Fatalf("open not exist inode got %d, expected %d", st, syscall.ENOENT) + } + if st := m.Open(ctx, inode, syscall.O_RDWR, attr); st != 0 { + t.Fatalf("open f: %s", st) + } + _ = m.Close(ctx, inode) + if st := m.NewChunk(ctx, &chunkid); st != 0 { + t.Fatalf("write chunk: %s", st) + } + var s = Slice{Chunkid: chunkid, Size: 100, Len: 100} + if st := m.Write(ctx, inode, 0, 100, s); st != 0 { + t.Fatalf("write end: %s", st) + } + var chunks []Slice + if st := m.Read(ctx, inode, 0, &chunks); st != 0 { + t.Fatalf("read chunk: %s", st) + } + if len(chunks) != 2 || chunks[0].Chunkid != 0 || chunks[0].Size != 100 || chunks[1].Chunkid != chunkid || chunks[1].Size != 100 { + t.Fatalf("chunks: %v", chunks) + } + if st := m.Fallocate(ctx, inode, fallocPunchHole|fallocKeepSize, 100, 50); st != 0 { + t.Fatalf("fallocate: %s", st) + } + if st := m.Fallocate(ctx, inode, fallocPunchHole|fallocCollapesRange, 100, 50); st != syscall.EINVAL { + t.Fatalf("fallocate: %s", st) + } + if st := m.Fallocate(ctx, inode, fallocPunchHole|fallocInsertRange, 100, 50); st != syscall.EINVAL { + t.Fatalf("fallocate: %s", st) + } + if st := m.Fallocate(ctx, inode, fallocCollapesRange, 100, 50); st != syscall.ENOTSUP { + t.Fatalf("fallocate: %s", st) + } + if st := m.Fallocate(ctx, inode, fallocPunchHole, 100, 50); st != syscall.EINVAL { + t.Fatalf("fallocate: %s", st) + } + if st := m.Fallocate(ctx, inode, fallocPunchHole|fallocKeepSize, 0, 0); st != syscall.EINVAL { + t.Fatalf("fallocate: %s", st) + } + if st := m.Fallocate(ctx, parent, fallocPunchHole|fallocKeepSize, 100, 50); st != syscall.EPERM { + t.Fatalf("fallocate dir: %s", st) + } + if st := m.Read(ctx, inode, 0, &chunks); st != 0 { + t.Fatalf("read chunk: %s", st) + } + if len(chunks) != 3 || chunks[1].Chunkid != 0 || chunks[1].Len != 50 || chunks[2].Chunkid != chunkid || chunks[2].Len != 50 { + t.Fatalf("chunks: %v", chunks) + } + + // xattr + if st := m.SetXattr(ctx, inode, "a", []byte("v"), XattrCreateOrReplace); st != 0 { + t.Fatalf("setxattr: %s", st) + } + if st := m.SetXattr(ctx, inode, "a", []byte("v2"), XattrCreateOrReplace); st != 0 { + t.Fatalf("setxattr: %s", st) + } + var value []byte + if st := m.GetXattr(ctx, inode, "a", &value); st != 0 || string(value) != "v2" { + t.Fatalf("getxattr: %s %v", st, value) + } + if st := m.ListXattr(ctx, inode, &value); st != 0 || string(value) != "a\000" { + t.Fatalf("listxattr: %s %v", st, value) + } + if st := m.Unlink(ctx, 1, "F3"); st != 0 { + t.Fatalf("unlink F3: %s", st) + } + if st := m.GetXattr(ctx, inode, "a", &value); st != 0 || string(value) != "v2" { + t.Fatalf("getxattr: %s %v", st, value) + } + if st := m.RemoveXattr(ctx, inode, "a"); 
st != 0 { + t.Fatalf("setxattr: %s", st) + } + if st := m.SetXattr(ctx, inode, "a", []byte("v"), XattrReplace); st != ENOATTR { + t.Fatalf("setxattr: %s", st) + } + if st := m.SetXattr(ctx, inode, "a", []byte("v3"), XattrCreate); st != 0 { + t.Fatalf("setxattr: %s", st) + } + if st := m.SetXattr(ctx, inode, "a", []byte("v3"), XattrCreate); st != syscall.EEXIST { + t.Fatalf("setxattr: %s", st) + } + if st := m.SetXattr(ctx, inode, "a", []byte("v4"), XattrReplace); st != 0 { + t.Fatalf("setxattr: %s", st) + } + if st := m.SetXattr(ctx, inode, "a", []byte("v5"), 5); st != 0 { // unknown flag is ignored + t.Fatalf("setxattr: %s", st) + } + + var totalspace, availspace, iused, iavail uint64 + if st := m.StatFS(ctx, &totalspace, &availspace, &iused, &iavail); st != 0 { + t.Fatalf("statfs: %s", st) + } + if totalspace != 1<<50 || iavail != 10<<20 { + t.Fatalf("total space %d, iavail %d", totalspace, iavail) + } + if err = m.Init(Format{Name: "test", Capacity: 1 << 20, Inodes: 100}, false); err != nil { + t.Fatalf("set quota failed: %s", err) + } + if st := m.StatFS(ctx, &totalspace, &availspace, &iused, &iavail); st != 0 { + t.Fatalf("statfs: %s", st) + } + if totalspace != 1<<20 || iavail != 97 { + t.Fatalf("total space %d, iavail %d", totalspace, iavail) + } + var summary Summary + if st := GetSummary(m, ctx, parent, &summary, false); st != 0 { + t.Fatalf("summary: %s", st) + } + expected := Summary{Length: 0, Size: 4096, Files: 0, Dirs: 1} + if summary != expected { + t.Fatalf("summary %+v not equal to expected: %+v", summary, expected) + } + summary = Summary{} + if st := GetSummary(m, ctx, 1, &summary, true); st != 0 { + t.Fatalf("summary: %s", st) + } + expected = Summary{Length: 402, Size: 20480, Files: 3, Dirs: 2} + if summary != expected { + t.Fatalf("summary %+v not equal to expected: %+v", summary, expected) + } + if st := GetSummary(m, ctx, inode, &summary, true); st != 0 { + t.Fatalf("summary: %s", st) + } + expected = Summary{Length: 602, Size: 24576, Files: 4, Dirs: 2} + if summary != expected { + t.Fatalf("summary %+v not equal to expected: %+v", summary, expected) + } + if st := m.Unlink(ctx, 1, "f"); st != 0 { + t.Fatalf("unlink f: %s", st) + } + if st := m.Unlink(ctx, 1, "f3"); st != 0 { + t.Fatalf("unlink f3: %s", st) + } + time.Sleep(time.Millisecond * 100) // wait for delete + if st := m.Read(ctx, inode, 0, &chunks); st != 0 { + t.Fatalf("read chunk: %s", st) + } + if len(chunks) != 0 { + t.Fatalf("chunks: %v", chunks) + } + if st := m.Rmdir(ctx, 1, "d"); st != 0 { + t.Fatalf("rmdir d: %s", st) + } +} + +func testStickyBit(t *testing.T, m Meta) { + _ = m.Init(Format{Name: "test"}, false) + ctx := Background + var sticky, normal, inode Ino + var attr = &Attr{} + m.Mkdir(ctx, 1, "tmp", 01777, 0, 0, &sticky, attr) + m.Mkdir(ctx, 1, "tmp2", 0777, 0, 0, &normal, attr) + ctxA := NewContext(1, 1, []uint32{1}) + // file + m.Create(ctxA, sticky, "f", 0777, 0, 0, &inode, attr) + m.Create(ctxA, normal, "f", 0777, 0, 0, &inode, attr) + ctxB := NewContext(1, 2, []uint32{1}) + if e := m.Unlink(ctxB, sticky, "f"); e != syscall.EACCES { + t.Fatalf("unlink f: %s", e) + } + if e := m.Rename(ctxB, sticky, "f", sticky, "f2", 0, &inode, attr); e != syscall.EACCES { + t.Fatalf("rename f: %s", e) + } + if e := m.Rename(ctxB, sticky, "f", normal, "f2", 0, &inode, attr); e != syscall.EACCES { + t.Fatalf("rename f: %s", e) + } + m.Create(ctxB, sticky, "f2", 0777, 0, 0, &inode, attr) + if e := m.Rename(ctxB, sticky, "f2", sticky, "f", 0, &inode, attr); e != syscall.EACCES { + t.Fatalf("overwrite f: 
%s", e) + } + if e := m.Rename(ctxA, sticky, "f", sticky, "f2", 0, &inode, attr); e != syscall.EACCES { + t.Fatalf("rename f: %s", e) + } + if e := m.Rename(ctxA, normal, "f", sticky, "f2", 0, &inode, attr); e != syscall.EACCES { + t.Fatalf("rename f: %s", e) + } + if e := m.Rename(ctxA, sticky, "f", sticky, "f3", 0, &inode, attr); e != 0 { + t.Fatalf("rename f: %s", e) + } + if e := m.Unlink(ctxA, sticky, "f3"); e != 0 { + t.Fatalf("unlink f3: %s", e) + } + // dir + m.Mkdir(ctxA, sticky, "d", 0777, 0, 0, &inode, attr) + m.Mkdir(ctxA, normal, "d", 0777, 0, 0, &inode, attr) + if e := m.Rmdir(ctxB, sticky, "d"); e != syscall.EACCES { + t.Fatalf("rmdir d: %s", e) + } + if e := m.Rename(ctxB, sticky, "d", sticky, "d2", 0, &inode, attr); e != syscall.EACCES { + t.Fatalf("rename d: %s", e) + } + if e := m.Rename(ctxB, sticky, "d", normal, "d2", 0, &inode, attr); e != syscall.EACCES { + t.Fatalf("rename d: %s", e) + } + m.Mkdir(ctxB, sticky, "d2", 0777, 0, 0, &inode, attr) + if e := m.Rename(ctxB, sticky, "d2", sticky, "d", 0, &inode, attr); e != syscall.EACCES { + t.Fatalf("overwrite d: %s", e) + } + if e := m.Rename(ctxA, sticky, "d", sticky, "d2", 0, &inode, attr); e != syscall.EACCES { + t.Fatalf("rename d: %s", e) + } + if e := m.Rename(ctxA, normal, "d", sticky, "d2", 0, &inode, attr); e != syscall.EACCES { + t.Fatalf("rename d: %s", e) + } + if e := m.Rename(ctxA, sticky, "d", sticky, "d3", 0, &inode, attr); e != 0 { + t.Fatalf("rename d: %s", e) + } + if e := m.Rmdir(ctxA, sticky, "d3"); e != 0 { + t.Fatalf("rmdir d3: %s", e) + } +} + +func testLocks(t *testing.T, m Meta) { + _ = m.Init(Format{Name: "test"}, false) + ctx := Background + var inode Ino + var attr = &Attr{} + defer m.Unlink(ctx, 1, "f") + if st := m.Create(ctx, 1, "f", 0644, 0, 0, &inode, attr); st != 0 { + t.Fatalf("create f: %s", st) + } + // flock + o1 := uint64(0xF000000000000001) + if st := m.Flock(ctx, inode, o1, syscall.F_RDLCK, false); st != 0 { + t.Fatalf("flock rlock: %s", st) + } + if st := m.Flock(ctx, inode, 2, syscall.F_RDLCK, false); st != 0 { + t.Fatalf("flock rlock: %s", st) + } + if st := m.Flock(ctx, inode, o1, syscall.F_WRLCK, false); st != syscall.EAGAIN { + t.Fatalf("flock wlock: %s", st) + } + if st := m.Flock(ctx, inode, 2, syscall.F_UNLCK, false); st != 0 { + t.Fatalf("flock unlock: %s", st) + } + if st := m.Flock(ctx, inode, o1, syscall.F_WRLCK, false); st != 0 { + t.Fatalf("flock wlock again: %s", st) + } + if st := m.Flock(ctx, inode, 2, syscall.F_WRLCK, false); st != syscall.EAGAIN { + t.Fatalf("flock wlock: %s", st) + } + if st := m.Flock(ctx, inode, 2, syscall.F_RDLCK, false); st != syscall.EAGAIN { + t.Fatalf("flock rlock: %s", st) + } + if st := m.Flock(ctx, inode, o1, syscall.F_UNLCK, false); st != 0 { + t.Fatalf("flock unlock: %s", st) + } + if r, ok := m.(*redisMeta); ok { + ms, err := r.rdb.SMembers(context.Background(), r.lockedKey(r.sid)).Result() + if err != nil { + t.Fatalf("Smember %s: %s", r.lockedKey(r.sid), err) + } + if len(ms) != 0 { + t.Fatalf("locked inodes leaked: %d", len(ms)) + } + } + + // POSIX locks + if st := m.Setlk(ctx, inode, o1, false, syscall.F_RDLCK, 0, 0xFFFF, 1); st != 0 { + t.Fatalf("plock rlock: %s", st) + } + if st := m.Setlk(ctx, inode, 2, false, syscall.F_RDLCK, 0, 0x2FFFF, 1); st != 0 { + t.Fatalf("plock rlock: %s", st) + } + if st := m.Setlk(ctx, inode, 2, false, syscall.F_WRLCK, 0, 0xFFFF, 1); st != syscall.EAGAIN { + t.Fatalf("plock wlock: %s", st) + } + if st := m.Setlk(ctx, inode, 2, false, syscall.F_WRLCK, 0x10000, 0x20000, 1); st != 0 { + 
t.Fatalf("plock wlock: %s", st) + } + if st := m.Setlk(ctx, inode, o1, false, syscall.F_UNLCK, 0, 0x20000, 1); st != 0 { + t.Fatalf("plock unlock: %s", st) + } + if st := m.Setlk(ctx, inode, 2, false, syscall.F_WRLCK, 0, 0xFFFF, 10); st != 0 { + t.Fatalf("plock wlock: %s", st) + } + if st := m.Setlk(ctx, inode, 2, false, syscall.F_WRLCK, 0x2000, 0xFFFF, 20); st != 0 { + t.Fatalf("plock wlock: %s", st) + } + if st := m.Setlk(ctx, inode, o1, false, syscall.F_WRLCK, 0, 0xFFFF, 1); st != syscall.EAGAIN { + t.Fatalf("plock rlock: %s", st) + } + var ltype, pid uint32 = syscall.F_WRLCK, 1 + var start, end uint64 = 0x2000, 0xFFFF + if st := m.Getlk(ctx, inode, o1, <ype, &start, &end, &pid); st != 0 || ltype != syscall.F_WRLCK || pid != 20 || start != 0x2000 || end != 0xFFFF { + t.Fatalf("plock get rlock: %s, %d %d %x %x", st, ltype, pid, start, end) + } + if st := m.Setlk(ctx, inode, 2, false, syscall.F_UNLCK, 0, 0x2FFFF, 1); st != 0 { + t.Fatalf("plock unlock: %s", st) + } + ltype = syscall.F_WRLCK + start, end = 0, 0xFFFFFF + if st := m.Getlk(ctx, inode, o1, <ype, &start, &end, &pid); st != 0 || ltype != syscall.F_UNLCK || pid != 0 || start != 0 || end != 0 { + t.Fatalf("plock get rlock: %s, %d %d %x %x", st, ltype, pid, start, end) + } + + // concurrent locks + var g sync.WaitGroup + var count int + var err syscall.Errno + for i := 0; i < 100; i++ { + g.Add(1) + go func(i int) { + defer g.Done() + if st := m.Setlk(ctx, inode, uint64(i), true, syscall.F_WRLCK, 0, 0xFFFF, uint32(i)); st != 0 { + err = st + } + count++ + time.Sleep(time.Millisecond) + count-- + if count > 0 { + logger.Fatalf("count should be be zero but got %d", count) + } + if st := m.Setlk(ctx, inode, uint64(i), false, syscall.F_UNLCK, 0, 0xFFFF, uint32(i)); st != 0 { + logger.Fatalf("plock unlock: %s", st) + err = st + } + }(i) + } + g.Wait() + if err != 0 { + t.Fatalf("lock fail: %s", err) + } + + if r, ok := m.(*redisMeta); ok { + ms, err := r.rdb.SMembers(context.Background(), r.lockedKey(r.sid)).Result() + if err != nil { + t.Fatalf("Smember %s: %s", r.lockedKey(r.sid), err) + } + if len(ms) != 0 { + t.Fatalf("locked inode leaked: %d", len(ms)) + } + } +} + +func testRemove(t *testing.T, m Meta) { + _ = m.Init(Format{Name: "test"}, false) + ctx := Background + var inode, parent Ino + var attr = &Attr{} + if st := m.Create(ctx, 1, "f", 0644, 0, 0, &inode, attr); st != 0 { + t.Fatalf("create f: %s", st) + } + if st := Remove(m, ctx, 1, "f"); st != 0 { + t.Fatalf("rmr f: %s", st) + } + if st := m.Mkdir(ctx, 1, "d", 0755, 0, 0, &parent, attr); st != 0 { + t.Fatalf("mkdir d: %s", st) + } + if st := m.Mkdir(ctx, parent, "d2", 0755, 0, 0, &inode, attr); st != 0 { + t.Fatalf("create d/d2: %s", st) + } + if st := m.Create(ctx, parent, "f", 0644, 0, 0, &inode, attr); st != 0 { + t.Fatalf("create d/f: %s", st) + } + if p, st := GetPath(m, ctx, parent); st != 0 || p != "/d" { + t.Fatalf("get path /d: %s, %s", st, p) + } + if p, st := GetPath(m, ctx, inode); st != 0 || p != "/d/f" { + t.Fatalf("get path /d/f: %s, %s", st, p) + } + for i := 0; i < 4096; i++ { + if st := m.Create(ctx, 1, "f"+strconv.Itoa(i), 0644, 0, 0, &inode, attr); st != 0 { + t.Fatalf("create f%s: %s", strconv.Itoa(i), st) + } + } + var entries []*Entry + if st := m.Readdir(ctx, 1, 1, &entries); st != 0 { + t.Fatalf("readdir: %s", st) + } else if len(entries) != 4099 { + t.Fatalf("entries: %d", len(entries)) + } + if st := Remove(m, ctx, 1, "d"); st != 0 { + t.Fatalf("rmr d: %s", st) + } +} + +func testCaseIncensi(t *testing.T, m Meta) { + _ = m.Init(Format{Name: 
"test"}, false) + ctx := Background + var inode Ino + var attr = &Attr{} + _ = m.Create(ctx, 1, "foo", 0755, 0, 0, &inode, attr) + if st := m.Create(ctx, 1, "Foo", 0755, 0, 0, &inode, attr); st != 0 { + t.Fatalf("create Foo should be ok") + } + if st := m.Create(ctx, 1, "Foo", 0755, 0, syscall.O_EXCL, &inode, attr); st != syscall.EEXIST { + t.Fatalf("create should fail with EEXIST") + } + if st := m.Lookup(ctx, 1, "Foo", &inode, attr); st != 0 { + t.Fatalf("lookup Foo should be OK") + } + if st := m.Rename(ctx, 1, "Foo", 1, "bar", 0, &inode, attr); st != 0 { + t.Fatalf("rename Foo to bar should be OK, but got %s", st) + } + if st := m.Create(ctx, 1, "Foo", 0755, 0, 0, &inode, attr); st != 0 { + t.Fatalf("create Foo should be OK") + } + if st := m.Resolve(ctx, 1, "/Foo", &inode, attr); st != syscall.ENOTSUP { + t.Fatalf("resolve with case insensitive should be ENOTSUP") + } + if st := m.Lookup(ctx, 1, "Bar", &inode, attr); st != 0 { + t.Fatalf("lookup Bar should be OK") + } + if st := m.Link(ctx, inode, 1, "foo", attr); st != syscall.EEXIST { + t.Fatalf("link should fail with EEXIST") + } + if st := m.Unlink(ctx, 1, "Bar"); st != 0 { + t.Fatalf("unlink Bar should be OK") + } + if st := m.Unlink(ctx, 1, "foo"); st != 0 { + t.Fatalf("unlink foo should be OK") + } + if st := m.Mkdir(ctx, 1, "Foo", 0755, 0, 0, &inode, attr); st != 0 { + t.Fatalf("mkdir Foo should be OK, but got %s", st) + } + if st := m.Rmdir(ctx, 1, "foo"); st != 0 { + t.Fatalf("rmdir foo should be OK") + } +} + +type compactor interface { + compactChunk(inode Ino, indx uint32, force bool) +} + +func testCompaction(t *testing.T, m Meta) { + _ = m.Init(Format{Name: "test"}, false) + var l sync.Mutex + deleted := make(map[uint64]int) + m.OnMsg(DeleteChunk, func(args ...interface{}) error { + l.Lock() + chunkid := args[0].(uint64) + deleted[chunkid] = 1 + l.Unlock() + return nil + }) + m.OnMsg(CompactChunk, func(args ...interface{}) error { + return nil + }) + ctx := Background + var inode Ino + var attr = &Attr{} + _ = m.Unlink(ctx, 1, "f") + if st := m.Create(ctx, 1, "f", 0650, 022, 0, &inode, attr); st != 0 { + t.Fatalf("create file %s", st) + } + defer func() { + _ = m.Unlink(ctx, 1, "f") + }() + + // random write + var chunkid uint64 + m.NewChunk(ctx, &chunkid) + _ = m.Write(ctx, inode, 1, uint32(0), Slice{Chunkid: chunkid, Size: 64 << 20, Len: 64 << 20}) + m.NewChunk(ctx, &chunkid) + _ = m.Write(ctx, inode, 1, uint32(30<<20), Slice{Chunkid: chunkid, Size: 8, Len: 8}) + m.NewChunk(ctx, &chunkid) + _ = m.Write(ctx, inode, 1, uint32(40<<20), Slice{Chunkid: chunkid, Size: 8, Len: 8}) + var cs1 []Slice + _ = m.Read(ctx, inode, 1, &cs1) + if len(cs1) != 5 { + t.Fatalf("expect 5 slices, but got %+v", cs1) + } + if c, ok := m.(compactor); ok { + c.compactChunk(inode, 1, true) + } + var cs []Slice + _ = m.Read(ctx, inode, 1, &cs) + if len(cs) != 1 { + t.Fatalf("expect 1 slice, but got %+v", cs) + } + + // append + var size uint32 = 100000 + for i := 0; i < 200; i++ { + var chunkid uint64 + m.NewChunk(ctx, &chunkid) + if st := m.Write(ctx, inode, 0, uint32(i)*size, Slice{Chunkid: chunkid, Size: size, Len: size}); st != 0 { + t.Fatalf("write %d: %s", i, st) + } + time.Sleep(time.Millisecond) + } + if c, ok := m.(compactor); ok { + c.compactChunk(inode, 0, true) + } + var chunks []Slice + if st := m.Read(ctx, inode, 0, &chunks); st != 0 { + t.Fatalf("read 0: %s", st) + } + if len(chunks) >= 10 { + t.Fatalf("inode %d should be compacted, but have %d slices", inode, len(chunks)) + } + var total uint32 + for _, s := range chunks { + total 
+= s.Len + } + if total != size*200 { + t.Fatalf("size of slice should be %d, but got %d", size*200, total) + } + + // TODO: check result if that's predictable + p, bar := utils.MockProgress() + if st := m.CompactAll(ctx, bar); st != 0 { + t.Fatalf("compactall: %s", st) + } + p.Done() + slices := make(map[Ino][]Slice) + if st := m.ListSlices(ctx, slices, false, nil); st != 0 { + t.Fatalf("list all slices: %s", st) + } + + l.Lock() + deletes := len(deleted) + l.Unlock() + if deletes < 30 { + t.Fatalf("deleted chunks %d is less then 30", deletes) + } +} + +func testConcurrentWrite(t *testing.T, m Meta) { + m.OnMsg(DeleteChunk, func(args ...interface{}) error { + return nil + }) + m.OnMsg(CompactChunk, func(args ...interface{}) error { + return nil + }) + _ = m.Init(Format{Name: "test"}, false) + + ctx := Background + var inode Ino + var attr = &Attr{} + _ = m.Unlink(ctx, 1, "f") + if st := m.Create(ctx, 1, "f", 0650, 022, 0, &inode, attr); st != 0 { + t.Fatalf("create file %s", st) + } + defer m.Unlink(ctx, 1, "f") + + var errno syscall.Errno + var g sync.WaitGroup + for i := 0; i <= 10; i++ { + g.Add(1) + go func(indx uint32) { + defer g.Done() + for j := 0; j < 100; j++ { + var chunkid uint64 + m.NewChunk(ctx, &chunkid) + var slice = Slice{Chunkid: chunkid, Size: 100, Len: 100} + st := m.Write(ctx, inode, indx, 0, slice) + if st != 0 { + errno = st + break + } + } + }(uint32(i)) + } + g.Wait() + if errno != 0 { + t.Fatal() + } +} + +func testTruncateAndDelete(t *testing.T, m Meta) { + m.OnMsg(DeleteChunk, func(args ...interface{}) error { + return nil + }) + _ = m.Init(Format{Name: "test"}, false) + + ctx := Background + var inode Ino + var attr = &Attr{} + m.Unlink(ctx, 1, "f") + if st := m.Truncate(ctx, 1, 0, 4<<10, attr); st != syscall.EPERM { + t.Fatalf("truncate dir %s", st) + } + if st := m.Create(ctx, 1, "f", 0650, 022, 0, &inode, attr); st != 0 { + t.Fatalf("create file %s", st) + } + defer m.Unlink(ctx, 1, "f") + var cid uint64 + if st := m.NewChunk(ctx, &cid); st != 0 { + t.Fatalf("new chunk: %s", st) + } + if st := m.Write(ctx, inode, 0, 100, Slice{cid, 100, 0, 100}); st != 0 { + t.Fatalf("write file %s", st) + } + if st := m.Truncate(ctx, inode, 0, 200<<20, attr); st != 0 { + t.Fatalf("truncate file %s", st) + } + if st := m.Truncate(ctx, inode, 0, (10<<40)+10, attr); st != 0 { + t.Fatalf("truncate file %s", st) + } + if st := m.Truncate(ctx, inode, 0, (300<<20)+10, attr); st != 0 { + t.Fatalf("truncate file %s", st) + } + var total int64 + slices := make(map[Ino][]Slice) + m.ListSlices(ctx, slices, false, func() { total++ }) + var totalSlices int + for _, ss := range slices { + totalSlices += len(ss) + } + if totalSlices != 1 { + t.Fatalf("number of chunks: %d != 1, %+v", totalSlices, slices) + } + _ = m.Close(ctx, inode) + if st := m.Unlink(ctx, 1, "f"); st != 0 { + t.Fatalf("unlink file %s", st) + } + + time.Sleep(time.Millisecond * 100) + slices = make(map[Ino][]Slice) + m.ListSlices(ctx, slices, false, nil) + totalSlices = 0 + for _, ss := range slices { + totalSlices += len(ss) + } + // the last chunk could be found and deleted + if totalSlices > 1 { + t.Fatalf("number of chunks: %d > 1, %+v", totalSlices, slices) + } +} + +func testCopyFileRange(t *testing.T, m Meta) { + m.OnMsg(DeleteChunk, func(args ...interface{}) error { + return nil + }) + _ = m.Init(Format{Name: "test"}, false) + + ctx := Background + var iin, iout Ino + var attr = &Attr{} + _ = m.Unlink(ctx, 1, "fin") + _ = m.Unlink(ctx, 1, "fout") + if st := m.Create(ctx, 1, "fin", 0650, 022, 0, &iin, attr); st 
!= 0 { + t.Fatalf("create file %s", st) + } + defer m.Unlink(ctx, 1, "fin") + if st := m.Create(ctx, 1, "fout", 0650, 022, 0, &iout, attr); st != 0 { + t.Fatalf("create file %s", st) + } + defer m.Unlink(ctx, 1, "fout") + m.Write(ctx, iin, 0, 100, Slice{10, 200, 0, 100}) + m.Write(ctx, iin, 1, 100<<10, Slice{11, 40 << 20, 0, 40 << 20}) + m.Write(ctx, iin, 3, 0, Slice{12, 63 << 20, 10 << 20, 30 << 20}) + m.Write(ctx, iout, 2, 10<<20, Slice{13, 50 << 20, 10 << 20, 30 << 20}) + var copied uint64 + if st := m.CopyFileRange(ctx, iin, 150, iout, 30<<20, 200<<20, 0, &copied); st != 0 { + t.Fatalf("copy file range: %s", st) + } + var expected uint64 = 200 << 20 + if copied != expected { + t.Fatalf("expect copy %d bytes, but got %d", expected, copied) + } + var expectedChunks = [][]Slice{ + {{0, 30 << 20, 0, 30 << 20}, {10, 200, 50, 50}, {0, 0, 200, ChunkSize - 30<<20 - 50}}, + {{0, 0, 150 + (ChunkSize - 30<<20), 30<<20 - 150}, {0, 0, 0, 100 << 10}, {11, 40 << 20, 0, (34 << 20) + 150 - (100 << 10)}}, + {{11, 40 << 20, (34 << 20) + 150 - (100 << 10), 6<<20 - 150 + 100<<10}, {0, 0, 40<<20 + 100<<10, ChunkSize - 40<<20 - 100<<10}, {0, 0, 0, 150 + (ChunkSize - 30<<20)}}, + {{0, 0, 150 + (ChunkSize - 30<<20), 30<<20 - 150}, {12, 63 << 20, 10 << 20, (8 << 20) + 150}}, + } + for i := uint32(0); i < 4; i++ { + var chunks []Slice + if st := m.Read(ctx, iout, i, &chunks); st != 0 { + t.Fatalf("read chunk %d: %s", i, st) + } + if len(chunks) != len(expectedChunks[i]) { + t.Fatalf("expect chunk %d: %+v, but got %+v", i, expectedChunks[i], chunks) + } + for j, s := range chunks { + if s != expectedChunks[i][j] { + t.Fatalf("expect slice %d,%d: %+v, but got %+v", i, j, expectedChunks[i][j], s) + } + } + } +} + +func testCloseSession(t *testing.T, m Meta) { + _ = m.Init(Format{Name: "test"}, false) + if err := m.NewSession(); err != nil { + t.Fatalf("new session: %s", err) + } + + ctx := Background + var inode Ino + var attr = &Attr{} + if st := m.Create(ctx, 1, "f", 0644, 022, 0, &inode, attr); st != 0 { + t.Fatalf("create f: %s", st) + } + if st := m.Flock(ctx, inode, 1, syscall.F_WRLCK, false); st != 0 { + t.Fatalf("flock wlock: %s", st) + } + if st := m.Setlk(ctx, inode, 1, false, syscall.F_WRLCK, 0x10000, 0x20000, 1); st != 0 { + t.Fatalf("plock wlock: %s", st) + } + if st := m.Open(ctx, inode, syscall.O_RDWR, attr); st != 0 { + t.Fatalf("open f: %s", st) + } + if st := m.Unlink(ctx, 1, "f"); st != 0 { + t.Fatalf("unlink f: %s", st) + } + var sid uint64 + switch m := m.(type) { + case *redisMeta: + sid = m.sid + case *dbMeta: + sid = m.sid + case *kvMeta: + sid = m.sid + } + s, err := m.GetSession(sid) + if err != nil { + t.Fatalf("get session: %s", err) + } else { + if len(s.Flocks) != 1 || len(s.Plocks) != 1 || len(s.Sustained) != 1 { + t.Fatalf("incorrect session: flock %d plock %d sustained %d", len(s.Flocks), len(s.Plocks), len(s.Sustained)) + } + } + if err = m.CloseSession(); err != nil { + t.Fatalf("close session: %s", err) + } + if _, err = m.GetSession(sid); err == nil { + t.Fatalf("get a deleted session: %s", err) + } + switch m := m.(type) { + case *redisMeta: + s, err = m.getSession(strconv.FormatUint(sid, 10), true) + case *dbMeta: + s, err = m.getSession(&session{Sid: sid}, true) + case *kvMeta: + s, err = m.getSession(sid, true) + } + if err != nil { + t.Fatalf("get session: %s", err) + } + var empty SessionInfo + if s.SessionInfo != empty { + t.Fatalf("incorrect session info %+v", s.SessionInfo) + } + if len(s.Flocks) != 0 || len(s.Plocks) != 0 || len(s.Sustained) != 0 { + 
t.Fatalf("incorrect session: flock %d plock %d sustained %d", len(s.Flocks), len(s.Plocks), len(s.Sustained)) + } +} + +func testTrash(t *testing.T, m Meta) { + if err := m.Init(Format{Name: "test", TrashDays: 1}, false); err != nil { + t.Fatalf("init: %s", err) + } + ctx := Background + var inode, parent Ino + var attr = &Attr{} + if st := m.Create(ctx, 1, "f1", 0644, 022, 0, &inode, attr); st != 0 { + t.Fatalf("create f1: %s", st) + } + if st := m.Create(ctx, 1, "f2", 0644, 022, 0, &inode, attr); st != 0 { + t.Fatalf("create f2: %s", st) + } + if st := m.Mkdir(ctx, 1, "d", 0755, 022, 0, &parent, attr); st != 0 { + t.Fatalf("mkdir d: %s", st) + } + if st := m.Create(ctx, parent, "f", 0644, 022, 0, &inode, attr); st != 0 { + t.Fatalf("create d/f: %s", st) + } + if st := m.Rename(ctx, 1, "f1", 1, "d", 0, &inode, attr); st != syscall.ENOTEMPTY { + t.Fatalf("rename f1 -> d: %s", st) + } + if st := m.Unlink(ctx, parent, "f"); st != 0 { + t.Fatalf("unlink d/f: %s", st) + } + if st := m.Rename(ctx, 1, "f1", 1, "d", 0, &inode, attr); st != 0 { + t.Fatalf("rename f1 -> d: %s", st) + } + if st := m.Rename(ctx, 1, "f2", TrashInode, "td", 0, &inode, attr); st != syscall.EPERM { + t.Fatalf("rename f2 -> td: %s", st) + } + if st := m.Rename(ctx, 1, "f2", TrashInode+1, "td", 0, &inode, attr); st != syscall.EPERM { + t.Fatalf("rename f2 -> td: %s", st) + } + if st := m.Rename(ctx, 1, "f2", 1, "d", 0, &inode, attr); st != 0 { + t.Fatalf("rename f2 -> d: %s", st) + } + if st := m.Unlink(ctx, 1, "d"); st != 0 { + t.Fatalf("unlink d: %s", st) + } + var entries []*Entry + if st := m.Readdir(ctx, 1, 0, &entries); st != 0 { + t.Fatalf("readdir: %s", st) + } + if len(entries) != 2 { + t.Fatalf("entries: %d", len(entries)) + } + entries = entries[:0] + if st := m.Readdir(ctx, TrashInode+1, 0, &entries); st != 0 { + t.Fatalf("readdir: %s", st) + } + if len(entries) != 6 { + t.Fatalf("entries: %d", len(entries)) + } + ctx2 := NewContext(1000, 1, []uint32{1}) + if st := m.Unlink(ctx2, TrashInode+1, "d"); st != syscall.EPERM { + t.Fatalf("unlink d: %s", st) + } + if st := m.Rmdir(ctx2, TrashInode+1, "d"); st != syscall.EPERM { + t.Fatalf("rmdir d: %s", st) + } + if st := m.Rename(ctx2, TrashInode+1, "d", 1, "f", 0, &inode, attr); st != syscall.EPERM { + t.Fatalf("rename d -> f: %s", st) + } + switch bm := m.(type) { + case *redisMeta: + bm.doCleanupTrash(true) + case *dbMeta: + bm.doCleanupTrash(true) + case *kvMeta: + bm.doCleanupTrash(true) + } + if st := m.GetAttr(ctx2, TrashInode+1, attr); st != syscall.ENOENT { + t.Fatalf("getattr: %s", st) + } +} + +func testOpenCache(t *testing.T, m Meta) { + ctx := Background + var inode Ino + var attr = &Attr{} + if st := m.Create(ctx, 1, "f", 0644, 022, 0, &inode, attr); st != 0 { + t.Fatalf("create f: %s", st) + } + defer m.Unlink(ctx, 1, "f") + if st := m.Open(ctx, inode, syscall.O_RDWR, attr); st != 0 { + t.Fatalf("open f: %s", st) + } + defer m.Close(ctx, inode) + + var attr2 = &Attr{} + if st := m.GetAttr(ctx, inode, attr2); st != 0 { + t.Fatalf("getattr f: %s", st) + } + if *attr != *attr2 { + t.Fatalf("attrs not the same: attr %+v; attr2 %+v", *attr, *attr2) + } + attr2.Uid = 1 + if st := m.SetAttr(ctx, inode, SetAttrUID, 0, attr2); st != 0 { + t.Fatalf("setattr f: %s", st) + } + if st := m.GetAttr(ctx, inode, attr); st != 0 { + t.Fatalf("getattr f: %s", st) + } + if attr.Uid != 1 { + t.Fatalf("attr uid should be 1: %+v", *attr) + } +} + +func testReadOnly(t *testing.T, m Meta) { + ctx := Background + if err := m.NewSession(); err != nil { + t.Fatalf("new session: 
%s", err) + } + defer m.CloseSession() + + var inode Ino + var attr = &Attr{} + if st := m.Mkdir(ctx, 1, "d", 0640, 022, 0, &inode, attr); st != syscall.EROFS { + t.Fatalf("mkdir d: %s", st) + } + if st := m.Create(ctx, 1, "f", 0644, 022, 0, &inode, attr); st != syscall.EROFS { + t.Fatalf("create f: %s", st) + } + if st := m.Open(ctx, inode, syscall.O_RDWR, attr); st != syscall.EROFS { + t.Fatalf("open f: %s", st) + } +} diff --git a/pkg/meta/slice.go b/pkg/meta/slice.go new file mode 100644 index 0000000..09efb00 --- /dev/null +++ b/pkg/meta/slice.go @@ -0,0 +1,188 @@ +/* + * JuiceFS, Copyright 2020 Juicedata, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package meta + +import "github.com/juicedata/juicefs/pkg/utils" + +type slice struct { + chunkid uint64 + size uint32 + off uint32 + len uint32 + pos uint32 + left *slice + right *slice +} + +func newSlice(pos uint32, chunkid uint64, cleng, off, len uint32) *slice { + if len == 0 { + return nil + } + s := &slice{} + s.pos = pos + s.chunkid = chunkid + s.size = cleng + s.off = off + s.len = len + s.left = nil + s.right = nil + return s +} + +func (s *slice) read(buf []byte) { + rb := utils.ReadBuffer(buf) + s.pos = rb.Get32() + s.chunkid = rb.Get64() + s.size = rb.Get32() + s.off = rb.Get32() + s.len = rb.Get32() +} + +func (s *slice) cut(pos uint32) (left, right *slice) { + if s == nil { + return nil, nil + } + if pos <= s.pos { + if s.left == nil { + s.left = newSlice(pos, 0, 0, 0, s.pos-pos) + } + left, s.left = s.left.cut(pos) + return left, s + } else if pos < s.pos+s.len { + l := pos - s.pos + right = newSlice(pos, s.chunkid, s.size, s.off+l, s.len-l) + right.right = s.right + s.len = l + s.right = nil + return s, right + } else { + if s.right == nil { + s.right = newSlice(s.pos+s.len, 0, 0, 0, pos-s.pos-s.len) + } + s.right, right = s.right.cut(pos) + return s, right + } +} + +func (s *slice) visit(f func(*slice)) { + if s == nil { + return + } + s.left.visit(f) + right := s.right + f(s) // s could be freed + right.visit(f) +} + +const sliceBytes = 24 + +func marshalSlice(pos uint32, chunkid uint64, size, off, len uint32) []byte { + w := utils.NewBuffer(sliceBytes) + w.Put32(pos) + w.Put64(chunkid) + w.Put32(size) + w.Put32(off) + w.Put32(len) + return w.Bytes() +} + +func readSlices(vals []string) []*slice { + slices := make([]slice, len(vals)) + ss := make([]*slice, len(vals)) + for i, val := range vals { + s := &slices[i] + s.read([]byte(val)) + ss[i] = s + } + return ss +} + +func readSliceBuf(buf []byte) []*slice { + if len(buf)%sliceBytes != 0 { + logger.Errorf("corrupt slices: len=%d", len(buf)) + return nil + } + nSlices := len(buf) / sliceBytes + slices := make([]slice, nSlices) + ss := make([]*slice, nSlices) + for i := 0; i < len(buf); i += sliceBytes { + s := &slices[i/sliceBytes] + s.read(buf[i:]) + ss[i/sliceBytes] = s + } + return ss +} + +func buildSlice(ss []*slice) []Slice { + var root *slice + for i := range ss { + s := new(slice) + *s = *ss[i] + var right *slice + s.left, right = 
root.cut(s.pos) + _, s.right = right.cut(s.pos + s.len) + root = s + } + var pos uint32 + var chunk []Slice + root.visit(func(s *slice) { + if s.pos > pos { + chunk = append(chunk, Slice{Size: s.pos - pos, Len: s.pos - pos}) + pos = s.pos + } + chunk = append(chunk, Slice{Chunkid: s.chunkid, Size: s.size, Off: s.off, Len: s.len}) + pos += s.len + }) + return chunk +} + +func compactChunk(ss []*slice) (uint32, uint32, []Slice) { + var chunk = buildSlice(ss) + var pos uint32 + if len(chunk) > 0 && chunk[0].Chunkid == 0 { + pos = chunk[0].Len + chunk = chunk[1:] + } + var size uint32 + for _, c := range chunk { + size += c.Len + } + return pos, size, chunk +} + +func skipSome(chunk []*slice) int { + var skipped int + var total = len(chunk) + for skipped < total { + ss := chunk[skipped:] + pos, size, c := compactChunk(ss) + first := ss[0] + if first.len < (1<<20) || first.len*5 < size || size == 0 { + // it's too small + break + } + isFirst := func(pos uint32, s Slice) bool { + return pos == first.pos && s.Chunkid == first.chunkid && s.Off == first.off && s.Len == first.len + } + if !isFirst(pos, c[0]) { + // it's not the first slice, compact it + break + } + skipped++ + } + return skipped +} diff --git a/pkg/meta/sql.go b/pkg/meta/sql.go new file mode 100644 index 0000000..fdb3cc4 --- /dev/null +++ b/pkg/meta/sql.go @@ -0,0 +1,2883 @@ +//go:build !nosqlite || !nomysql || !nopg +// +build !nosqlite !nomysql !nopg + +/* + * JuiceFS, Copyright 2020 Juicedata, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package meta + +import ( + "bufio" + "bytes" + "database/sql" + "encoding/json" + "errors" + "fmt" + "io" + "runtime" + "sort" + "strings" + "sync" + "sync/atomic" + "syscall" + "time" + + "github.com/juicedata/juicefs/pkg/utils" + "github.com/sirupsen/logrus" + "xorm.io/xorm" + "xorm.io/xorm/log" + "xorm.io/xorm/names" +) + +type setting struct { + Name string `xorm:"pk"` + Value string `xorm:"varchar(4096) notnull"` +} + +type counter struct { + Name string `xorm:"pk"` + Value int64 `xorm:"notnull"` +} + +type edge struct { + Parent Ino `xorm:"unique(edge) notnull"` + Name string `xorm:"unique(edge) notnull"` + Inode Ino `xorm:"notnull"` + Type uint8 `xorm:"notnull"` +} + +type node struct { + Inode Ino `xorm:"pk"` + Type uint8 `xorm:"notnull"` + Flags uint8 `xorm:"notnull"` + Mode uint16 `xorm:"notnull"` + Uid uint32 `xorm:"notnull"` + Gid uint32 `xorm:"notnull"` + Atime int64 `xorm:"notnull"` + Mtime int64 `xorm:"notnull"` + Ctime int64 `xorm:"notnull"` + Nlink uint32 `xorm:"notnull"` + Length uint64 `xorm:"notnull"` + Rdev uint32 + Parent Ino +} + +type namedNode struct { + node `xorm:"extends"` + Name string +} + +type chunk struct { + Inode Ino `xorm:"unique(chunk) notnull"` + Indx uint32 `xorm:"unique(chunk) notnull"` + Slices []byte `xorm:"blob notnull"` +} +type chunkRef struct { + Chunkid uint64 `xorm:"pk"` + Size uint32 `xorm:"notnull"` + Refs int `xorm:"notnull"` +} +type symlink struct { + Inode Ino `xorm:"pk"` + Target string `xorm:"varchar(4096) notnull"` +} + +type xattr struct { + Inode Ino `xorm:"unique(name) notnull"` + Name string `xorm:"unique(name) notnull"` + Value []byte `xorm:"blob notnull"` +} + +type flock struct { + Inode Ino `xorm:"notnull unique(flock)"` + Sid uint64 `xorm:"notnull unique(flock)"` + Owner int64 `xorm:"notnull unique(flock)"` + Ltype byte `xorm:"notnull"` +} + +type plock struct { + Inode Ino `xorm:"notnull unique(plock)"` + Sid uint64 `xorm:"notnull unique(plock)"` + Owner int64 `xorm:"notnull unique(plock)"` + Records []byte `xorm:"blob notnull"` +} + +type session struct { + Sid uint64 `xorm:"pk"` + Heartbeat int64 `xorm:"notnull"` + Info []byte `xorm:"blob"` +} + +type sustained struct { + Sid uint64 `xorm:"unique(sustained) notnull"` + Inode Ino `xorm:"unique(sustained) notnull"` +} + +type delfile struct { + Inode Ino `xorm:"pk notnull"` + Length uint64 `xorm:"notnull"` + Expire int64 `xorm:"notnull"` +} + +type dbMeta struct { + baseMeta + db *xorm.Engine + snap *dbSnap +} +type dbSnap struct { + node map[Ino]*node + symlink map[Ino]*symlink + xattr map[Ino][]*xattr + edges map[Ino][]*edge + chunk map[string]*chunk +} + +func newSQLMeta(driver, addr string, conf *Config) (Meta, error) { + if driver == "postgres" { + addr = driver + "://" + addr + } + engine, err := xorm.NewEngine(driver, addr) + if err != nil { + return nil, fmt.Errorf("unable to use data source %s: %s", driver, err) + } + switch logger.Level { // make xorm less verbose + case logrus.TraceLevel: + engine.SetLogLevel(log.LOG_DEBUG) + case logrus.DebugLevel: + engine.SetLogLevel(log.LOG_INFO) + case logrus.InfoLevel, logrus.WarnLevel: + engine.SetLogLevel(log.LOG_WARNING) + case logrus.ErrorLevel: + engine.SetLogLevel(log.LOG_ERR) + default: + engine.SetLogLevel(log.LOG_OFF) + } + + start := time.Now() + if err = engine.Ping(); err != nil { + return nil, fmt.Errorf("ping database: %s", err) + } + if time.Since(start) > time.Millisecond { + logger.Warnf("The latency to database is too high: %s", time.Since(start)) + } + + 
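+ // Note: the mapper below prefixes every table name with "jfs_" (e.g. jfs_node, jfs_edge,
+ // jfs_chunk), so the metadata tables can coexist with other applications' tables in the same database.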
engine.SetTableMapper(names.NewPrefixMapper(engine.GetTableMapper(), "jfs_")) + m := &dbMeta{ + baseMeta: newBaseMeta(conf), + db: engine, + } + m.en = m + m.root, err = lookupSubdir(m, conf.Subdir) + + return m, err +} + +func (m *dbMeta) Shutdown() error { + return m.db.Close() +} + +func (m *dbMeta) Name() string { + return m.db.DriverName() +} + +func (m *dbMeta) doDeleteSlice(chunkid uint64, size uint32) error { + return m.txn(func(ses *xorm.Session) error { + _, err := ses.Exec("delete from jfs_chunk_ref where chunkid=?", chunkid) + return err + }) +} + +func (m *dbMeta) updateCollate() { + if r, err := m.db.Query("show create table jfs_edge"); err != nil { + logger.Fatalf("show table jfs_edge: %s", err.Error()) + } else { + createTable := string(r[0]["Create Table"]) + // the default collate is case-insensitive + if !strings.Contains(createTable, "SET utf8mb4 COLLATE utf8mb4_bin") { + _, err := m.db.Exec("alter table jfs_edge modify name varchar (255) CHARACTER SET utf8mb4 COLLATE utf8mb4_bin NOT NULL") + if err != nil && strings.Contains(err.Error(), "Error 1071: Specified key was too long; max key length is 767 bytes") { + // MySQL 5.6 supports key length up to 767 bytes, so reduce the length of name to 190 chars + _, err = m.db.Exec("alter table jfs_edge modify name varchar (190) CHARACTER SET utf8mb4 COLLATE utf8mb4_bin NOT NULL") + } + if err != nil { + logger.Fatalf("update collate: %s", err) + } + } + } +} + +func (m *dbMeta) Init(format Format, force bool) error { + if err := m.db.Sync2(new(setting), new(counter)); err != nil { + logger.Fatalf("create table setting, counter: %s", err) + } + if err := m.db.Sync2(new(edge)); err != nil && !strings.Contains(err.Error(), "Duplicate entry") { + logger.Fatalf("create table edge: %s", err) + } + if err := m.db.Sync2(new(node), new(symlink), new(xattr)); err != nil { + logger.Fatalf("create table node, symlink, xattr: %s", err) + } + if err := m.db.Sync2(new(chunk), new(chunkRef)); err != nil { + logger.Fatalf("create table chunk, chunk_ref: %s", err) + } + if err := m.db.Sync2(new(session), new(sustained), new(delfile)); err != nil { + logger.Fatalf("create table session, sustaind, delfile: %s", err) + } + if err := m.db.Sync2(new(flock), new(plock)); err != nil { + logger.Fatalf("create table flock, plock: %s", err) + } + if m.db.DriverName() == "mysql" { + m.updateCollate() + } + + var s = setting{Name: "format"} + ok, err := m.db.Get(&s) + if err != nil { + return err + } + + if ok { + var old Format + err = json.Unmarshal([]byte(s.Value), &old) + if err != nil { + return fmt.Errorf("json: %s", err) + } + if force { + old.SecretKey = "removed" + logger.Warnf("Existing volume will be overwrited: %+v", old) + } else { + format.UUID = old.UUID + // these can be safely updated. 
+ old.Bucket = format.Bucket + old.AccessKey = format.AccessKey + old.SecretKey = format.SecretKey + old.Capacity = format.Capacity + old.Inodes = format.Inodes + old.TrashDays = format.TrashDays + if format != old { + old.SecretKey = "" + format.SecretKey = "" + return fmt.Errorf("cannot update format from %+v to %+v", old, format) + } + } + } + + data, err := json.MarshalIndent(format, "", "") + if err != nil { + return fmt.Errorf("json: %s", err) + } + + m.fmt = format + now := time.Now() + n := &node{ + Type: TypeDirectory, + Atime: now.UnixNano() / 1000, + Mtime: now.UnixNano() / 1000, + Ctime: now.UnixNano() / 1000, + Nlink: 2, + Length: 4 << 10, + Parent: 1, + } + return m.txn(func(s *xorm.Session) error { + if format.TrashDays > 0 { + ok2, err := s.Get(&node{Inode: TrashInode}) + if err != nil { + return err + } + if !ok2 { + n.Inode = TrashInode + n.Mode = 0555 + if err = mustInsert(s, n); err != nil { + return err + } + } + } + if ok { + _, err = s.Update(&setting{"format", string(data)}, &setting{Name: "format"}) + return err + } + var set = &setting{"format", string(data)} + n.Inode = 1 + n.Mode = 0777 + var cs = []counter{ + {"nextInode", 2}, // 1 is root + {"nextChunk", 1}, + {"nextSession", 0}, + {"usedSpace", 0}, + {"totalInodes", 0}, + {"nextCleanupSlices", 0}, + } + return mustInsert(s, set, n, &cs) + }) +} + +func (m *dbMeta) Reset() error { + return m.db.DropTables(&setting{}, &counter{}, + &node{}, &edge{}, &symlink{}, &xattr{}, + &chunk{}, &chunkRef{}, + &session{}, &sustained{}, &delfile{}, + &flock{}, &plock{}) +} + +func (m *dbMeta) Load() (*Format, error) { + var s = setting{Name: "format"} + ok, err := m.db.Get(&s) + if err == nil && !ok { + err = fmt.Errorf("database is not formatted") + } + if err != nil { + return nil, err + } + + err = json.Unmarshal([]byte(s.Value), &m.fmt) + if err != nil { + return nil, fmt.Errorf("json: %s", err) + } + return &m.fmt, nil +} + +func (m *dbMeta) NewSession() error { + go m.refreshUsage() + if m.conf.ReadOnly { + return nil + } + if err := m.db.Sync2(new(session)); err != nil { // old client has no info field + return err + } + if m.db.DriverName() == "mysql" { + m.updateCollate() + } + // update the owner from uint64 to int64 + if err := m.db.Sync2(new(flock), new(plock)); err != nil { + logger.Fatalf("update table flock, plock: %s", err) + } + + info := newSessionInfo() + info.MountPoint = m.conf.MountPoint + data, err := json.Marshal(info) + if err != nil { + return fmt.Errorf("json: %s", err) + } + var v int64 + for { + v, err = m.incrCounter("nextSession", 1) + if err != nil { + return fmt.Errorf("create session: %s", err) + } + err = m.txn(func(s *xorm.Session) error { + return mustInsert(s, &session{uint64(v), time.Now().Unix(), data}) + }) + if err == nil { + break + } + if strings.Contains(err.Error(), "UNIQUE constraint failed") { + logger.Warnf("session id %d is already used", v) + continue + } + if err != nil { + return fmt.Errorf("insert new session: %s", err) + } + } + m.sid = uint64(v) + logger.Debugf("session is %d", m.sid) + + go m.refreshSession() + go m.cleanupDeletedFiles() + go m.cleanupSlices() + go m.cleanupTrash() + go m.flushStats() + return nil +} + +func (m *dbMeta) getSession(row *session, detail bool) (*Session, error) { + var s Session + if row.Info == nil { // legacy client has no info + row.Info = []byte("{}") + } + if err := json.Unmarshal(row.Info, &s); err != nil { + return nil, fmt.Errorf("corrupted session info; json error: %s", err) + } + s.Sid = row.Sid + s.Heartbeat = 
time.Unix(row.Heartbeat, 0) + if detail { + var ( + srows []sustained + frows []flock + prows []plock + ) + if err := m.db.Find(&srows, &sustained{Sid: s.Sid}); err != nil { + return nil, fmt.Errorf("find sustained %d: %s", s.Sid, err) + } + s.Sustained = make([]Ino, 0, len(srows)) + for _, srow := range srows { + s.Sustained = append(s.Sustained, srow.Inode) + } + + if err := m.db.Find(&frows, &flock{Sid: s.Sid}); err != nil { + return nil, fmt.Errorf("find flock %d: %s", s.Sid, err) + } + s.Flocks = make([]Flock, 0, len(frows)) + for _, frow := range frows { + s.Flocks = append(s.Flocks, Flock{frow.Inode, uint64(frow.Owner), string(frow.Ltype)}) + } + + if err := m.db.Find(&prows, &plock{Sid: s.Sid}); err != nil { + return nil, fmt.Errorf("find plock %d: %s", s.Sid, err) + } + s.Plocks = make([]Plock, 0, len(prows)) + for _, prow := range prows { + s.Plocks = append(s.Plocks, Plock{prow.Inode, uint64(prow.Owner), prow.Records}) + } + } + return &s, nil +} + +func (m *dbMeta) GetSession(sid uint64) (*Session, error) { + row := session{Sid: sid} + ok, err := m.db.Get(&row) + if err != nil { + return nil, err + } + if !ok { + return nil, fmt.Errorf("session not found: %d", sid) + } + return m.getSession(&row, true) +} + +func (m *dbMeta) ListSessions() ([]*Session, error) { + var rows []session + err := m.db.Find(&rows) + if err != nil { + return nil, err + } + sessions := make([]*Session, 0, len(rows)) + for i := range rows { + s, err := m.getSession(&rows[i], false) + if err != nil { + logger.Errorf("get session: %s", err) + continue + } + sessions = append(sessions, s) + } + return sessions, nil +} + +func (m *dbMeta) incrCounter(name string, batch int64) (int64, error) { + var v int64 + err := m.txn(func(s *xorm.Session) error { + var c = counter{Name: name} + ok, err := s.Get(&c) + if err != nil { + return err + } + v = c.Value + batch + if batch > 0 { + c.Value = v + if ok { + _, err = s.Cols("value").Update(&c, &counter{Name: name}) + } else { + err = mustInsert(s, &c) + } + } + return err + }) + return v, err +} + +func mustInsert(s *xorm.Session, beans ...interface{}) error { + var start, end int + batchSize := 200 + for i := 0; i < len(beans)/batchSize; i++ { + end = start + batchSize + inserted, err := s.Insert(beans[start:end]...) + if err == nil && int(inserted) < end-start { + return fmt.Errorf("%d records not inserted: %+v", end-start-int(inserted), beans[start:end]) + } + start = end + } + if len(beans)%batchSize != 0 { + inserted, err := s.Insert(beans[end:]...) 
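+ // Verify that the trailing batch (the beans left over after the full batchSize groups) was inserted completely.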
+ if err == nil && int(inserted) < len(beans)-end { + return fmt.Errorf("%d records not inserted: %+v", len(beans)-end-int(inserted), beans[end:]) + } + } + return nil +} + +var errBusy error + +func (m *dbMeta) shouldRetry(err error) bool { + if err == nil { + return false + } + if _, ok := err.(syscall.Errno); ok { + return false + } + // TODO: add other retryable errors here + msg := err.Error() + switch m.db.DriverName() { + case "sqlite3": + return errors.Is(err, errBusy) || strings.Contains(msg, "database is locked") + case "mysql": + // MySQL, MariaDB or TiDB + return strings.Contains(msg, "try restarting transaction") || strings.Contains(msg, "try again later") + case "postgres": + return strings.Contains(msg, "current transaction is aborted") || strings.Contains(msg, "deadlock detected") + default: + return false + } +} + +func (m *dbMeta) txn(f func(s *xorm.Session) error) error { + if m.conf.ReadOnly { + return syscall.EROFS + } + start := time.Now() + defer func() { txDist.Observe(time.Since(start).Seconds()) }() + var err error + for i := 0; i < 50; i++ { + _, err = m.db.Transaction(func(s *xorm.Session) (interface{}, error) { + s.ForUpdate() + return nil, f(s) + }) + if m.shouldRetry(err) { + txRestart.Add(1) + logger.Debugf("conflicted transaction, restart it (tried %d): %s", i+1, err) + time.Sleep(time.Millisecond * time.Duration(i*i)) + continue + } + break + } + return err +} + +func (m *dbMeta) parseAttr(n *node, attr *Attr) { + if attr == nil { + return + } + attr.Typ = n.Type + attr.Mode = n.Mode + attr.Flags = n.Flags + attr.Uid = n.Uid + attr.Gid = n.Gid + attr.Atime = n.Atime / 1e6 + attr.Atimensec = uint32(n.Atime % 1e6 * 1000) + attr.Mtime = n.Mtime / 1e6 + attr.Mtimensec = uint32(n.Mtime % 1e6 * 1000) + attr.Ctime = n.Ctime / 1e6 + attr.Ctimensec = uint32(n.Ctime % 1e6 * 1000) + attr.Nlink = n.Nlink + attr.Length = n.Length + attr.Rdev = n.Rdev + attr.Parent = n.Parent + attr.Full = true +} + +func (m *dbMeta) flushStats() { + var inttype = "BIGINT" + if m.db.DriverName() == "mysql" { + inttype = "SIGNED" + } + for { + newSpace := atomic.SwapInt64(&m.newSpace, 0) + newInodes := atomic.SwapInt64(&m.newInodes, 0) + if newSpace != 0 || newInodes != 0 { + err := m.txn(func(s *xorm.Session) error { + _, err := s.Exec("UPDATE jfs_counter SET value=value+ CAST((CASE name WHEN 'usedSpace' THEN ? ELSE ? 
END) AS "+inttype+") WHERE name='usedSpace' OR name='totalInodes' ", newSpace, newInodes) + return err + }) + if err != nil && !strings.Contains(err.Error(), "attempt to write a readonly database") { + logger.Warnf("update stats: %s", err) + m.updateStats(newSpace, newInodes) + } + } + time.Sleep(time.Second) + } +} + +func (m *dbMeta) doLookup(ctx Context, parent Ino, name string, inode *Ino, attr *Attr) syscall.Errno { + dbSession := m.db.Table(&edge{}) + if attr != nil { + dbSession = dbSession.Join("INNER", &node{}, "jfs_edge.inode=jfs_node.inode") + } + nn := namedNode{node: node{Parent: parent}, Name: name} + exist, err := dbSession.Select("*").Get(&nn) + if err != nil { + return errno(err) + } + if !exist { + return syscall.ENOENT + } + *inode = nn.Inode + m.parseAttr(&nn.node, attr) + return 0 +} + +func (m *dbMeta) doGetAttr(ctx Context, inode Ino, attr *Attr) syscall.Errno { + var n = node{Inode: inode} + ok, err := m.db.Get(&n) + if ok { + m.parseAttr(&n, attr) + } else if err == nil { + err = syscall.ENOENT + } + return errno(err) +} + +func clearSUGIDSQL(ctx Context, cur *node, set *Attr) { + switch runtime.GOOS { + case "darwin": + if ctx.Uid() != 0 { + // clear SUID and SGID + cur.Mode &= 01777 + set.Mode &= 01777 + } + case "linux": + // same as ext + if cur.Type != TypeDirectory { + if ctx.Uid() != 0 || (cur.Mode>>3)&1 != 0 { + // clear SUID and SGID + cur.Mode &= 01777 + set.Mode &= 01777 + } else { + // keep SGID if the file is non-group-executable + cur.Mode &= 03777 + set.Mode &= 03777 + } + } + } +} + +func (m *dbMeta) SetAttr(ctx Context, inode Ino, set uint16, sugidclearmode uint8, attr *Attr) syscall.Errno { + defer timeit(time.Now()) + inode = m.checkRoot(inode) + defer func() { m.of.InvalidateChunk(inode, 0xFFFFFFFE) }() + return errno(m.txn(func(s *xorm.Session) error { + var cur = node{Inode: inode} + ok, err := s.Get(&cur) + if err != nil { + return err + } + if !ok { + return syscall.ENOENT + } + if (set&(SetAttrUID|SetAttrGID)) != 0 && (set&SetAttrMode) != 0 { + attr.Mode |= (cur.Mode & 06000) + } + var changed bool + if (cur.Mode&06000) != 0 && (set&(SetAttrUID|SetAttrGID)) != 0 { + clearSUGIDSQL(ctx, &cur, attr) + changed = true + } + if set&SetAttrUID != 0 && cur.Uid != attr.Uid { + cur.Uid = attr.Uid + changed = true + } + if set&SetAttrGID != 0 && cur.Gid != attr.Gid { + cur.Gid = attr.Gid + changed = true + } + if set&SetAttrMode != 0 { + if ctx.Uid() != 0 && (attr.Mode&02000) != 0 { + if ctx.Gid() != cur.Gid { + attr.Mode &= 05777 + } + } + if attr.Mode != cur.Mode { + cur.Mode = attr.Mode + changed = true + } + } + now := time.Now().UnixNano() / 1e3 + if set&SetAttrAtime != 0 { + cur.Atime = attr.Atime*1e6 + int64(attr.Atimensec)/1e3 + changed = true + } + if set&SetAttrAtimeNow != 0 { + cur.Atime = now + changed = true + } + if set&SetAttrMtime != 0 { + cur.Mtime = attr.Mtime*1e6 + int64(attr.Mtimensec)/1e3 + changed = true + } + if set&SetAttrMtimeNow != 0 { + cur.Mtime = now + changed = true + } + m.parseAttr(&cur, attr) + if !changed { + return nil + } + cur.Ctime = now + _, err = s.Cols("mode", "uid", "gid", "atime", "mtime", "ctime").Update(&cur, &node{Inode: inode}) + if err == nil { + m.parseAttr(&cur, attr) + } + return err + })) +} + +func (m *dbMeta) appendSlice(s *xorm.Session, inode Ino, indx uint32, buf []byte) error { + var r sql.Result + var err error + driver := m.db.DriverName() + if driver == "sqlite3" || driver == "postgres" { + r, err = s.Exec("update jfs_chunk set slices=slices || ? where inode=? 
AND indx=?", buf, inode, indx) + } else { + r, err = s.Exec("update jfs_chunk set slices=concat(slices, ?) where inode=? AND indx=?", buf, inode, indx) + } + if err == nil { + if n, _ := r.RowsAffected(); n == 0 { + err = mustInsert(s, &chunk{inode, indx, buf}) + } + } + return err +} + +func (m *dbMeta) Truncate(ctx Context, inode Ino, flags uint8, length uint64, attr *Attr) syscall.Errno { + defer timeit(time.Now()) + f := m.of.find(inode) + if f != nil { + f.Lock() + defer f.Unlock() + } + defer func() { m.of.InvalidateChunk(inode, 0xFFFFFFFF) }() + var newSpace int64 + err := m.txn(func(s *xorm.Session) error { + var n = node{Inode: inode} + ok, err := s.Get(&n) + if err != nil { + return err + } + if !ok { + return syscall.ENOENT + } + if n.Type != TypeFile { + return syscall.EPERM + } + if length == n.Length { + m.parseAttr(&n, attr) + return nil + } + newSpace = align4K(length) - align4K(n.Length) + if newSpace > 0 && m.checkQuota(newSpace, 0) { + return syscall.ENOSPC + } + var c chunk + var zeroChunks []uint32 + var left, right = n.Length, length + if left > right { + right, left = left, right + } + if right/ChunkSize-left/ChunkSize > 1 { + rows, err := s.Where("inode = ? AND indx > ? AND indx < ?", inode, left/ChunkSize, right/ChunkSize).Cols("indx").Rows(&c) + if err != nil { + return err + } + for rows.Next() { + if err = rows.Scan(&c); err != nil { + _ = rows.Close() + return err + } + zeroChunks = append(zeroChunks, c.Indx) + } + _ = rows.Close() + } + + l := uint32(right - left) + if right > (left/ChunkSize+1)*ChunkSize { + l = ChunkSize - uint32(left%ChunkSize) + } + if err = m.appendSlice(s, inode, uint32(left/ChunkSize), marshalSlice(uint32(left%ChunkSize), 0, 0, 0, l)); err != nil { + return err + } + buf := marshalSlice(0, 0, 0, 0, ChunkSize) + for _, indx := range zeroChunks { + if err = m.appendSlice(s, inode, indx, buf); err != nil { + return err + } + } + if right > (left/ChunkSize+1)*ChunkSize && right%ChunkSize > 0 { + if err = m.appendSlice(s, inode, uint32(right/ChunkSize), marshalSlice(0, 0, 0, 0, uint32(right%ChunkSize))); err != nil { + return err + } + } + n.Length = length + now := time.Now().UnixNano() / 1e3 + n.Mtime = now + n.Ctime = now + if _, err = s.Cols("length", "mtime", "ctime").Update(&n, &node{Inode: n.Inode}); err != nil { + return err + } + m.parseAttr(&n, attr) + return nil + }) + if err == nil { + m.updateStats(newSpace, 0) + } + return errno(err) +} + +func (m *dbMeta) Fallocate(ctx Context, inode Ino, mode uint8, off uint64, size uint64) syscall.Errno { + if mode&fallocCollapesRange != 0 && mode != fallocCollapesRange { + return syscall.EINVAL + } + if mode&fallocInsertRange != 0 && mode != fallocInsertRange { + return syscall.EINVAL + } + if mode == fallocInsertRange || mode == fallocCollapesRange { + return syscall.ENOTSUP + } + if mode&fallocPunchHole != 0 && mode&fallocKeepSize == 0 { + return syscall.EINVAL + } + if size == 0 { + return syscall.EINVAL + } + defer timeit(time.Now()) + f := m.of.find(inode) + if f != nil { + f.Lock() + defer f.Unlock() + } + defer func() { m.of.InvalidateChunk(inode, 0xFFFFFFFF) }() + var newSpace int64 + err := m.txn(func(s *xorm.Session) error { + var n = node{Inode: inode} + ok, err := s.Get(&n) + if err != nil { + return err + } + if !ok { + return syscall.ENOENT + } + if n.Type == TypeFIFO { + return syscall.EPIPE + } + if n.Type != TypeFile { + return syscall.EPERM + } + length := n.Length + if off+size > n.Length { + if mode&fallocKeepSize == 0 { + length = off + size + } + } + + old := n.Length 
+ newSpace = align4K(length) - align4K(n.Length) + if m.checkQuota(newSpace, 0) { + return syscall.ENOSPC + } + now := time.Now().UnixNano() / 1e3 + n.Length = length + n.Mtime = now + n.Ctime = now + if _, err := s.Cols("length", "mtime", "ctime").Update(&n, &node{Inode: inode}); err != nil { + return err + } + if mode&(fallocZeroRange|fallocPunchHole) != 0 { + if off+size > old { + size = old - off + } + for size > 0 { + indx := uint32(off / ChunkSize) + coff := off % ChunkSize + l := size + if coff+size > ChunkSize { + l = ChunkSize - coff + } + err = m.appendSlice(s, inode, indx, marshalSlice(uint32(coff), 0, 0, 0, uint32(l))) + if err != nil { + return err + } + off += l + size -= l + } + } + return nil + }) + if err == nil { + m.updateStats(newSpace, 0) + } + return errno(err) +} + +func (m *dbMeta) doReadlink(ctx Context, inode Ino) ([]byte, error) { + var l = symlink{Inode: inode} + _, err := m.db.Get(&l) + return []byte(l.Target), err +} + +func (m *dbMeta) doMknod(ctx Context, parent Ino, name string, _type uint8, mode, cumask uint16, rdev uint32, path string, inode *Ino, attr *Attr) syscall.Errno { + if m.checkQuota(4<<10, 1) { + return syscall.ENOSPC + } + parent = m.checkRoot(parent) + var ino Ino + var err error + if parent == TrashInode { + var next int64 + next, err = m.incrCounter("nextTrash", 1) + ino = TrashInode + Ino(next) + } else { + ino, err = m.nextInode() + } + if err != nil { + return errno(err) + } + var n node + n.Inode = ino + n.Type = _type + n.Mode = mode & ^cumask + n.Uid = ctx.Uid() + n.Gid = ctx.Gid() + if _type == TypeDirectory { + n.Nlink = 2 + n.Length = 4 << 10 + } else { + n.Nlink = 1 + if _type == TypeSymlink { + n.Length = uint64(len(path)) + } else { + n.Length = 0 + n.Rdev = rdev + } + } + n.Parent = parent + if inode != nil { + *inode = ino + } + + err = m.txn(func(s *xorm.Session) error { + var pn = node{Inode: parent} + ok, err := s.Get(&pn) + if err != nil { + return err + } + if !ok { + return syscall.ENOENT + } + if pn.Type != TypeDirectory { + return syscall.ENOTDIR + } + var e = edge{Parent: parent, Name: name} + ok, err = s.Get(&e) + if err != nil { + return err + } + var foundIno Ino + var foundType uint8 + if ok { + foundType, foundIno = e.Type, e.Inode + } else if m.conf.CaseInsensi { + if entry := m.resolveCase(ctx, parent, name); entry != nil { + foundType, foundIno = entry.Attr.Typ, entry.Inode + } + } + if foundIno != 0 { + if _type == TypeFile || _type == TypeDirectory { + foundNode := node{Inode: foundIno} + ok, err = s.Get(&foundNode) + if err != nil { + return err + } else if ok { + m.parseAttr(&foundNode, attr) + } else if attr != nil { + *attr = Attr{Typ: foundType, Parent: parent} // corrupt entry + } + if inode != nil { + *inode = foundIno + } + } + return syscall.EEXIST + } + + now := time.Now().UnixNano() / 1e3 + if _type == TypeDirectory { + pn.Nlink++ + } + pn.Mtime = now + pn.Ctime = now + n.Atime = now + n.Mtime = now + n.Ctime = now + if pn.Mode&02000 != 0 || ctx.Value(CtxKey("behavior")) == "Hadoop" || runtime.GOOS == "darwin" { + n.Gid = pn.Gid + if _type == TypeDirectory && runtime.GOOS == "linux" { + n.Mode |= pn.Mode & 02000 + } + } + + if err = mustInsert(s, &edge{parent, name, ino, _type}, &n); err != nil { + return err + } + if _, err := s.Cols("nlink", "mtime", "ctime").Update(&pn, &node{Inode: pn.Inode}); err != nil { + return err + } + if _type == TypeSymlink { + if err = mustInsert(s, &symlink{Inode: ino, Target: path}); err != nil { + return err + } + } + m.parseAttr(&n, attr) + return nil + }) + if err 
== nil { + m.updateStats(align4K(0), 1) + } + return errno(err) +} + +func (m *dbMeta) doUnlink(ctx Context, parent Ino, name string) syscall.Errno { + var trash Ino + if st := m.checkTrash(parent, &trash); st != 0 { + return st + } + var newSpace, newInode int64 + var n node + var opened bool + err := m.txn(func(s *xorm.Session) error { + var pn = node{Inode: parent} + ok, err := s.Get(&pn) + if err != nil { + return err + } + if !ok { + return syscall.ENOENT + } + if pn.Type != TypeDirectory { + return syscall.ENOTDIR + } + var e = edge{Parent: parent, Name: name} + ok, err = s.Get(&e) + if err != nil { + return err + } + if !ok && m.conf.CaseInsensi { + if ee := m.resolveCase(ctx, parent, name); ee != nil { + ok = true + e.Name = string(ee.Name) + e.Inode = ee.Inode + e.Type = ee.Attr.Typ + } + } + if !ok { + return syscall.ENOENT + } + if e.Type == TypeDirectory { + return syscall.EPERM + } + + n = node{Inode: e.Inode} + ok, err = s.Get(&n) + if err != nil { + return err + } + now := time.Now().UnixNano() / 1e3 + opened = false + if ok { + if ctx.Uid() != 0 && pn.Mode&01000 != 0 && ctx.Uid() != pn.Uid && ctx.Uid() != n.Uid { + return syscall.EACCES + } + n.Ctime = now + if trash == 0 { + n.Nlink-- + if n.Type == TypeFile && n.Nlink == 0 { + opened = m.of.IsOpen(e.Inode) + } + } else if n.Nlink == 1 { + n.Parent = trash + } + } else { + logger.Warnf("no attribute for inode %d (%d, %s)", e.Inode, parent, name) + trash = 0 + } + defer func() { m.of.InvalidateChunk(e.Inode, 0xFFFFFFFE) }() + + pn.Mtime = now + pn.Ctime = now + + if _, err := s.Delete(&edge{Parent: parent, Name: e.Name}); err != nil { + return err + } + if _, err = s.Cols("mtime", "ctime").Update(&pn, &node{Inode: pn.Inode}); err != nil { + return err + } + if n.Nlink > 0 { + if _, err := s.Cols("nlink", "ctime").Update(&n, &node{Inode: e.Inode}); err != nil { + return err + } + if trash > 0 { + if err = mustInsert(s, &edge{trash, fmt.Sprintf("%d-%d-%s", parent, e.Inode, e.Name), e.Inode, e.Type}); err != nil { + return err + } + } + } else { + switch e.Type { + case TypeFile: + if opened { + if err = mustInsert(s, sustained{m.sid, e.Inode}); err != nil { + return err + } + if _, err := s.Cols("nlink", "ctime").Update(&n, &node{Inode: e.Inode}); err != nil { + return err + } + } else { + if err = mustInsert(s, delfile{e.Inode, n.Length, time.Now().Unix()}); err != nil { + return err + } + if _, err := s.Delete(&node{Inode: e.Inode}); err != nil { + return err + } + newSpace, newInode = -align4K(n.Length), -1 + } + case TypeSymlink: + if _, err := s.Delete(&symlink{Inode: e.Inode}); err != nil { + return err + } + fallthrough + default: + if _, err := s.Delete(&node{Inode: e.Inode}); err != nil { + return err + } + newSpace, newInode = -align4K(0), -1 + } + if _, err := s.Delete(&xattr{Inode: e.Inode}); err != nil { + return err + } + } + return err + }) + if err == nil && trash == 0 { + if n.Type == TypeFile && n.Nlink == 0 { + m.fileDeleted(opened, n.Inode, n.Length) + } + m.updateStats(newSpace, newInode) + } + return errno(err) +} + +func (m *dbMeta) doRmdir(ctx Context, parent Ino, name string) syscall.Errno { + var trash Ino + if st := m.checkTrash(parent, &trash); st != 0 { + return st + } + err := m.txn(func(s *xorm.Session) error { + var pn = node{Inode: parent} + ok, err := s.Get(&pn) + if err != nil { + return err + } + if !ok { + return syscall.ENOENT + } + if pn.Type != TypeDirectory { + return syscall.ENOTDIR + } + var e = edge{Parent: parent, Name: name} + ok, err = s.Get(&e) + if err != nil { + return err + } 
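+ // The exact entry name was not found; fall back to a case-insensitive lookup when the
+ // volume is configured as case-insensitive.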
+ if !ok && m.conf.CaseInsensi { + if ee := m.resolveCase(ctx, parent, name); ee != nil { + ok = true + e.Inode = ee.Inode + e.Name = string(ee.Name) + e.Type = ee.Attr.Typ + } + } + if !ok { + return syscall.ENOENT + } + if e.Type != TypeDirectory { + return syscall.ENOTDIR + } + exist, err := s.Exist(&edge{Parent: e.Inode}) + if err != nil { + return err + } + if exist { + return syscall.ENOTEMPTY + } + var n = node{Inode: e.Inode} + ok, err = s.Get(&n) + if err != nil { + return err + } + + now := time.Now().UnixNano() / 1e3 + if ok { + if ctx.Uid() != 0 && pn.Mode&01000 != 0 && ctx.Uid() != pn.Uid && ctx.Uid() != n.Uid { + return syscall.EACCES + } + if trash > 0 { + n.Ctime = now + n.Parent = trash + } + } else { + logger.Warnf("no attribute for inode %d (%d, %s)", e.Inode, parent, name) + trash = 0 + } + pn.Nlink-- + pn.Mtime = now + pn.Ctime = now + + if _, err := s.Delete(&edge{Parent: parent, Name: e.Name}); err != nil { + return err + } + if trash > 0 { + if _, err = s.Cols("nlink", "ctime").Update(&n, &node{Inode: n.Inode}); err != nil { + return err + } + if err = mustInsert(s, &edge{trash, fmt.Sprintf("%d-%d-%s", parent, e.Inode, e.Name), e.Inode, e.Type}); err != nil { + return err + } + } else { + if _, err := s.Delete(&node{Inode: e.Inode}); err != nil { + return err + } + if _, err := s.Delete(&xattr{Inode: e.Inode}); err != nil { + return err + } + } + _, err = s.Cols("nlink", "mtime", "ctime").Update(&pn, &node{Inode: pn.Inode}) + return err + }) + if err == nil && trash == 0 { + m.updateStats(-align4K(0), -1) + } + return errno(err) +} + +func (m *dbMeta) doRename(ctx Context, parentSrc Ino, nameSrc string, parentDst Ino, nameDst string, flags uint32, inode *Ino, attr *Attr) syscall.Errno { + var trash Ino + if st := m.checkTrash(parentDst, &trash); st != 0 { + return st + } + exchange := flags == RenameExchange + var opened bool + var dino Ino + var dn node + var newSpace, newInode int64 + err := m.txn(func(s *xorm.Session) error { + var se = edge{Parent: parentSrc, Name: nameSrc} + ok, err := s.Get(&se) + if err != nil { + return err + } + if !ok && m.conf.CaseInsensi { + if e := m.resolveCase(ctx, parentSrc, nameSrc); e != nil { + ok = true + se.Inode = e.Inode + se.Type = e.Attr.Typ + se.Name = string(e.Name) + } + } + if !ok { + return syscall.ENOENT + } + if parentSrc == parentDst && se.Name == nameDst { + if inode != nil { + *inode = se.Inode + } + return nil + } + var spn = node{Inode: parentSrc} + ok, err = s.Get(&spn) + if err != nil { + return err + } + if !ok { + return syscall.ENOENT + } + if spn.Type != TypeDirectory { + return syscall.ENOTDIR + } + var dpn = node{Inode: parentDst} + ok, err = s.Get(&dpn) + if err != nil { + return err + } + if !ok { + return syscall.ENOENT + } + if dpn.Type != TypeDirectory { + return syscall.ENOTDIR + } + var sn = node{Inode: se.Inode} + ok, err = s.Get(&sn) + if err != nil { + return err + } + if !ok { + return syscall.ENOENT + } + + var de = edge{Parent: parentDst, Name: nameDst} + ok, err = s.Get(&de) + if err != nil { + return err + } + if !ok && m.conf.CaseInsensi { + if e := m.resolveCase(ctx, parentDst, nameDst); e != nil { + ok = true + de.Inode = e.Inode + de.Type = e.Attr.Typ + de.Name = string(e.Name) + } + } + now := time.Now().UnixNano() / 1e3 + opened = false + dn = node{Inode: de.Inode} + if ok { + if flags == RenameNoReplace { + return syscall.EEXIST + } + dino = de.Inode + ok, err := s.Get(&dn) + if err != nil { + return err + } + if !ok { // corrupt entry + logger.Warnf("no attribute for inode %d (%d, 
%s)", dino, parentDst, de.Name) + trash = 0 + } + dn.Ctime = now + if exchange { + dn.Parent = parentSrc + if de.Type == TypeDirectory && parentSrc != parentDst { + dpn.Nlink-- + spn.Nlink++ + } + } else { + if de.Type == TypeDirectory { + exist, err := s.Exist(&edge{Parent: de.Inode}) + if err != nil { + return err + } + if exist { + return syscall.ENOTEMPTY + } + dpn.Nlink-- + if trash > 0 { + dn.Parent = trash + } + } else { + if trash == 0 { + dn.Nlink-- + if de.Type == TypeFile && dn.Nlink == 0 { + opened = m.of.IsOpen(dn.Inode) + } + defer func() { m.of.InvalidateChunk(dino, 0xFFFFFFFE) }() + } else if dn.Nlink == 1 { + dn.Parent = trash + } + } + } + if ctx.Uid() != 0 && dpn.Mode&01000 != 0 && ctx.Uid() != dpn.Uid && ctx.Uid() != dn.Uid { + return syscall.EACCES + } + } else { + if exchange { + return syscall.ENOENT + } + dino = 0 + } + if ctx.Uid() != 0 && spn.Mode&01000 != 0 && ctx.Uid() != spn.Uid && ctx.Uid() != sn.Uid { + return syscall.EACCES + } + + spn.Mtime = now + spn.Ctime = now + dpn.Mtime = now + dpn.Ctime = now + sn.Parent = parentDst + sn.Ctime = now + if se.Type == TypeDirectory && parentSrc != parentDst { + spn.Nlink-- + dpn.Nlink++ + } + if inode != nil { + *inode = sn.Inode + } + m.parseAttr(&sn, attr) + + if exchange { + if _, err := s.Cols("inode", "type").Update(&de, &edge{Parent: parentSrc, Name: se.Name}); err != nil { + return err + } + if _, err := s.Cols("inode", "type").Update(&se, &edge{Parent: parentDst, Name: de.Name}); err != nil { + return err + } + if _, err := s.Cols("ctime", "parent").Update(dn, &node{Inode: dino}); err != nil { + return err + } + } else { + if n, err := s.Delete(&edge{Parent: parentSrc, Name: se.Name}); err != nil { + return err + } else if n != 1 { + return fmt.Errorf("delete src failed") + } + if dino > 0 { + if trash > 0 { + if _, err := s.Cols("ctime", "parent").Update(dn, &node{Inode: dino}); err != nil { + return err + } + name := fmt.Sprintf("%d-%d-%s", parentDst, dino, de.Name) + if err = mustInsert(s, &edge{trash, name, dino, de.Type}); err != nil { + return err + } + } else if de.Type != TypeDirectory && dn.Nlink > 0 { + if _, err := s.Cols("ctime", "nlink").Update(dn, &node{Inode: dino}); err != nil { + return err + } + } else { + if de.Type == TypeFile { + if opened { + if _, err := s.Cols("nlink", "ctime").Update(&dn, &node{Inode: dino}); err != nil { + return err + } + if err = mustInsert(s, sustained{m.sid, dino}); err != nil { + return err + } + } else { + if err = mustInsert(s, delfile{dino, dn.Length, time.Now().Unix()}); err != nil { + return err + } + if _, err := s.Delete(&node{Inode: dino}); err != nil { + return err + } + newSpace, newInode = -align4K(dn.Length), -1 + } + } else { + if de.Type == TypeSymlink { + if _, err := s.Delete(&symlink{Inode: dino}); err != nil { + return err + } + } + if _, err := s.Delete(&node{Inode: dino}); err != nil { + return err + } + newSpace, newInode = -align4K(0), -1 + } + if _, err := s.Delete(&xattr{Inode: dino}); err != nil { + return err + } + } + if _, err := s.Delete(&edge{Parent: parentDst, Name: de.Name}); err != nil { + return err + } + } + if err = mustInsert(s, &edge{parentDst, de.Name, se.Inode, se.Type}); err != nil { + return err + } + } + if parentDst != parentSrc && !isTrash(parentSrc) { + if _, err := s.Cols("nlink", "mtime", "ctime").Update(&spn, &node{Inode: parentSrc}); err != nil { + return err + } + } + if _, err := s.Cols("ctime", "parent").Update(&sn, &node{Inode: sn.Inode}); err != nil { + return err + } + if _, err := s.Cols("nlink", "mtime", 
"ctime").Update(&dpn, &node{Inode: parentDst}); err != nil { + return err + } + return err + }) + if err == nil && !exchange && trash == 0 { + if dino > 0 && dn.Type == TypeFile && dn.Nlink == 0 { + m.fileDeleted(opened, dino, dn.Length) + } + m.updateStats(newSpace, newInode) + } + return errno(err) +} + +func (m *dbMeta) doLink(ctx Context, inode, parent Ino, name string, attr *Attr) syscall.Errno { + return errno(m.txn(func(s *xorm.Session) error { + var pn = node{Inode: parent} + ok, err := s.Get(&pn) + if err != nil { + return err + } + if !ok { + return syscall.ENOENT + } + if pn.Type != TypeDirectory { + return syscall.ENOTDIR + } + var e = edge{Parent: parent, Name: name} + ok, err = s.Get(&e) + if err != nil { + return err + } + if ok || !ok && m.conf.CaseInsensi && m.resolveCase(ctx, parent, name) != nil { + return syscall.EEXIST + } + + var n = node{Inode: inode} + ok, err = s.Get(&n) + if err != nil { + return err + } + if !ok { + return syscall.ENOENT + } + if n.Type == TypeDirectory { + return syscall.EPERM + } + + now := time.Now().UnixNano() / 1e3 + pn.Mtime = now + pn.Ctime = now + n.Nlink++ + n.Ctime = now + + if err = mustInsert(s, &edge{Parent: parent, Name: name, Inode: inode, Type: n.Type}); err != nil { + return err + } + if _, err := s.Cols("mtime", "ctime").Update(&pn, &node{Inode: parent}); err != nil { + return err + } + if _, err := s.Cols("nlink", "ctime").Update(&n, node{Inode: inode}); err != nil { + return err + } + if err == nil { + m.parseAttr(&n, attr) + } + return err + })) +} + +func (m *dbMeta) doReaddir(ctx Context, inode Ino, plus uint8, entries *[]*Entry) syscall.Errno { + dbSession := m.db.Table(&edge{}) + if plus != 0 { + dbSession = dbSession.Join("INNER", &node{}, "jfs_edge.inode=jfs_node.inode") + } + var nodes []namedNode + if err := dbSession.Find(&nodes, &edge{Parent: inode}); err != nil { + return errno(err) + } + for _, n := range nodes { + entry := &Entry{ + Inode: n.Inode, + Name: []byte(n.Name), + Attr: &Attr{}, + } + if plus != 0 { + m.parseAttr(&n.node, entry.Attr) + } else { + entry.Attr.Typ = n.Type + } + *entries = append(*entries, entry) + } + return 0 +} + +func (m *dbMeta) doCleanStaleSession(sid uint64) { + // release locks + _, _ = m.db.Delete(flock{Sid: sid}) + _, _ = m.db.Delete(plock{Sid: sid}) + + var s = sustained{Sid: sid} + rows, err := m.db.Rows(&s) + if err != nil { + logger.Warnf("scan stale session %d: %s", sid, err) + return + } + + var inodes []Ino + for rows.Next() { + if rows.Scan(&s) == nil { + inodes = append(inodes, s.Inode) + } + } + _ = rows.Close() + + done := true + for _, inode := range inodes { + if err := m.doDeleteSustainedInode(sid, inode); err != nil { + logger.Errorf("Failed to delete inode %d: %s", inode, err) + done = false + } + } + if done { + _ = m.txn(func(ses *xorm.Session) error { + _, err = ses.Delete(&session{Sid: sid}) + logger.Infof("cleanup session %d: %s", sid, err) + return err + }) + } +} + +func (m *dbMeta) CleanStaleSessions() { + var s session + rows, err := m.db.Where("Heartbeat < ?", time.Now().Add(time.Minute*-5).Unix()).Rows(&s) + if err != nil { + logger.Warnf("scan stale sessions: %s", err) + return + } + var ids []uint64 + for rows.Next() { + if rows.Scan(&s) == nil { + ids = append(ids, s.Sid) + } + } + _ = rows.Close() + for _, sid := range ids { + m.doCleanStaleSession(sid) + } +} + +func (m *dbMeta) refreshSession() { + for { + time.Sleep(time.Minute) + m.Lock() + if m.umounting { + m.Unlock() + return + } + _ = m.txn(func(ses *xorm.Session) error { + n, err := 
ses.Cols("Heartbeat").Update(&session{Heartbeat: time.Now().Unix()}, &session{Sid: m.sid}) + if err == nil && n == 0 { + err = fmt.Errorf("no session found matching sid: %d", m.sid) + } + if err != nil { + logger.Errorf("update session: %s", err) + } + return err + }) + m.Unlock() + if _, err := m.Load(); err != nil { + logger.Warnf("reload setting: %s", err) + } + go m.CleanStaleSessions() + } +} + +func (m *dbMeta) doDeleteSustainedInode(sid uint64, inode Ino) error { + var n = node{Inode: inode} + var newSpace int64 + err := m.txn(func(s *xorm.Session) error { + ok, err := s.Get(&n) + if err != nil { + return err + } + if !ok { + return nil + } + if err = mustInsert(s, &delfile{inode, n.Length, time.Now().Unix()}); err != nil { + return err + } + _, err = s.Delete(&sustained{sid, inode}) + if err != nil { + return err + } + newSpace = -align4K(n.Length) + _, err = s.Delete(&node{Inode: inode}) + return err + }) + if err == nil { + m.updateStats(newSpace, -1) + go m.doDeleteFileData(inode, n.Length) + } + return err +} + +func (m *dbMeta) Read(ctx Context, inode Ino, indx uint32, chunks *[]Slice) syscall.Errno { + f := m.of.find(inode) + if f != nil { + f.RLock() + defer f.RUnlock() + } + if cs, ok := m.of.ReadChunk(inode, indx); ok { + *chunks = cs + return 0 + } + defer timeit(time.Now()) + var c chunk + _, err := m.db.Where("inode=? and indx=?", inode, indx).Get(&c) + if err != nil { + return errno(err) + } + ss := readSliceBuf(c.Slices) + if ss == nil { + return syscall.EIO + } + *chunks = buildSlice(ss) + m.of.CacheChunk(inode, indx, *chunks) + if !m.conf.ReadOnly && (len(c.Slices)/sliceBytes >= 5 || len(*chunks) >= 5) { + go m.compactChunk(inode, indx, false) + } + return 0 +} + +func (m *dbMeta) Write(ctx Context, inode Ino, indx uint32, off uint32, slice Slice) syscall.Errno { + defer timeit(time.Now()) + f := m.of.find(inode) + if f != nil { + f.Lock() + defer f.Unlock() + } + defer func() { m.of.InvalidateChunk(inode, indx) }() + var newSpace int64 + var needCompact bool + err := m.txn(func(s *xorm.Session) error { + var n = node{Inode: inode} + ok, err := s.Get(&n) + if err != nil { + return err + } + if !ok { + return syscall.ENOENT + } + if n.Type != TypeFile { + return syscall.EPERM + } + newleng := uint64(indx)*ChunkSize + uint64(off) + uint64(slice.Len) + if newleng > n.Length { + newSpace = align4K(newleng) - align4K(n.Length) + n.Length = newleng + } + if m.checkQuota(newSpace, 0) { + return syscall.ENOSPC + } + now := time.Now().UnixNano() / 1e3 + n.Mtime = now + n.Ctime = now + + var ck chunk + ok, err = s.Where("Inode = ? 
and Indx = ?", inode, indx).Get(&ck) + if err != nil { + return err + } + buf := marshalSlice(off, slice.Chunkid, slice.Size, slice.Off, slice.Len) + if ok { + if err := m.appendSlice(s, inode, indx, buf); err != nil { + return err + } + } else { + if err = mustInsert(s, &chunk{inode, indx, buf}); err != nil { + return err + } + } + if err = mustInsert(s, chunkRef{slice.Chunkid, slice.Size, 1}); err != nil { + return err + } + _, err = s.Cols("length", "mtime", "ctime").Update(&n, &node{Inode: inode}) + if err == nil { + needCompact = (len(ck.Slices)/sliceBytes)%100 == 99 + } + return err + }) + if err == nil { + if needCompact { + go m.compactChunk(inode, indx, false) + } + m.updateStats(newSpace, 0) + } + return errno(err) +} + +func (m *dbMeta) CopyFileRange(ctx Context, fin Ino, offIn uint64, fout Ino, offOut uint64, size uint64, flags uint32, copied *uint64) syscall.Errno { + defer timeit(time.Now()) + f := m.of.find(fout) + if f != nil { + f.Lock() + defer f.Unlock() + } + var newSpace int64 + defer func() { m.of.InvalidateChunk(fout, 0xFFFFFFFF) }() + err := m.txn(func(s *xorm.Session) error { + var nin, nout = node{Inode: fin}, node{Inode: fout} + ok, err := s.Get(&nin) + if err != nil { + return err + } + ok2, err2 := s.Get(&nout) + if err2 != nil { + return err2 + } + if !ok || !ok2 { + return syscall.ENOENT + } + if nin.Type != TypeFile { + return syscall.EINVAL + } + if offIn >= nin.Length { + *copied = 0 + return nil + } + if offIn+size > nin.Length { + size = nin.Length - offIn + } + if nout.Type != TypeFile { + return syscall.EINVAL + } + + newleng := offOut + size + if newleng > nout.Length { + newSpace = align4K(newleng) - align4K(nout.Length) + nout.Length = newleng + } + if m.checkQuota(newSpace, 0) { + return syscall.ENOSPC + } + now := time.Now().UnixNano() / 1e3 + nout.Mtime = now + nout.Ctime = now + + var c chunk + rows, err := s.Where("inode = ? AND indx >= ? AND indx <= ?", fin, offIn/ChunkSize, (offIn+size)/ChunkSize).Rows(&c) + if err != nil { + return err + } + chunks := make(map[uint32][]*slice) + for rows.Next() { + err = rows.Scan(&c) + if err != nil { + _ = rows.Close() + return err + } + chunks[c.Indx] = readSliceBuf(c.Slices) + } + _ = rows.Close() + + ses := s + updateSlices := func(indx uint32, buf []byte, chunkid uint64, size uint32) error { + if err := m.appendSlice(ses, fout, indx, buf); err != nil { + return err + } + if chunkid > 0 { + if _, err := ses.Exec("update jfs_chunk_ref set refs=refs+1 where chunkid = ? AND size = ?", chunkid, size); err != nil { + return err + } + } + return nil + } + coff := offIn / ChunkSize * ChunkSize + for coff < offIn+size { + if coff%ChunkSize != 0 { + panic("coff") + } + // Add a zero chunk for hole + ss := append([]*slice{{len: ChunkSize}}, chunks[uint32(coff/ChunkSize)]...) 
+ cs := buildSlice(ss) + for _, s := range cs { + pos := coff + coff += uint64(s.Len) + if pos < offIn+size && pos+uint64(s.Len) > offIn { + if pos < offIn { + dec := offIn - pos + s.Off += uint32(dec) + pos += dec + s.Len -= uint32(dec) + } + if pos+uint64(s.Len) > offIn+size { + dec := pos + uint64(s.Len) - (offIn + size) + s.Len -= uint32(dec) + } + doff := pos - offIn + offOut + indx := uint32(doff / ChunkSize) + dpos := uint32(doff % ChunkSize) + if dpos+s.Len > ChunkSize { + if err := updateSlices(indx, marshalSlice(dpos, s.Chunkid, s.Size, s.Off, ChunkSize-dpos), s.Chunkid, s.Size); err != nil { + return err + } + skip := ChunkSize - dpos + if err := updateSlices(indx+1, marshalSlice(0, s.Chunkid, s.Size, s.Off+skip, s.Len-skip), s.Chunkid, s.Size); err != nil { + return err + } + } else { + if err := updateSlices(indx, marshalSlice(dpos, s.Chunkid, s.Size, s.Off, s.Len), s.Chunkid, s.Size); err != nil { + return err + } + } + } + } + } + if _, err := s.Cols("length", "mtime", "ctime").Update(&nout, &node{Inode: fout}); err != nil { + return err + } + *copied = size + return nil + }) + if err == nil { + m.updateStats(newSpace, 0) + } + return errno(err) +} + +func (m *dbMeta) cleanupDeletedFiles() { + for { + time.Sleep(time.Minute) + var d delfile + rows, err := m.db.Where("expire < ?", time.Now().Add(-time.Hour).Unix()).Rows(&d) + if err != nil { + continue + } + var fs []delfile + for rows.Next() { + if rows.Scan(&d) == nil { + fs = append(fs, d) + } + } + _ = rows.Close() + for _, f := range fs { + logger.Debugf("cleanup chunks of inode %d with %d bytes", f.Inode, f.Length) + m.doDeleteFileData(f.Inode, f.Length) + } + } +} + +func (m *dbMeta) cleanupSlices() { + for { + time.Sleep(time.Hour) + + // once per hour + var c = counter{Name: "nextCleanupSlices"} + _, err := m.db.Get(&c) + if err != nil { + continue + } + now := time.Now().Unix() + if c.Value+3600 > now { + continue + } + _ = m.txn(func(ses *xorm.Session) error { + _, err := ses.Update(&counter{Value: now}, counter{Name: "nextCleanupSlices"}) + return err + }) + m.doCleanupSlices() + } +} + +func (m *dbMeta) doCleanupSlices() { + var ck chunkRef + rows, err := m.db.Where("refs <= 0").Rows(&ck) + if err != nil { + return + } + var cks []chunkRef + for rows.Next() { + if rows.Scan(&ck) == nil { + cks = append(cks, ck) + } + } + _ = rows.Close() + for _, ck := range cks { + m.deleteSlice(ck.Chunkid, ck.Size) + } +} + +func (m *dbMeta) deleteChunk(inode Ino, indx uint32) error { + var c chunk + var ss []*slice + err := m.txn(func(ses *xorm.Session) error { + ok, err := ses.Where("inode = ? AND indx = ?", inode, indx).Get(&c) + if err != nil { + return err + } + if !ok { + return nil + } + ss = readSliceBuf(c.Slices) + for _, s := range ss { + _, err = ses.Exec("update jfs_chunk_ref set refs=refs-1 where chunkid=? AND size=?", s.chunkid, s.size) + if err != nil { + return err + } + } + c.Slices = nil + n, err := ses.Where("inode = ? 
AND indx = ?", inode, indx).Delete(&c) + if err == nil && n == 0 { + err = fmt.Errorf("chunk %d:%d changed, try restarting transaction", inode, indx) + } + return err + }) + if err != nil { + return fmt.Errorf("delete slice from chunk %s fail: %s, retry later", inode, err) + } + for _, s := range ss { + var ref = chunkRef{Chunkid: s.chunkid} + ok, err := m.db.Get(&ref) + if err == nil && ok && ref.Refs <= 0 { + m.deleteSlice(s.chunkid, s.size) + } + } + return nil +} + +func (m *dbMeta) doDeleteFileData(inode Ino, length uint64) { + var c = chunk{Inode: inode} + rows, err := m.db.Rows(&c) + if err != nil { + return + } + var indexes []uint32 + for rows.Next() { + if rows.Scan(&c) == nil { + indexes = append(indexes, c.Indx) + } + } + _ = rows.Close() + for _, indx := range indexes { + err = m.deleteChunk(inode, indx) + if err != nil { + logger.Warnf("deleteChunk inode %d index %d error: %s", inode, indx, err) + return + } + } + _, _ = m.db.Delete(delfile{Inode: inode}) +} + +func (m *dbMeta) compactChunk(inode Ino, indx uint32, force bool) { + if !force { + // avoid too many or duplicated compaction + m.Lock() + k := uint64(inode) + (uint64(indx) << 32) + if len(m.compacting) > 10 || m.compacting[k] { + m.Unlock() + return + } + m.compacting[k] = true + m.Unlock() + defer func() { + m.Lock() + delete(m.compacting, k) + m.Unlock() + }() + } + + var c chunk + _, err := m.db.Where("inode=? and indx=?", inode, indx).Get(&c) + if err != nil { + return + } + + ss := readSliceBuf(c.Slices) + skipped := skipSome(ss) + ss = ss[skipped:] + pos, size, chunks := compactChunk(ss) + if len(ss) < 2 || size == 0 { + return + } + + var chunkid uint64 + st := m.NewChunk(Background, &chunkid) + if st != 0 { + return + } + logger.Debugf("compact %d:%d: skipped %d slices (%d bytes) %d slices (%d bytes)", inode, indx, skipped, pos, len(ss), size) + err = m.newMsg(CompactChunk, chunks, chunkid) + if err != nil { + if !strings.Contains(err.Error(), "not exist") && !strings.Contains(err.Error(), "not found") { + logger.Warnf("compact %d %d with %d slices: %s", inode, indx, len(ss), err) + } + return + } + err = m.txn(func(ses *xorm.Session) error { + var c2 = chunk{Inode: inode} + _, err := ses.Where("indx=?", indx).Get(&c2) + if err != nil { + return err + } + if len(c2.Slices) < len(c.Slices) || !bytes.Equal(c.Slices, c2.Slices[:len(c.Slices)]) { + logger.Infof("chunk %d:%d was changed %d -> %d", inode, indx, len(c.Slices), len(c2.Slices)) + return syscall.EINVAL + } + + c2.Slices = append(append(c2.Slices[:skipped*sliceBytes], marshalSlice(pos, chunkid, size, 0, size)...), c2.Slices[len(c.Slices):]...) + if _, err := ses.Where("Inode = ? AND indx = ?", inode, indx).Update(c2); err != nil { + return err + } + // create the key to tracking it + if err = mustInsert(ses, chunkRef{chunkid, size, 1}); err != nil { + return err + } + for _, s := range ss { + if _, err := ses.Exec("update jfs_chunk_ref set refs=refs-1 where chunkid=? 
and size=?", s.chunkid, s.size); err != nil { + return err + } + } + return nil + }) + // there could be false-negative that the compaction is successful, double-check + if err != nil { + var c = chunkRef{Chunkid: chunkid} + ok, e := m.db.Get(&c) + if e == nil { + if ok { + err = nil + } else { + logger.Infof("compacted chunk %d was not used", chunkid) + err = syscall.EINVAL + } + } + } + + if errno, ok := err.(syscall.Errno); ok && errno == syscall.EINVAL { + logger.Infof("compaction for %d:%d is wasted, delete slice %d (%d bytes)", inode, indx, chunkid, size) + m.deleteSlice(chunkid, size) + } else if err == nil { + m.of.InvalidateChunk(inode, indx) + for _, s := range ss { + var ref = chunkRef{Chunkid: s.chunkid} + ok, err := m.db.Get(&ref) + if err == nil && ok && ref.Refs <= 0 { + m.deleteSlice(s.chunkid, s.size) + } + } + } else { + logger.Warnf("compact %d %d: %s", inode, indx, err) + } + go func() { + // wait for the current compaction to finish + time.Sleep(time.Millisecond * 10) + m.compactChunk(inode, indx, force) + }() +} + +func dup(b []byte) []byte { + r := make([]byte, len(b)) + copy(r, b) + return r +} + +func (m *dbMeta) CompactAll(ctx Context, bar *utils.Bar) syscall.Errno { + var c chunk + rows, err := m.db.Where("length(slices) >= ?", sliceBytes*2).Cols("inode", "indx").Rows(&c) + if err != nil { + return errno(err) + } + var cs []chunk + for rows.Next() { + if rows.Scan(&c) == nil { + c.Slices = dup(c.Slices) + cs = append(cs, c) + } + } + _ = rows.Close() + + bar.IncrTotal(int64(len(cs))) + for _, c := range cs { + logger.Debugf("compact chunk %d:%d (%d slices)", c.Inode, c.Indx, len(c.Slices)/sliceBytes) + m.compactChunk(c.Inode, c.Indx, true) + bar.Increment() + } + return 0 +} + +func (m *dbMeta) ListSlices(ctx Context, slices map[Ino][]Slice, delete bool, showProgress func()) syscall.Errno { + if delete { + m.doCleanupSlices() + } + var c chunk + rows, err := m.db.Rows(&c) + if err != nil { + return errno(err) + } + defer rows.Close() + + for rows.Next() { + err = rows.Scan(&c) + if err != nil { + return errno(err) + } + ss := readSliceBuf(c.Slices) + for _, s := range ss { + if s.chunkid > 0 { + slices[c.Inode] = append(slices[c.Inode], Slice{Chunkid: s.chunkid, Size: s.size}) + if showProgress != nil { + showProgress() + } + } + } + } + return 0 +} + +func (m *dbMeta) GetXattr(ctx Context, inode Ino, name string, vbuff *[]byte) syscall.Errno { + defer timeit(time.Now()) + inode = m.checkRoot(inode) + var x = xattr{Inode: inode, Name: name} + ok, err := m.db.Get(&x) + if err != nil { + return errno(err) + } + if !ok { + return ENOATTR + } + *vbuff = x.Value + return 0 +} + +func (m *dbMeta) ListXattr(ctx Context, inode Ino, names *[]byte) syscall.Errno { + defer timeit(time.Now()) + inode = m.checkRoot(inode) + var x = xattr{Inode: inode} + rows, err := m.db.Where("inode = ?", inode).Rows(&x) + if err != nil { + return errno(err) + } + defer rows.Close() + *names = nil + for rows.Next() { + err = rows.Scan(&x) + if err != nil { + return errno(err) + } + *names = append(*names, []byte(x.Name)...) 
+ *names = append(*names, 0) + } + return 0 +} + +func (m *dbMeta) SetXattr(ctx Context, inode Ino, name string, value []byte, flags uint32) syscall.Errno { + if name == "" { + return syscall.EINVAL + } + defer timeit(time.Now()) + inode = m.checkRoot(inode) + return errno(m.txn(func(s *xorm.Session) error { + var x = xattr{inode, name, value} + var err error + var n int64 + switch flags { + case XattrCreate: + n, err = s.Insert(&x) + if err != nil || n == 0 { + err = syscall.EEXIST + } + case XattrReplace: + n, err = s.Update(&x, &xattr{inode, name, nil}) + if err == nil && n == 0 { + err = ENOATTR + } + default: + n, err = s.Insert(&x) + if err != nil || n == 0 { + if m.db.DriverName() == "postgres" { + // cleanup failed session + _ = s.Rollback() + } + _, err = s.Update(&x, &xattr{inode, name, nil}) + } + } + return err + })) +} + +func (m *dbMeta) RemoveXattr(ctx Context, inode Ino, name string) syscall.Errno { + if name == "" { + return syscall.EINVAL + } + defer timeit(time.Now()) + inode = m.checkRoot(inode) + return errno(m.txn(func(s *xorm.Session) error { + n, err := s.Delete(&xattr{Inode: inode, Name: name}) + if err != nil { + return err + } else if n == 0 { + return ENOATTR + } else { + return nil + } + })) +} + +func (m *dbMeta) dumpEntry(inode Ino) (*DumpedEntry, error) { + e := &DumpedEntry{} + return e, m.txn(func(s *xorm.Session) error { + n := &node{Inode: inode} + ok, err := m.db.Get(n) + if err != nil { + return err + } + if !ok { + logger.Warnf("The entry of the inode was not found. inode: %v", inode) + return nil + } + attr := &Attr{} + m.parseAttr(n, attr) + e.Attr = dumpAttr(attr) + e.Attr.Inode = inode + + var rows []xattr + if err = m.db.Find(&rows, &xattr{Inode: inode}); err != nil { + return err + } + if len(rows) > 0 { + xattrs := make([]*DumpedXattr, 0, len(rows)) + for _, x := range rows { + xattrs = append(xattrs, &DumpedXattr{x.Name, string(x.Value)}) + } + sort.Slice(xattrs, func(i, j int) bool { return xattrs[i].Name < xattrs[j].Name }) + e.Xattrs = xattrs + } + + if attr.Typ == TypeFile { + for indx := uint32(0); uint64(indx)*ChunkSize < attr.Length; indx++ { + c := &chunk{Inode: inode, Indx: indx} + if ok, err = m.db.Get(c); err != nil { + return err + } + if !ok { + logger.Warnf("no found chunk target for inode %d indx %d", inode, indx) + return nil + } + ss := readSliceBuf(c.Slices) + slices := make([]*DumpedSlice, 0, len(ss)) + for _, s := range ss { + slices = append(slices, &DumpedSlice{Chunkid: s.chunkid, Pos: s.pos, Size: s.size, Off: s.off, Len: s.len}) + } + e.Chunks = append(e.Chunks, &DumpedChunk{indx, slices}) + } + } else if attr.Typ == TypeSymlink { + l := &symlink{Inode: inode} + ok, err = m.db.Get(l) + if err != nil { + return err + } + if !ok { + logger.Warnf("no link target for inode %d", inode) + return nil + } + e.Symlink = l.Target + } + + return nil + }) +} +func (m *dbMeta) dumpEntryFast(inode Ino) *DumpedEntry { + e := &DumpedEntry{} + n, ok := m.snap.node[inode] + if !ok { + if inode != TrashInode { + logger.Warnf("The entry of the inode was not found. 
inode: %v", inode) + } + return nil + } + attr := &Attr{} + m.parseAttr(n, attr) + e.Attr = dumpAttr(attr) + e.Attr.Inode = inode + + rows, ok := m.snap.xattr[inode] + if ok && len(rows) > 0 { + xattrs := make([]*DumpedXattr, 0, len(rows)) + for _, x := range rows { + xattrs = append(xattrs, &DumpedXattr{x.Name, string(x.Value)}) + } + sort.Slice(xattrs, func(i, j int) bool { return xattrs[i].Name < xattrs[j].Name }) + e.Xattrs = xattrs + } + + if attr.Typ == TypeFile { + for indx := uint32(0); uint64(indx)*ChunkSize < attr.Length; indx++ { + c, ok := m.snap.chunk[fmt.Sprintf("%d-%d", inode, indx)] + if !ok { + logger.Warnf("no found chunk target for inode %d indx %d", inode, indx) + return nil + } + ss := readSliceBuf(c.Slices) + slices := make([]*DumpedSlice, 0, len(ss)) + for _, s := range ss { + slices = append(slices, &DumpedSlice{Chunkid: s.chunkid, Pos: s.pos, Size: s.size, Off: s.off, Len: s.len}) + } + e.Chunks = append(e.Chunks, &DumpedChunk{indx, slices}) + } + } else if attr.Typ == TypeSymlink { + l, ok := m.snap.symlink[inode] + if !ok { + logger.Warnf("no link target for inode %d", inode) + return nil + } + e.Symlink = l.Target + } + return e +} + +func (m *dbMeta) dumpDir(inode Ino, tree *DumpedEntry, bw *bufio.Writer, depth int, showProgress func(totalIncr, currentIncr int64)) error { + bwWrite := func(s string) { + if _, err := bw.WriteString(s); err != nil { + panic(err) + } + } + var edges []*edge + var err error + var ok bool + if m.snap != nil { + edges, ok = m.snap.edges[inode] + if !ok { + logger.Warnf("no edge target for inode %d", inode) + } + } else { + if err := m.db.Find(&edges, &edge{Parent: inode}); err != nil { + return err + } + } + + if showProgress != nil { + showProgress(int64(len(edges)), 0) + } + if err := tree.writeJsonWithOutEntry(bw, depth); err != nil { + return err + } + sort.Slice(edges, func(i, j int) bool { return edges[i].Name < edges[j].Name }) + + for idx, e := range edges { + var entry *DumpedEntry + if m.snap != nil { + entry = m.dumpEntryFast(e.Inode) + } else { + entry, err = m.dumpEntry(e.Inode) + if err != nil { + return err + } + } + + if entry == nil { + continue + } + + entry.Name = e.Name + if e.Type == TypeDirectory { + err = m.dumpDir(e.Inode, entry, bw, depth+2, showProgress) + } else { + err = entry.writeJSON(bw, depth+2) + } + if err != nil { + return err + } + if idx != len(edges)-1 { + bwWrite(",") + } + if showProgress != nil { + showProgress(0, 1) + } + } + bwWrite(fmt.Sprintf("\n%s}\n%s}", strings.Repeat(jsonIndent, depth+1), strings.Repeat(jsonIndent, depth))) + return nil +} + +func (m *dbMeta) makeSnap(bar *utils.Bar) error { + m.snap = &dbSnap{ + node: make(map[Ino]*node), + symlink: make(map[Ino]*symlink), + xattr: make(map[Ino][]*xattr), + edges: make(map[Ino][]*edge), + chunk: make(map[string]*chunk), + } + + for _, s := range []interface{}{new(node), new(symlink), new(edge), new(xattr), new(chunk)} { + if count, err := m.db.Count(s); err == nil { + bar.IncrTotal(count) + } else { + return err + } + } + + bufferSize := 10000 + if err := m.db.BufferSize(bufferSize).Iterate(new(node), func(idx int, bean interface{}) error { + n := bean.(*node) + m.snap.node[n.Inode] = n + bar.Increment() + return nil + }); err != nil { + return err + } + + if err := m.db.BufferSize(bufferSize).Iterate(new(symlink), func(idx int, bean interface{}) error { + s := bean.(*symlink) + m.snap.symlink[s.Inode] = s + bar.Increment() + return nil + }); err != nil { + return err + } + if err := m.db.BufferSize(bufferSize).Iterate(new(edge), 
func(idx int, bean interface{}) error { + e := bean.(*edge) + m.snap.edges[e.Parent] = append(m.snap.edges[e.Parent], e) + bar.Increment() + return nil + }); err != nil { + return err + } + + if err := m.db.BufferSize(bufferSize).Iterate(new(xattr), func(idx int, bean interface{}) error { + x := bean.(*xattr) + m.snap.xattr[x.Inode] = append(m.snap.xattr[x.Inode], x) + bar.Increment() + return nil + }); err != nil { + return err + } + + if err := m.db.BufferSize(bufferSize).Iterate(new(chunk), func(idx int, bean interface{}) error { + c := bean.(*chunk) + m.snap.chunk[fmt.Sprintf("%d-%d", c.Inode, c.Indx)] = c + bar.Increment() + return nil + }); err != nil { + return err + } + return nil +} + +func (m *dbMeta) DumpMeta(w io.Writer, root Ino) (err error) { + defer func() { + if p := recover(); p != nil { + if e, ok := p.(error); ok { + err = e + } else { + err = fmt.Errorf("DumpMeta error: %v", p) + } + } + }() + var drows []delfile + if err := m.db.Find(&drows); err != nil { + return err + } + dels := make([]*DumpedDelFile, 0, len(drows)) + for _, row := range drows { + dels = append(dels, &DumpedDelFile{row.Inode, row.Length, row.Expire}) + } + + progress := utils.NewProgress(false, false) + var tree, trash *DumpedEntry + if root == 0 { + root = m.root + } + if root == 1 { + bar := progress.AddCountBar("Snapshot keys", 0) + if err = m.makeSnap(bar); err != nil { + return fmt.Errorf("Fetch all metadata from DB: %s", err) + } + bar.Done() + tree = m.dumpEntryFast(root) + trash = m.dumpEntryFast(TrashInode) + } else { + if tree, err = m.dumpEntry(root); err != nil { + return err + } + } + if tree == nil { + return errors.New("The entry of the root inode was not found") + } + tree.Name = "FSTree" + format, err := m.Load() + if err != nil { + return err + } + + var crows []counter + if err = m.db.Find(&crows); err != nil { + return err + } + counters := &DumpedCounters{} + for _, row := range crows { + switch row.Name { + case "usedSpace": + counters.UsedSpace = row.Value + case "totalInodes": + counters.UsedInodes = row.Value + case "nextInode": + counters.NextInode = row.Value + case "nextChunk": + counters.NextChunk = row.Value + case "nextSession": + counters.NextSession = row.Value + case "nextTrash": + counters.NextTrash = row.Value + } + } + + var srows []sustained + if err = m.db.Find(&srows); err != nil { + return err + } + ss := make(map[uint64][]Ino) + for _, row := range srows { + ss[row.Sid] = append(ss[row.Sid], row.Inode) + } + sessions := make([]*DumpedSustained, 0, len(ss)) + for k, v := range ss { + sessions = append(sessions, &DumpedSustained{k, v}) + } + + dm := DumpedMeta{ + Setting: format, + Counters: counters, + Sustained: sessions, + DelFiles: dels, + } + + bw, err := dm.writeJsonWithOutTree(w) + if err != nil { + return err + } + + bar := progress.AddCountBar("Dumped entries", 1) // with root + bar.Increment() + if trash != nil { + trash.Name = "Trash" + bar.IncrTotal(1) + bar.Increment() + } + showProgress := func(totalIncr, currentIncr int64) { + bar.IncrTotal(totalIncr) + bar.IncrInt64(currentIncr) + } + if err = m.dumpDir(root, tree, bw, 1, showProgress); err != nil { + return err + } + if trash != nil { + if _, err = bw.WriteString(","); err != nil { + return err + } + if err = m.dumpDir(TrashInode, trash, bw, 1, showProgress); err != nil { + return err + } + } + if _, err = bw.WriteString("\n}\n"); err != nil { + return err + } + progress.Done() + m.snap = nil + + return bw.Flush() +} + +func (m *dbMeta) loadEntry(e *DumpedEntry, cs *DumpedCounters, refs 
map[uint64]*chunkRef) error { + inode := e.Attr.Inode + logger.Debugf("Loading entry inode %d name %s", inode, e.Name) + attr := e.Attr + n := &node{ + Inode: inode, + Type: typeFromString(attr.Type), + Mode: attr.Mode, + Uid: attr.Uid, + Gid: attr.Gid, + Atime: attr.Atime*1e6 + int64(attr.Atimensec)/1e3, + Mtime: attr.Mtime*1e6 + int64(attr.Atimensec)/1e3, + Ctime: attr.Ctime*1e6 + int64(attr.Atimensec)/1e3, + Nlink: attr.Nlink, + Rdev: attr.Rdev, + Parent: e.Parent, + } // Length not set + var beans []interface{} + if n.Type == TypeFile { + n.Length = attr.Length + chunks := make([]*chunk, 0, len(e.Chunks)) + for _, c := range e.Chunks { + if len(c.Slices) == 0 { + continue + } + slices := make([]byte, 0, sliceBytes*len(c.Slices)) + for _, s := range c.Slices { + slices = append(slices, marshalSlice(s.Pos, s.Chunkid, s.Size, s.Off, s.Len)...) + m.Lock() + if refs[s.Chunkid] == nil { + refs[s.Chunkid] = &chunkRef{s.Chunkid, s.Size, 1} + } else { + refs[s.Chunkid].Refs++ + } + m.Unlock() + if cs.NextChunk <= int64(s.Chunkid) { + cs.NextChunk = int64(s.Chunkid) + 1 + } + } + chunks = append(chunks, &chunk{inode, c.Index, slices}) + } + if len(chunks) > 0 { + beans = append(beans, chunks) + } + } else if n.Type == TypeDirectory { + n.Length = 4 << 10 + if len(e.Entries) > 0 { + edges := make([]*edge, 0, len(e.Entries)) + for _, c := range e.Entries { + edges = append(edges, &edge{ + Parent: inode, + Name: c.Name, + Inode: c.Attr.Inode, + Type: typeFromString(c.Attr.Type), + }) + } + beans = append(beans, edges) + } + } else if n.Type == TypeSymlink { + n.Length = uint64(len(e.Symlink)) + beans = append(beans, &symlink{inode, e.Symlink}) + } + if inode > 1 && inode != TrashInode { + cs.UsedSpace += align4K(n.Length) + cs.UsedInodes += 1 + } + if inode < TrashInode { + if cs.NextInode <= int64(inode) { + cs.NextInode = int64(inode) + 1 + } + } else { + if cs.NextTrash < int64(inode)-TrashInode { + cs.NextTrash = int64(inode) - TrashInode + } + } + + if len(e.Xattrs) > 0 { + xattrs := make([]*xattr, 0, len(e.Xattrs)) + for _, x := range e.Xattrs { + xattrs = append(xattrs, &xattr{inode, x.Name, []byte(x.Value)}) + } + beans = append(beans, xattrs) + } + beans = append(beans, n) + s := m.db.NewSession() + defer s.Close() + return mustInsert(s, beans...) 
+} + +func (m *dbMeta) LoadMeta(r io.Reader) error { + tables, err := m.db.DBMetas() + if err != nil { + return err + } + if len(tables) > 0 { + return fmt.Errorf("Database %s is not empty", m.Name()) + } + if err = m.db.Sync2(new(setting), new(counter)); err != nil { + return fmt.Errorf("create table setting, counter: %s", err) + } + if err = m.db.Sync2(new(node), new(edge), new(symlink), new(xattr)); err != nil { + return fmt.Errorf("create table node, edge, symlink, xattr: %s", err) + } + if err = m.db.Sync2(new(chunk), new(chunkRef)); err != nil { + return fmt.Errorf("create table chunk, chunk_ref: %s", err) + } + if err = m.db.Sync2(new(session), new(sustained), new(delfile)); err != nil { + return fmt.Errorf("create table session, sustaind, delfile: %s", err) + } + if err = m.db.Sync2(new(flock), new(plock)); err != nil { + return fmt.Errorf("create table flock, plock: %s", err) + } + + dec := json.NewDecoder(r) + dm := &DumpedMeta{} + if err = dec.Decode(dm); err != nil { + return err + } + format, err := json.MarshalIndent(dm.Setting, "", "") + if err != nil { + return err + } + + progress := utils.NewProgress(false, false) + bar := progress.AddCountBar("Collected entries", 1) // with root + showProgress := func(totalIncr, currentIncr int64) { + bar.IncrTotal(totalIncr) + bar.IncrInt64(currentIncr) + } + dm.FSTree.Attr.Inode = 1 + entries := make(map[Ino]*DumpedEntry) + if err = collectEntry(dm.FSTree, entries, showProgress); err != nil { + return err + } + if dm.Trash != nil { + bar.IncrTotal(1) + if err = collectEntry(dm.Trash, entries, showProgress); err != nil { + return err + } + } + bar.Done() + + counters := &DumpedCounters{ + NextInode: 2, + NextChunk: 1, + } + refs := make(map[uint64]*chunkRef) + bar = progress.AddCountBar("Loaded entries", int64(len(entries))) + maxNum := 100 + pool := make(chan struct{}, maxNum) + errCh := make(chan error, 100) + done := make(chan struct{}, 1) + var wg sync.WaitGroup + for _, entry := range entries { + select { + case err = <-errCh: + return err + default: + } + pool <- struct{}{} + wg.Add(1) + go func(entry *DumpedEntry) { + defer func() { + bar.Increment() + wg.Done() + <-pool + }() + if err = m.loadEntry(entry, counters, refs); err != nil { + errCh <- err + } + }(entry) + } + + go func() { + wg.Wait() + close(done) + }() + + select { + case err = <-errCh: + return err + case <-done: + } + progress.Done() + logger.Infof("Dumped counters: %+v", *dm.Counters) + logger.Infof("Loaded counters: %+v", *counters) + + beans := make([]interface{}, 0, 4) // setting, counter, delfile, chunkRef + beans = append(beans, &setting{"format", string(format)}) + cs := make([]*counter, 0, 7) + cs = append(cs, &counter{"usedSpace", counters.UsedSpace}) + cs = append(cs, &counter{"totalInodes", counters.UsedInodes}) + cs = append(cs, &counter{"nextInode", counters.NextInode}) + cs = append(cs, &counter{"nextChunk", counters.NextChunk}) + cs = append(cs, &counter{"nextSession", counters.NextSession}) + cs = append(cs, &counter{"nextTrash", counters.NextTrash}) + cs = append(cs, &counter{"nextCleanupSlices", 0}) + beans = append(beans, cs) + if len(dm.DelFiles) > 0 { + dels := make([]*delfile, 0, len(dm.DelFiles)) + for _, d := range dm.DelFiles { + dels = append(dels, &delfile{d.Inode, d.Length, d.Expire}) + } + beans = append(beans, dels) + } + if len(refs) > 0 { + cks := make([]*chunkRef, 0, len(refs)) + for _, v := range refs { + cks = append(cks, v) + } + beans = append(beans, cks) + } + s := m.db.NewSession() + defer s.Close() + return mustInsert(s, 
beans...) +} diff --git a/pkg/meta/sql_lock.go b/pkg/meta/sql_lock.go new file mode 100644 index 0000000..07bb078 --- /dev/null +++ b/pkg/meta/sql_lock.go @@ -0,0 +1,240 @@ +//go:build !nosqlite || !nomysql || !nopg +// +build !nosqlite !nomysql !nopg + +/* + * JuiceFS, Copyright 2020 Juicedata, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package meta + +import ( + "fmt" + "syscall" + "time" + + "xorm.io/xorm" +) + +func (m *dbMeta) Flock(ctx Context, inode Ino, owner_ uint64, ltype uint32, block bool) syscall.Errno { + owner := int64(owner_) + if ltype == F_UNLCK { + return errno(m.txn(func(s *xorm.Session) error { + _, err := s.Delete(&flock{Inode: inode, Owner: owner, Sid: m.sid}) + return err + })) + } + var err syscall.Errno + for { + err = errno(m.txn(func(s *xorm.Session) error { + if exists, err := s.Get(&node{Inode: inode}); err != nil || !exists { + if err == nil && !exists { + err = syscall.ENOENT + } + return err + } + rows, err := s.Rows(&flock{Inode: inode}) + if err != nil { + return err + } + type key struct { + sid uint64 + o int64 + } + var locks = make(map[key]flock) + var l flock + for rows.Next() { + if rows.Scan(&l) == nil { + locks[key{l.Sid, l.Owner}] = l + } + } + _ = rows.Close() + + if ltype == F_RDLCK { + for _, l := range locks { + if l.Ltype == 'W' { + return syscall.EAGAIN + } + } + return mustInsert(s, flock{Inode: inode, Owner: owner, Ltype: 'R', Sid: m.sid}) + } + me := key{m.sid, owner} + _, ok := locks[me] + delete(locks, me) + if len(locks) > 0 { + return syscall.EAGAIN + } + if ok { + _, err = s.Cols("Ltype").Update(&flock{Ltype: 'W'}, &flock{Inode: inode, Owner: owner, Sid: m.sid}) + } else { + err = mustInsert(s, flock{Inode: inode, Owner: owner, Ltype: 'W', Sid: m.sid}) + } + return err + })) + + if !block || err != syscall.EAGAIN { + break + } + if ltype == F_WRLCK { + time.Sleep(time.Millisecond * 1) + } else { + time.Sleep(time.Millisecond * 10) + } + if ctx.Canceled() { + return syscall.EINTR + } + } + return err +} + +func (m *dbMeta) Getlk(ctx Context, inode Ino, owner_ uint64, ltype *uint32, start, end *uint64, pid *uint32) syscall.Errno { + if *ltype == F_UNLCK { + *start = 0 + *end = 0 + *pid = 0 + return 0 + } + + owner := int64(owner_) + rows, err := m.db.Rows(&plock{Inode: inode}) + if err != nil { + return errno(err) + } + type key struct { + sid uint64 + o int64 + } + var locks = make(map[key][]byte) + var l plock + for rows.Next() { + if rows.Scan(&l) == nil && !(l.Sid == m.sid && l.Owner == owner) { + locks[key{l.Sid, l.Owner}] = dup(l.Records) + } + } + _ = rows.Close() + + for k, d := range locks { + ls := loadLocks(d) + for _, l := range ls { + // find conflicted locks + if (*ltype == F_WRLCK || l.ltype == F_WRLCK) && *end >= l.start && *start <= l.end { + *ltype = l.ltype + *start = l.start + *end = l.end + if k.sid == m.sid { + *pid = l.pid + } else { + *pid = 0 + } + return 0 + } + } + } + *ltype = F_UNLCK + *start = 0 + *end = 0 + *pid = 0 + return 0 +} + +func (m *dbMeta) Setlk(ctx Context, inode Ino, 
owner_ uint64, block bool, ltype uint32, start, end uint64, pid uint32) syscall.Errno { + var err syscall.Errno + lock := plockRecord{ltype, pid, start, end} + owner := int64(owner_) + for { + err = errno(m.txn(func(s *xorm.Session) error { + if exists, err := s.Get(&node{Inode: inode}); err != nil || !exists { + if err == nil && !exists { + err = syscall.ENOENT + } + return err + } + if ltype == F_UNLCK { + var l = plock{Inode: inode, Owner: owner, Sid: m.sid} + ok, err := m.db.Get(&l) + if err != nil { + return err + } + if !ok { + return nil + } + ls := loadLocks(l.Records) + if len(ls) == 0 { + return nil + } + ls = updateLocks(ls, lock) + if len(ls) == 0 { + _, err = s.Delete(&plock{Inode: inode, Owner: owner, Sid: m.sid}) + } else { + _, err = s.Cols("records").Update(plock{Records: dumpLocks(ls)}, l) + } + return err + } + rows, err := s.Rows(&plock{Inode: inode}) + if err != nil { + return err + } + type key struct { + sid uint64 + owner int64 + } + var locks = make(map[key][]byte) + var l plock + for rows.Next() { + if rows.Scan(&l) == nil { + locks[key{l.Sid, l.Owner}] = dup(l.Records) + } + } + _ = rows.Close() + lkey := key{m.sid, owner} + for k, d := range locks { + if k == lkey { + continue + } + ls := loadLocks(d) + for _, l := range ls { + // find conflicted locks + if (ltype == F_WRLCK || l.ltype == F_WRLCK) && end >= l.start && start <= l.end { + return syscall.EAGAIN + } + } + } + ls := updateLocks(loadLocks(locks[lkey]), lock) + var n int64 + if len(locks[lkey]) > 0 { + n, err = s.Cols("records").Update(plock{Records: dumpLocks(ls)}, + &plock{Inode: inode, Sid: m.sid, Owner: owner}) + } else { + n, err = s.InsertOne(&plock{Inode: inode, Sid: m.sid, Owner: owner, Records: dumpLocks(ls)}) + } + if err == nil && n == 0 { + err = fmt.Errorf("insert/update failed") + } + return err + })) + + if !block || err != syscall.EAGAIN { + break + } + if ltype == F_WRLCK { + time.Sleep(time.Millisecond * 1) + } else { + time.Sleep(time.Millisecond * 10) + } + if ctx.Canceled() { + return syscall.EINTR + } + } + return err +} diff --git a/pkg/meta/sql_mysql.go b/pkg/meta/sql_mysql.go new file mode 100644 index 0000000..efa6c1c --- /dev/null +++ b/pkg/meta/sql_mysql.go @@ -0,0 +1,28 @@ +//go:build !nomysql +// +build !nomysql + +/* + * JuiceFS, Copyright 2020 Juicedata, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package meta + +import ( + _ "github.com/go-sql-driver/mysql" +) + +func init() { + Register("mysql", newSQLMeta) +} diff --git a/pkg/meta/sql_pg.go b/pkg/meta/sql_pg.go new file mode 100644 index 0000000..7d4d9a3 --- /dev/null +++ b/pkg/meta/sql_pg.go @@ -0,0 +1,28 @@ +//go:build !nopg +// +build !nopg + +/* + * JuiceFS, Copyright 2020 Juicedata, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package meta + +import ( + _ "github.com/lib/pq" +) + +func init() { + Register("postgres", newSQLMeta) +} diff --git a/pkg/meta/sql_sqlite.go b/pkg/meta/sql_sqlite.go new file mode 100644 index 0000000..70ea349 --- /dev/null +++ b/pkg/meta/sql_sqlite.go @@ -0,0 +1,29 @@ +//go:build !nosqlite +// +build !nosqlite + +/* + * JuiceFS, Copyright 2021 Juicedata, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package meta + +import ( + "github.com/mattn/go-sqlite3" +) + +func init() { + errBusy = sqlite3.ErrBusy + Register("sqlite3", newSQLMeta) +} diff --git a/pkg/meta/sql_test.go b/pkg/meta/sql_test.go new file mode 100644 index 0000000..17a4647 --- /dev/null +++ b/pkg/meta/sql_test.go @@ -0,0 +1,47 @@ +/* + * JuiceFS, Copyright 2020 Juicedata, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +//nolint:errcheck +package meta + +import ( + "path" + "testing" +) + +func TestSQLiteClient(t *testing.T) { + m, err := newSQLMeta("sqlite3", path.Join(t.TempDir(), "jfs-unit-test.db"), &Config{MaxDeletes: 1}) + if err != nil || m.Name() != "sqlite3" { + t.Fatalf("create meta: %s", err) + } + testMeta(t, m) +} + +func TestMySQLClient(t *testing.T) { + m, err := newSQLMeta("mysql", "root:@/dev", &Config{MaxDeletes: 1}) + if err != nil || m.Name() != "mysql" { + t.Fatalf("create meta: %s", err) + } + testMeta(t, m) +} + +func TestPostgreSQLClient(t *testing.T) { + m, err := newSQLMeta("postgres", "localhost:5432/test?sslmode=disable", &Config{MaxDeletes: 1}) + if err != nil || m.Name() != "postgres" { + t.Fatalf("create meta: %s", err) + } + testMeta(t, m) +} diff --git a/pkg/meta/tkv.go b/pkg/meta/tkv.go new file mode 100644 index 0000000..a3b0e80 --- /dev/null +++ b/pkg/meta/tkv.go @@ -0,0 +1,2482 @@ +/* + * JuiceFS, Copyright 2021 Juicedata, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package meta + +import ( + "bufio" + "bytes" + "encoding/binary" + "encoding/json" + "fmt" + "io" + "math" + "math/rand" + "runtime" + "sort" + "strings" + "sync" + "syscall" + "time" + + "github.com/pkg/errors" + + "github.com/google/btree" + + "github.com/juicedata/juicefs/pkg/utils" +) + +type kvTxn interface { + get(key []byte) []byte + gets(keys ...[]byte) [][]byte + scanRange(begin, end []byte) map[string][]byte + scan(prefix []byte, handler func(key, value []byte)) + scanKeys(prefix []byte) [][]byte + scanValues(prefix []byte, filter func(k, v []byte) bool) map[string][]byte + exist(prefix []byte) bool + set(key, value []byte) + append(key []byte, value []byte) []byte + incrBy(key []byte, value int64) int64 + dels(keys ...[]byte) +} + +type tkvClient interface { + name() string + txn(f func(kvTxn) error) error + reset(prefix []byte) error + close() error + shouldRetry(err error) bool +} + +type kvMeta struct { + baseMeta + client tkvClient + snap *memKV +} + +var drivers = make(map[string]func(string) (tkvClient, error)) + +func newTkvClient(driver, addr string) (tkvClient, error) { + fn, ok := drivers[driver] + if !ok { + return nil, fmt.Errorf("unsupported driver %s", driver) + } + return fn(addr) +} + +func newKVMeta(driver, addr string, conf *Config) (Meta, error) { + client, err := newTkvClient(driver, addr) + if err != nil { + return nil, fmt.Errorf("connect to addr %s: %s", addr, err) + } + // TODO: ping server and check latency > Millisecond + // logger.Warnf("The latency to database is too high: %s", time.Since(start)) + m := &kvMeta{ + baseMeta: newBaseMeta(conf), + client: client, + } + m.en = m + m.root, err = lookupSubdir(m, conf.Subdir) + return m, err +} + +func (m *kvMeta) Shutdown() error { + return m.client.close() +} + +func (m *kvMeta) Name() string { + return m.client.name() +} + +func (m *kvMeta) doDeleteSlice(chunkid uint64, size uint32) error { + return m.deleteKeys(m.sliceKey(chunkid, size)) +} + +func (m *kvMeta) keyLen(args ...interface{}) int { + var c int + for _, a := range args { + switch a := a.(type) { + case byte: + c++ + case uint32: + c += 4 + case uint64: + c += 8 + case Ino: + c += 8 + case string: + c += len(a) + default: + logger.Fatalf("invalid type %T, value %v", a, a) + } + } + return c +} + +func (m *kvMeta) fmtKey(args ...interface{}) []byte { + b := utils.NewBuffer(uint32(m.keyLen(args...))) + for _, a := range args { + switch a := a.(type) { + case byte: + b.Put8(a) + case uint32: + b.Put32(a) + case uint64: + b.Put64(a) + case Ino: + m.encodeInode(a, b.Get(8)) + case string: + b.Put([]byte(a)) + default: + logger.Fatalf("invalid type %T, value %v", a, a) + } + } + return b.Bytes() +} + +/** + Ino iiiiiiii + Length llllllll + Indx nnnn + name ... + chunkid cccccccc + session ssssssss + +All keys: + setting format + C... counter + AiiiiiiiiI inode attribute + AiiiiiiiiD... dentry + AiiiiiiiiCnnnn file chunks + AiiiiiiiiS symlink target + AiiiiiiiiX... 
extented attribute + Diiiiiiiillllllll delete inodes + Fiiiiiiii Flocks + Piiiiiiii POSIX locks + Kccccccccnnnn slice refs + SHssssssss session heartbeat + SIssssssss session info + SSssssssssiiiiiiii sustained inode +*/ + +func (m *kvMeta) inodeKey(inode Ino) []byte { + return m.fmtKey("A", inode, "I") +} + +func (m *kvMeta) entryKey(parent Ino, name string) []byte { + return m.fmtKey("A", parent, "D", name) +} + +func (m *kvMeta) chunkKey(inode Ino, indx uint32) []byte { + return m.fmtKey("A", inode, "C", indx) +} + +func (m *kvMeta) sliceKey(chunkid uint64, size uint32) []byte { + return m.fmtKey("K", chunkid, size) +} + +func (m *kvMeta) symKey(inode Ino) []byte { + return m.fmtKey("A", inode, "S") +} + +func (m *kvMeta) xattrKey(inode Ino, name string) []byte { + return m.fmtKey("A", inode, "X", name) +} + +func (m *kvMeta) flockKey(inode Ino) []byte { + return m.fmtKey("F", inode) +} + +func (m *kvMeta) plockKey(inode Ino) []byte { + return m.fmtKey("P", inode) +} + +func (m *kvMeta) sessionKey(sid uint64) []byte { + return m.fmtKey("SH", sid) +} + +func (m *kvMeta) parseSid(key string) uint64 { + buf := []byte(key[2:]) // "SH" + if len(buf) != 8 { + panic("invalid sid value") + } + return binary.BigEndian.Uint64(buf) +} + +func (m *kvMeta) sessionInfoKey(sid uint64) []byte { + return m.fmtKey("SI", sid) +} + +func (m *kvMeta) sustainedKey(sid uint64, inode Ino) []byte { + return m.fmtKey("SS", sid, inode) +} + +func (m *kvMeta) encodeInode(ino Ino, buf []byte) { + binary.LittleEndian.PutUint64(buf, uint64(ino)) +} + +func (m *kvMeta) decodeInode(buf []byte) Ino { + return Ino(binary.LittleEndian.Uint64(buf)) +} + +func (m *kvMeta) delfileKey(inode Ino, length uint64) []byte { + return m.fmtKey("D", inode, length) +} + +func (m *kvMeta) counterKey(key string) []byte { + return m.fmtKey("C", key) +} + +func (m *kvMeta) packInt64(value int64) []byte { + b := make([]byte, 8) + binary.BigEndian.PutUint64(b, uint64(value)) + return b +} + +func (m *kvMeta) parseInt64(buf []byte) int64 { + if len(buf) == 0 { + return 0 + } + if len(buf) != 8 { + panic("invalid value") + } + return int64(binary.BigEndian.Uint64(buf)) +} + +func packCounter(value int64) []byte { + b := make([]byte, 8) + binary.LittleEndian.PutUint64(b, uint64(value)) + return b +} + +func parseCounter(buf []byte) int64 { + if len(buf) == 0 { + return 0 + } + if len(buf) != 8 { + panic("invalid counter value") + } + return int64(binary.LittleEndian.Uint64(buf)) +} + +func (m *kvMeta) packEntry(_type uint8, inode Ino) []byte { + b := utils.NewBuffer(9) + b.Put8(_type) + b.Put64(uint64(inode)) + return b.Bytes() +} + +func (m *kvMeta) parseEntry(buf []byte) (uint8, Ino) { + b := utils.FromBuffer(buf) + return b.Get8(), Ino(b.Get64()) +} + +func (m *kvMeta) get(key []byte) ([]byte, error) { + var value []byte + err := m.client.txn(func(tx kvTxn) error { + value = tx.get(key) + return nil + }) + return value, err +} + +func (m *kvMeta) getCounter(key []byte) (int64, error) { + var value int64 + err := m.client.txn(func(tx kvTxn) error { + value = tx.incrBy(key, 0) + return nil + }) + return value, err +} + +func (m *kvMeta) scanKeys(prefix []byte) ([][]byte, error) { + var keys [][]byte + err := m.client.txn(func(tx kvTxn) error { + keys = tx.scanKeys(prefix) + return nil + }) + return keys, err +} + +func (m *kvMeta) scanValues(prefix []byte, filter func(k, v []byte) bool) (map[string][]byte, error) { + var values map[string][]byte + err := m.client.txn(func(tx kvTxn) error { + values = tx.scanValues(prefix, filter) + return nil 
+ }) + return values, err +} + +func (m *kvMeta) Init(format Format, force bool) error { + body, err := m.get(m.fmtKey("setting")) + if err != nil { + return err + } + + if body != nil { + var old Format + err = json.Unmarshal(body, &old) + if err != nil { + return fmt.Errorf("json: %s", err) + } + if force { + old.SecretKey = "removed" + logger.Warnf("Existing volume will be overwrited: %+v", old) + } else { + format.UUID = old.UUID + // these can be safely updated. + old.Bucket = format.Bucket + old.AccessKey = format.AccessKey + old.SecretKey = format.SecretKey + old.Capacity = format.Capacity + old.Inodes = format.Inodes + old.TrashDays = format.TrashDays + if format != old { + old.SecretKey = "" + format.SecretKey = "" + return fmt.Errorf("cannot update format from %+v to %+v", old, format) + } + } + } + + data, err := json.MarshalIndent(format, "", "") + if err != nil { + logger.Fatalf("json: %s", err) + } + + m.fmt = format + ts := time.Now().Unix() + attr := &Attr{ + Typ: TypeDirectory, + Atime: ts, + Mtime: ts, + Ctime: ts, + Nlink: 2, + Length: 4 << 10, + Parent: 1, + } + return m.txn(func(tx kvTxn) error { + if format.TrashDays > 0 { + buf := tx.get(m.inodeKey(TrashInode)) + if buf == nil { + attr.Mode = 0555 + tx.set(m.inodeKey(TrashInode), m.marshal(attr)) + } + } + tx.set(m.fmtKey("setting"), data) + if body == nil || m.client.name() == "memkv" { + attr.Mode = 0777 + tx.set(m.inodeKey(1), m.marshal(attr)) + tx.incrBy(m.counterKey("nextInode"), 2) + tx.incrBy(m.counterKey("nextChunk"), 1) + } + return nil + }) +} + +func (m *kvMeta) Reset() error { + return m.client.reset(nil) +} + +func (m *kvMeta) Load() (*Format, error) { + body, err := m.get(m.fmtKey("setting")) + if err == nil && body == nil { + err = fmt.Errorf("database is not formatted") + } + if err != nil { + return nil, err + } + err = json.Unmarshal(body, &m.fmt) + if err != nil { + return nil, fmt.Errorf("json: %s", err) + } + return &m.fmt, nil +} + +func (m *kvMeta) NewSession() error { + go m.refreshUsage() + if m.conf.ReadOnly { + return nil + } + v, err := m.incrCounter("nextSession", 1) + if err != nil { + return fmt.Errorf("create session: %s", err) + } + m.sid = uint64(v) + logger.Debugf("session is %d", m.sid) + _ = m.setValue(m.sessionKey(m.sid), m.packInt64(time.Now().Unix())) + info := newSessionInfo() + info.MountPoint = m.conf.MountPoint + data, err := json.Marshal(info) + if err != nil { + return fmt.Errorf("json: %s", err) + } + if err = m.setValue(m.sessionInfoKey(m.sid), data); err != nil { + return fmt.Errorf("set session info: %s", err) + } + + go m.refreshSession() + go m.cleanupDeletedFiles() + go m.cleanupSlices() + go m.cleanupTrash() + go m.flushStats() + return nil +} + +func (m *kvMeta) refreshSession() { + for { + time.Sleep(time.Minute) + m.Lock() + if m.umounting { + m.Unlock() + return + } + _ = m.setValue(m.sessionKey(m.sid), m.packInt64(time.Now().Unix())) + m.Unlock() + if _, err := m.Load(); err != nil { + logger.Warnf("reload setting: %s", err) + } + go m.CleanStaleSessions() + } +} + +func (m *kvMeta) doCleanStaleSession(sid uint64) { + // release locks + flocks, err := m.scanValues(m.fmtKey("F"), nil) + if err != nil { + logger.Warnf("scan flock for stale session %d: %s", sid, err) + return + } + for k, v := range flocks { + ls := unmarshalFlock(v) + for o := range ls { + if o.sid == sid { + err = m.txn(func(tx kvTxn) error { + v := tx.get([]byte(k)) + ls := unmarshalFlock(v) + delete(ls, o) + if len(ls) > 0 { + tx.set([]byte(k), marshalFlock(ls)) + } else { + 
tx.dels([]byte(k)) + } + return nil + }) + if err != nil { + logger.Warnf("remove flock for stale session %d: %s", sid, err) + return + } + } + } + } + plocks, err := m.scanValues(m.fmtKey("P"), nil) + if err != nil { + logger.Warnf("scan plock for stale session %d: %s", sid, err) + return + } + for k, v := range plocks { + ls := unmarshalPlock(v) + for o := range ls { + if o.sid == sid { + err = m.txn(func(tx kvTxn) error { + v := tx.get([]byte(k)) + ls := unmarshalPlock(v) + delete(ls, o) + if len(ls) > 0 { + tx.set([]byte(k), marshalPlock(ls)) + } else { + tx.dels([]byte(k)) + } + return nil + }) + if err != nil { + logger.Warnf("remove plock for stale session %d: %s", sid, err) + return + } + } + } + } + + keys, err := m.scanKeys(m.fmtKey("SS", sid)) + if err != nil { + logger.Warnf("scan stale session %d: %s", sid, err) + return + } + for _, key := range keys { + inode := m.decodeInode(key[10:]) // "SS" + sid + if e := m.doDeleteSustainedInode(sid, inode); e != nil { + logger.Errorf("Failed to delete inode %d: %s", inode, err) + err = e + } + } + if err == nil { + err = m.deleteKeys(m.sessionKey(sid), m.sessionInfoKey(sid)) + logger.Infof("cleanup session %d: %s", sid, err) + } +} + +func (m *kvMeta) CleanStaleSessions() { + vals, err := m.scanValues(m.fmtKey("SH"), nil) + if err != nil { + logger.Warnf("scan stale sessions: %s", err) + return + } + var ids []uint64 + for k, v := range vals { + if m.parseInt64(v) < time.Now().Add(time.Minute*-5).Unix() { + ids = append(ids, m.parseSid(k)) + } + } + for _, sid := range ids { + m.doCleanStaleSession(sid) + } +} + +func (m *kvMeta) getSession(sid uint64, detail bool) (*Session, error) { + info, err := m.get(m.sessionInfoKey(sid)) + if err != nil { + return nil, err + } + if info == nil { + info = []byte("{}") + } + var s Session + if err = json.Unmarshal(info, &s); err != nil { + return nil, fmt.Errorf("corrupted session info; json error: %s", err) + } + s.Sid = sid + if detail { + inodes, err := m.scanKeys(m.fmtKey("SS", sid)) + if err != nil { + return nil, err + } + s.Sustained = make([]Ino, 0, len(inodes)) + for _, sinode := range inodes { + inode := m.decodeInode(sinode[10:]) // "SS" + sid + s.Sustained = append(s.Sustained, inode) + } + flocks, err := m.scanValues(m.fmtKey("F"), nil) + if err != nil { + return nil, err + } + for k, v := range flocks { + inode := m.decodeInode([]byte(k[1:])) // "F" + ls := unmarshalFlock(v) + for o, l := range ls { + if o.sid == sid { + s.Flocks = append(s.Flocks, Flock{inode, o.sid, string(l)}) + } + } + } + plocks, err := m.scanValues(m.fmtKey("P"), nil) + if err != nil { + return nil, err + } + for k, v := range plocks { + inode := m.decodeInode([]byte(k[1:])) // "P" + ls := unmarshalPlock(v) + for o, l := range ls { + if o.sid == sid { + s.Plocks = append(s.Plocks, Plock{inode, o.sid, l}) + } + } + } + } + return &s, nil +} + +func (m *kvMeta) GetSession(sid uint64) (*Session, error) { + value, err := m.get(m.sessionKey(sid)) + if err != nil { + return nil, err + } + if value == nil { + return nil, fmt.Errorf("session not found: %d", sid) + } + s, err := m.getSession(sid, true) + if err != nil { + return nil, err + } + s.Heartbeat = time.Unix(m.parseInt64(value), 0) + return s, nil +} + +func (m *kvMeta) ListSessions() ([]*Session, error) { + vals, err := m.scanValues(m.fmtKey("SH"), nil) + if err != nil { + return nil, err + } + sessions := make([]*Session, 0, len(vals)) + for k, v := range vals { + s, err := m.getSession(m.parseSid(k), false) + if err != nil { + logger.Errorf("get session: 
%s", err) + continue + } + s.Heartbeat = time.Unix(m.parseInt64(v), 0) + sessions = append(sessions, s) + } + return sessions, nil +} + +func (m *kvMeta) shouldRetry(err error) bool { + if err == nil { + return false + } + if _, ok := err.(syscall.Errno); ok { + return false + } + return m.client.shouldRetry(err) +} + +func (m *kvMeta) txn(f func(tx kvTxn) error) error { + if m.conf.ReadOnly { + return syscall.EROFS + } + start := time.Now() + defer func() { txDist.Observe(time.Since(start).Seconds()) }() + var err error + for i := 0; i < 50; i++ { + if err = m.client.txn(f); m.shouldRetry(err) { + txRestart.Add(1) + logger.Debugf("conflicted transaction, restart it (tried %d): %s", i+1, err) + time.Sleep(time.Millisecond * time.Duration(rand.Int()%((i+1)*(i+1)))) + continue + } + break + } + return err +} + +func (m *kvMeta) setValue(key, value []byte) error { + return m.txn(func(tx kvTxn) error { + tx.set(key, value) + return nil + }) +} + +func (m *kvMeta) incrCounter(name string, value int64) (int64, error) { + var new int64 + key := m.counterKey(name) + err := m.txn(func(tx kvTxn) error { + new = tx.incrBy(key, value) + return nil + }) + return new, err +} + +func (m *kvMeta) deleteKeys(keys ...[]byte) error { + if len(keys) == 0 { + return nil + } + return m.txn(func(tx kvTxn) error { + tx.dels(keys...) + return nil + }) +} + +func (m *kvMeta) doLookup(ctx Context, parent Ino, name string, inode *Ino, attr *Attr) syscall.Errno { + buf, err := m.get(m.entryKey(parent, name)) + if err != nil { + return errno(err) + } + if buf == nil { + return syscall.ENOENT + } + foundType, foundIno := m.parseEntry(buf) + a, err := m.get(m.inodeKey(foundIno)) + if a != nil { + m.parseAttr(a, attr) + } else if err == nil { + logger.Warnf("no attribute for inode %d (%d, %s)", foundIno, parent, name) + *attr = Attr{Typ: foundType} + } + *inode = foundIno + return errno(err) +} + +func (m *kvMeta) doGetAttr(ctx Context, inode Ino, attr *Attr) syscall.Errno { + a, err := m.get(m.inodeKey(inode)) + if a != nil { + m.parseAttr(a, attr) + } else if err == nil { + err = syscall.ENOENT + } + return errno(err) +} + +func (m *kvMeta) SetAttr(ctx Context, inode Ino, set uint16, sugidclearmode uint8, attr *Attr) syscall.Errno { + defer timeit(time.Now()) + inode = m.checkRoot(inode) + defer func() { m.of.InvalidateChunk(inode, 0xFFFFFFFE) }() + return errno(m.txn(func(tx kvTxn) error { + var cur Attr + a := tx.get(m.inodeKey(inode)) + if a == nil { + return syscall.ENOENT + } + m.parseAttr(a, &cur) + if (set&(SetAttrUID|SetAttrGID)) != 0 && (set&SetAttrMode) != 0 { + attr.Mode |= (cur.Mode & 06000) + } + var changed bool + if (cur.Mode&06000) != 0 && (set&(SetAttrUID|SetAttrGID)) != 0 { + clearSUGID(ctx, &cur, attr) + changed = true + } + if set&SetAttrUID != 0 && cur.Uid != attr.Uid { + cur.Uid = attr.Uid + changed = true + } + if set&SetAttrGID != 0 && cur.Gid != attr.Gid { + cur.Gid = attr.Gid + changed = true + } + if set&SetAttrMode != 0 { + if ctx.Uid() != 0 && (attr.Mode&02000) != 0 { + if ctx.Gid() != cur.Gid { + attr.Mode &= 05777 + } + } + if attr.Mode != cur.Mode { + cur.Mode = attr.Mode + changed = true + } + } + now := time.Now() + if set&SetAttrAtime != 0 && (cur.Atime != attr.Atime || cur.Atimensec != attr.Atimensec) { + cur.Atime = attr.Atime + cur.Atimensec = attr.Atimensec + changed = true + } + if set&SetAttrAtimeNow != 0 { + cur.Atime = now.Unix() + cur.Atimensec = uint32(now.Nanosecond()) + changed = true + } + if set&SetAttrMtime != 0 && (cur.Mtime != attr.Mtime || cur.Mtimensec != 
attr.Mtimensec) { + cur.Mtime = attr.Mtime + cur.Mtimensec = attr.Mtimensec + changed = true + } + if set&SetAttrMtimeNow != 0 { + cur.Mtime = now.Unix() + cur.Mtimensec = uint32(now.Nanosecond()) + changed = true + } + if !changed { + *attr = cur + return nil + } + cur.Ctime = now.Unix() + cur.Ctimensec = uint32(now.Nanosecond()) + tx.set(m.inodeKey(inode), m.marshal(&cur)) + *attr = cur + return nil + })) +} + +func (m *kvMeta) Truncate(ctx Context, inode Ino, flags uint8, length uint64, attr *Attr) syscall.Errno { + defer timeit(time.Now()) + f := m.of.find(inode) + if f != nil { + f.Lock() + defer f.Unlock() + } + defer func() { m.of.InvalidateChunk(inode, 0xFFFFFFFF) }() + var newSpace int64 + err := m.txn(func(tx kvTxn) error { + var t Attr + a := tx.get(m.inodeKey(inode)) + if a == nil { + return syscall.ENOENT + } + m.parseAttr(a, &t) + if t.Typ != TypeFile { + return syscall.EPERM + } + if length == t.Length { + if attr != nil { + *attr = t + } + return nil + } + newSpace = align4K(length) - align4K(t.Length) + if newSpace > 0 && m.checkQuota(newSpace, 0) { + return syscall.ENOSPC + } + var left, right = t.Length, length + if left > right { + right, left = left, right + } + if right/ChunkSize-left/ChunkSize > 1 { + zeroChunks := tx.scanRange(m.chunkKey(inode, uint32(left/ChunkSize)+1), m.chunkKey(inode, uint32(right/ChunkSize))) + buf := marshalSlice(0, 0, 0, 0, ChunkSize) + for key, value := range zeroChunks { + tx.set([]byte(key), append(value, buf...)) + } + } + l := uint32(right - left) + if right > (left/ChunkSize+1)*ChunkSize { + l = ChunkSize - uint32(left%ChunkSize) + } + tx.append(m.chunkKey(inode, uint32(left/ChunkSize)), marshalSlice(uint32(left%ChunkSize), 0, 0, 0, l)) + if right > (left/ChunkSize+1)*ChunkSize && right%ChunkSize > 0 { + tx.append(m.chunkKey(inode, uint32(right/ChunkSize)), marshalSlice(0, 0, 0, 0, uint32(right%ChunkSize))) + } + t.Length = length + now := time.Now() + t.Mtime = now.Unix() + t.Mtimensec = uint32(now.Nanosecond()) + t.Ctime = now.Unix() + t.Ctimensec = uint32(now.Nanosecond()) + tx.set(m.inodeKey(inode), m.marshal(&t)) + if attr != nil { + *attr = t + } + return nil + }) + if err == nil { + m.updateStats(newSpace, 0) + } + return errno(err) +} + +func (m *kvMeta) Fallocate(ctx Context, inode Ino, mode uint8, off uint64, size uint64) syscall.Errno { + if mode&fallocCollapesRange != 0 && mode != fallocCollapesRange { + return syscall.EINVAL + } + if mode&fallocInsertRange != 0 && mode != fallocInsertRange { + return syscall.EINVAL + } + if mode == fallocInsertRange || mode == fallocCollapesRange { + return syscall.ENOTSUP + } + if mode&fallocPunchHole != 0 && mode&fallocKeepSize == 0 { + return syscall.EINVAL + } + if size == 0 { + return syscall.EINVAL + } + defer timeit(time.Now()) + f := m.of.find(inode) + if f != nil { + f.Lock() + defer f.Unlock() + } + defer func() { m.of.InvalidateChunk(inode, 0xFFFFFFFF) }() + var newSpace int64 + err := m.txn(func(tx kvTxn) error { + var t Attr + a := tx.get(m.inodeKey(inode)) + if a == nil { + return syscall.ENOENT + } + m.parseAttr(a, &t) + if t.Typ == TypeFIFO { + return syscall.EPIPE + } + if t.Typ != TypeFile { + return syscall.EPERM + } + length := t.Length + if off+size > t.Length { + if mode&fallocKeepSize == 0 { + length = off + size + } + } + + old := t.Length + newSpace = align4K(length) - align4K(t.Length) + if m.checkQuota(newSpace, 0) { + return syscall.ENOSPC + } + t.Length = length + now := time.Now() + t.Mtime = now.Unix() + t.Mtimensec = uint32(now.Nanosecond()) + t.Ctime = 
now.Unix() + t.Ctimensec = uint32(now.Nanosecond()) + tx.set(m.inodeKey(inode), m.marshal(&t)) + if mode&(fallocZeroRange|fallocPunchHole) != 0 { + if off+size > old { + size = old - off + } + for size > 0 { + indx := uint32(off / ChunkSize) + coff := off % ChunkSize + l := size + if coff+size > ChunkSize { + l = ChunkSize - coff + } + tx.append(m.chunkKey(inode, indx), marshalSlice(uint32(coff), 0, 0, 0, uint32(l))) + off += l + size -= l + } + } + return nil + }) + if err == nil { + m.updateStats(newSpace, 0) + } + return errno(err) +} + +func (m *kvMeta) doReadlink(ctx Context, inode Ino) ([]byte, error) { + return m.get(m.symKey(inode)) +} + +func (m *kvMeta) doMknod(ctx Context, parent Ino, name string, _type uint8, mode, cumask uint16, rdev uint32, path string, inode *Ino, attr *Attr) syscall.Errno { + if m.checkQuota(4<<10, 1) { + return syscall.ENOSPC + } + parent = m.checkRoot(parent) + var ino Ino + var err error + if parent == TrashInode { + var next int64 + next, err = m.incrCounter("nextTrash", 1) + ino = TrashInode + Ino(next) + } else { + ino, err = m.nextInode() + } + if err != nil { + return errno(err) + } + if attr == nil { + attr = &Attr{} + } + attr.Typ = _type + attr.Mode = mode & ^cumask + attr.Uid = ctx.Uid() + attr.Gid = ctx.Gid() + if _type == TypeDirectory { + attr.Nlink = 2 + attr.Length = 4 << 10 + } else { + attr.Nlink = 1 + if _type == TypeSymlink { + attr.Length = uint64(len(path)) + } else { + attr.Length = 0 + attr.Rdev = rdev + } + } + attr.Parent = parent + attr.Full = true + if inode != nil { + *inode = ino + } + + err = m.txn(func(tx kvTxn) error { + var pattr Attr + a := tx.get(m.inodeKey(parent)) + if a == nil { + return syscall.ENOENT + } + m.parseAttr(a, &pattr) + if pattr.Typ != TypeDirectory { + return syscall.ENOTDIR + } + + buf := tx.get(m.entryKey(parent, name)) + var foundIno Ino + var foundType uint8 + if buf != nil { + foundType, foundIno = m.parseEntry(buf) + } else if m.conf.CaseInsensi { + if entry := m.resolveCase(ctx, parent, name); entry != nil { + foundType, foundIno = entry.Attr.Typ, entry.Inode + } + } + if foundIno != 0 { + if _type == TypeFile || _type == TypeDirectory { + a = tx.get(m.inodeKey(foundIno)) + if a != nil { + m.parseAttr(a, attr) + } else { + *attr = Attr{Typ: foundType, Parent: parent} // corrupt entry + } + if inode != nil { + *inode = foundIno + } + } + return syscall.EEXIST + } + + now := time.Now() + if _type == TypeDirectory { + pattr.Nlink++ + } + pattr.Mtime = now.Unix() + pattr.Mtimensec = uint32(now.Nanosecond()) + pattr.Ctime = now.Unix() + pattr.Ctimensec = uint32(now.Nanosecond()) + attr.Atime = now.Unix() + attr.Atimensec = uint32(now.Nanosecond()) + attr.Mtime = now.Unix() + attr.Mtimensec = uint32(now.Nanosecond()) + attr.Ctime = now.Unix() + attr.Ctimensec = uint32(now.Nanosecond()) + if pattr.Mode&02000 != 0 || ctx.Value(CtxKey("behavior")) == "Hadoop" || runtime.GOOS == "darwin" { + attr.Gid = pattr.Gid + if _type == TypeDirectory && runtime.GOOS == "linux" { + attr.Mode |= pattr.Mode & 02000 + } + } + + tx.set(m.entryKey(parent, name), m.packEntry(_type, ino)) + tx.set(m.inodeKey(parent), m.marshal(&pattr)) + tx.set(m.inodeKey(ino), m.marshal(attr)) + if _type == TypeSymlink { + tx.set(m.symKey(ino), []byte(path)) + } + return nil + }) + if err == nil { + m.updateStats(align4K(0), 1) + } + return errno(err) +} + +func (m *kvMeta) doUnlink(ctx Context, parent Ino, name string) syscall.Errno { + var trash Ino + if st := m.checkTrash(parent, &trash); st != 0 { + return st + } + var _type uint8 + var 
inode Ino + var attr Attr + var opened bool + var newSpace, newInode int64 + err := m.txn(func(tx kvTxn) error { + buf := tx.get(m.entryKey(parent, name)) + if buf == nil && m.conf.CaseInsensi { + if e := m.resolveCase(ctx, parent, name); e != nil { + name = string(e.Name) + buf = m.packEntry(e.Attr.Typ, e.Inode) + } + } + if buf == nil { + return syscall.ENOENT + } + _type, inode = m.parseEntry(buf) + if _type == TypeDirectory { + return syscall.EPERM + } + rs := tx.gets(m.inodeKey(parent), m.inodeKey(inode)) + if rs[0] == nil { + return syscall.ENOENT + } + var pattr Attr + m.parseAttr(rs[0], &pattr) + if pattr.Typ != TypeDirectory { + return syscall.ENOTDIR + } + attr = Attr{} + opened = false + now := time.Now() + if rs[1] != nil { + m.parseAttr(rs[1], &attr) + if ctx.Uid() != 0 && pattr.Mode&01000 != 0 && ctx.Uid() != pattr.Uid && ctx.Uid() != attr.Uid { + return syscall.EACCES + } + attr.Ctime = now.Unix() + attr.Ctimensec = uint32(now.Nanosecond()) + if trash == 0 { + attr.Nlink-- + if _type == TypeFile && attr.Nlink == 0 { + opened = m.of.IsOpen(inode) + } + } else if attr.Nlink == 1 { + attr.Parent = trash + } + } else { + logger.Warnf("no attribute for inode %d (%d, %s)", inode, parent, name) + trash = 0 + } + defer func() { m.of.InvalidateChunk(inode, 0xFFFFFFFE) }() + pattr.Mtime = now.Unix() + pattr.Mtimensec = uint32(now.Nanosecond()) + pattr.Ctime = now.Unix() + pattr.Ctimensec = uint32(now.Nanosecond()) + + tx.dels(m.entryKey(parent, name)) + tx.set(m.inodeKey(parent), m.marshal(&pattr)) + if attr.Nlink > 0 { + tx.set(m.inodeKey(inode), m.marshal(&attr)) + if trash > 0 { + tx.set(m.entryKey(trash, fmt.Sprintf("%d-%d-%s", parent, inode, name)), buf) + } + } else { + switch _type { + case TypeFile: + if opened { + tx.set(m.inodeKey(inode), m.marshal(&attr)) + tx.set(m.sustainedKey(m.sid, inode), []byte{1}) + } else { + tx.set(m.delfileKey(inode, attr.Length), m.packInt64(now.Unix())) + tx.dels(m.inodeKey(inode)) + newSpace, newInode = -align4K(attr.Length), -1 + } + case TypeSymlink: + tx.dels(m.symKey(inode)) + fallthrough + default: + tx.dels(m.inodeKey(inode)) + newSpace, newInode = -align4K(0), -1 + } + tx.dels(tx.scanKeys(m.xattrKey(inode, ""))...) 
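+ // extended attributes of the removed inode are cleaned up in the same transaction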
+ } + return nil + }) + if err == nil && trash == 0 { + if _type == TypeFile && attr.Nlink == 0 { + m.fileDeleted(opened, inode, attr.Length) + } + m.updateStats(newSpace, newInode) + } + return errno(err) +} + +func (m *kvMeta) doRmdir(ctx Context, parent Ino, name string) syscall.Errno { + var trash Ino + if st := m.checkTrash(parent, &trash); st != 0 { + return st + } + err := m.txn(func(tx kvTxn) error { + buf := tx.get(m.entryKey(parent, name)) + if buf == nil && m.conf.CaseInsensi { + if e := m.resolveCase(ctx, parent, name); e != nil { + name = string(e.Name) + buf = m.packEntry(e.Attr.Typ, e.Inode) + } + } + if buf == nil { + return syscall.ENOENT + } + _type, inode := m.parseEntry(buf) + if _type != TypeDirectory { + return syscall.ENOTDIR + } + rs := tx.gets(m.inodeKey(parent), m.inodeKey(inode)) + if rs[0] == nil { + return syscall.ENOENT + } + var pattr, attr Attr + m.parseAttr(rs[0], &pattr) + if pattr.Typ != TypeDirectory { + return syscall.ENOTDIR + } + if tx.exist(m.entryKey(inode, "")) { + return syscall.ENOTEMPTY + } + + now := time.Now() + if rs[1] != nil { + m.parseAttr(rs[1], &attr) + if ctx.Uid() != 0 && pattr.Mode&01000 != 0 && ctx.Uid() != pattr.Uid && ctx.Uid() != attr.Uid { + return syscall.EACCES + } + if trash > 0 { + attr.Ctime = now.Unix() + attr.Ctimensec = uint32(now.Nanosecond()) + attr.Parent = trash + } + } else { + logger.Warnf("no attribute for inode %d (%d, %s)", inode, parent, name) + trash = 0 + } + pattr.Nlink-- + pattr.Mtime = now.Unix() + pattr.Mtimensec = uint32(now.Nanosecond()) + pattr.Ctime = now.Unix() + pattr.Ctimensec = uint32(now.Nanosecond()) + + tx.set(m.inodeKey(parent), m.marshal(&pattr)) + tx.dels(m.entryKey(parent, name)) + if trash > 0 { + tx.set(m.inodeKey(inode), m.marshal(&attr)) + tx.set(m.entryKey(trash, fmt.Sprintf("%d-%d-%s", parent, inode, name)), buf) + } else { + tx.dels(m.inodeKey(inode)) + tx.dels(tx.scanKeys(m.xattrKey(inode, ""))...) 
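+ // removing the directory also drops any extended attributes it carried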
+ } + return nil + }) + if err == nil && trash == 0 { + m.updateStats(-align4K(0), -1) + } + return errno(err) +} + +func (m *kvMeta) doRename(ctx Context, parentSrc Ino, nameSrc string, parentDst Ino, nameDst string, flags uint32, inode *Ino, attr *Attr) syscall.Errno { + var trash Ino + if st := m.checkTrash(parentDst, &trash); st != 0 { + return st + } + exchange := flags == RenameExchange + var opened bool + var dino Ino + var dtyp uint8 + var tattr Attr + var newSpace, newInode int64 + err := m.txn(func(tx kvTxn) error { + buf := tx.get(m.entryKey(parentSrc, nameSrc)) + if buf == nil && m.conf.CaseInsensi { + if e := m.resolveCase(ctx, parentSrc, nameSrc); e != nil { + nameSrc = string(e.Name) + buf = m.packEntry(e.Attr.Typ, e.Inode) + } + } + if buf == nil { + return syscall.ENOENT + } + typ, ino := m.parseEntry(buf) + if parentSrc == parentDst && nameSrc == nameDst { + if inode != nil { + *inode = ino + } + return nil + } + rs := tx.gets(m.inodeKey(parentSrc), m.inodeKey(parentDst), m.inodeKey(ino)) + if rs[0] == nil || rs[1] == nil || rs[2] == nil { + return syscall.ENOENT + } + var sattr, dattr, iattr Attr + m.parseAttr(rs[0], &sattr) + if sattr.Typ != TypeDirectory { + return syscall.ENOTDIR + } + m.parseAttr(rs[1], &dattr) + if dattr.Typ != TypeDirectory { + return syscall.ENOTDIR + } + m.parseAttr(rs[2], &iattr) + + dbuf := tx.get(m.entryKey(parentDst, nameDst)) + if dbuf == nil && m.conf.CaseInsensi { + if e := m.resolveCase(ctx, parentDst, nameDst); e != nil { + nameDst = string(e.Name) + dbuf = m.packEntry(e.Attr.Typ, e.Inode) + } + } + now := time.Now() + tattr = Attr{} + opened = false + if dbuf != nil { + if flags == RenameNoReplace { + return syscall.EEXIST + } + dtyp, dino = m.parseEntry(dbuf) + a := tx.get(m.inodeKey(dino)) + if a == nil { // corrupt entry + logger.Warnf("no attribute for inode %d (%d, %s)", dino, parentDst, nameDst) + trash = 0 + } + m.parseAttr(a, &tattr) + tattr.Ctime = now.Unix() + tattr.Ctimensec = uint32(now.Nanosecond()) + if exchange { + tattr.Parent = parentSrc + if dtyp == TypeDirectory && parentSrc != parentDst { + dattr.Nlink-- + sattr.Nlink++ + } + } else { + if dtyp == TypeDirectory { + if tx.exist(m.entryKey(dino, "")) { + return syscall.ENOTEMPTY + } + dattr.Nlink-- + if trash > 0 { + tattr.Parent = trash + } + } else { + if trash == 0 { + tattr.Nlink-- + if dtyp == TypeFile && tattr.Nlink == 0 { + opened = m.of.IsOpen(dino) + } + defer func() { m.of.InvalidateChunk(dino, 0xFFFFFFFE) }() + } else if tattr.Nlink == 1 { + tattr.Parent = trash + } + } + } + if ctx.Uid() != 0 && dattr.Mode&01000 != 0 && ctx.Uid() != dattr.Uid && ctx.Uid() != tattr.Uid { + return syscall.EACCES + } + } else { + if exchange { + return syscall.ENOENT + } + dino, dtyp = 0, 0 + } + if ctx.Uid() != 0 && sattr.Mode&01000 != 0 && ctx.Uid() != sattr.Uid && ctx.Uid() != iattr.Uid { + return syscall.EACCES + } + + sattr.Mtime = now.Unix() + sattr.Mtimensec = uint32(now.Nanosecond()) + sattr.Ctime = now.Unix() + sattr.Ctimensec = uint32(now.Nanosecond()) + dattr.Mtime = now.Unix() + dattr.Mtimensec = uint32(now.Nanosecond()) + dattr.Ctime = now.Unix() + dattr.Ctimensec = uint32(now.Nanosecond()) + iattr.Parent = parentDst + iattr.Ctime = now.Unix() + iattr.Ctimensec = uint32(now.Nanosecond()) + if typ == TypeDirectory && parentSrc != parentDst { + sattr.Nlink-- + dattr.Nlink++ + } + if inode != nil { + *inode = ino + } + if attr != nil { + *attr = iattr + } + + if exchange { // dino > 0 + tx.set(m.entryKey(parentSrc, nameSrc), dbuf) + tx.set(m.inodeKey(dino), 
m.marshal(&tattr)) + } else { + tx.dels(m.entryKey(parentSrc, nameSrc)) + if dino > 0 { + if trash > 0 { + tx.set(m.inodeKey(dino), m.marshal(&tattr)) + tx.set(m.entryKey(trash, fmt.Sprintf("%d-%d-%s", parentDst, dino, nameDst)), dbuf) + } else if dtyp != TypeDirectory && tattr.Nlink > 0 { + tx.set(m.inodeKey(dino), m.marshal(&tattr)) + } else { + if dtyp == TypeFile { + if opened { + tx.set(m.inodeKey(dino), m.marshal(&tattr)) + tx.set(m.sustainedKey(m.sid, dino), []byte{1}) + } else { + tx.set(m.delfileKey(dino, tattr.Length), m.packInt64(now.Unix())) + tx.dels(m.inodeKey(dino)) + newSpace, newInode = -align4K(tattr.Length), -1 + } + } else { + if dtyp == TypeSymlink { + tx.dels(m.symKey(dino)) + } + tx.dels(m.inodeKey(dino)) + newSpace, newInode = -align4K(0), -1 + } + tx.dels(tx.scanKeys(m.xattrKey(dino, ""))...) + } + } + } + if parentDst != parentSrc && !isTrash(parentSrc) { + tx.set(m.inodeKey(parentSrc), m.marshal(&sattr)) + } + tx.set(m.inodeKey(ino), m.marshal(&iattr)) + tx.set(m.entryKey(parentDst, nameDst), buf) + tx.set(m.inodeKey(parentDst), m.marshal(&dattr)) + return nil + }) + if err == nil && !exchange && trash == 0 { + if dino > 0 && dtyp == TypeFile && tattr.Nlink == 0 { + m.fileDeleted(opened, dino, tattr.Length) + } + m.updateStats(newSpace, newInode) + } + return errno(err) +} + +func (m *kvMeta) doLink(ctx Context, inode, parent Ino, name string, attr *Attr) syscall.Errno { + return errno(m.txn(func(tx kvTxn) error { + rs := tx.gets(m.inodeKey(parent), m.inodeKey(inode)) + if rs[0] == nil || rs[1] == nil { + return syscall.ENOENT + } + var pattr, iattr Attr + m.parseAttr(rs[0], &pattr) + if pattr.Typ != TypeDirectory { + return syscall.ENOTDIR + } + m.parseAttr(rs[1], &iattr) + if iattr.Typ == TypeDirectory { + return syscall.EPERM + } + buf := tx.get(m.entryKey(parent, name)) + if buf != nil || buf == nil && m.conf.CaseInsensi && m.resolveCase(ctx, parent, name) != nil { + return syscall.EEXIST + } + + now := time.Now() + pattr.Mtime = now.Unix() + pattr.Mtimensec = uint32(now.Nanosecond()) + pattr.Ctime = now.Unix() + pattr.Ctimensec = uint32(now.Nanosecond()) + iattr.Ctime = now.Unix() + iattr.Ctimensec = uint32(now.Nanosecond()) + iattr.Nlink++ + tx.set(m.entryKey(parent, name), m.packEntry(iattr.Typ, inode)) + tx.set(m.inodeKey(parent), m.marshal(&pattr)) + tx.set(m.inodeKey(inode), m.marshal(&iattr)) + if attr != nil { + *attr = iattr + } + return nil + })) +} + +func (m *kvMeta) doReaddir(ctx Context, inode Ino, plus uint8, entries *[]*Entry) syscall.Errno { + // TODO: handle big directory + vals, err := m.scanValues(m.entryKey(inode, ""), nil) + if err != nil { + return errno(err) + } + prefix := len(m.entryKey(inode, "")) + for name, buf := range vals { + typ, inode := m.parseEntry(buf) + *entries = append(*entries, &Entry{ + Inode: inode, + Name: []byte(name)[prefix:], + Attr: &Attr{Typ: typ}, + }) + } + + if plus != 0 { + fillAttr := func(es []*Entry) error { + var keys = make([][]byte, len(es)) + for i, e := range es { + keys[i] = m.inodeKey(e.Inode) + } + var rs [][]byte + err := m.client.txn(func(tx kvTxn) error { + rs = tx.gets(keys...) 
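+ // attributes for this batch of entries are fetched with a single multi-get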
+ return nil + }) + if err != nil { + return err + } + for j, re := range rs { + if re != nil { + m.parseAttr(re, es[j].Attr) + } + } + return nil + } + batchSize := 4096 + nEntries := len(*entries) + if nEntries <= batchSize { + err = fillAttr(*entries) + } else { + indexCh := make(chan []*Entry, 10) + var wg sync.WaitGroup + for i := 0; i < 2; i++ { + wg.Add(1) + go func() { + defer wg.Done() + for es := range indexCh { + if e := fillAttr(es); e != nil { + err = e + break + } + } + }() + } + for i := 0; i < nEntries; i += batchSize { + if i+batchSize > nEntries { + indexCh <- (*entries)[i:] + } else { + indexCh <- (*entries)[i : i+batchSize] + } + } + close(indexCh) + wg.Wait() + } + if err != nil { + return errno(err) + } + } + return 0 +} + +func (m *kvMeta) doDeleteSustainedInode(sid uint64, inode Ino) error { + var attr Attr + var newSpace int64 + err := m.txn(func(tx kvTxn) error { + a := tx.get(m.inodeKey(inode)) + if a == nil { + return nil + } + m.parseAttr(a, &attr) + tx.set(m.delfileKey(inode, attr.Length), m.packInt64(time.Now().Unix())) + tx.dels(m.inodeKey(inode)) + tx.dels(m.sustainedKey(sid, inode)) + newSpace = -align4K(attr.Length) + return nil + }) + if err == nil { + m.updateStats(newSpace, -1) + go m.doDeleteFileData(inode, attr.Length) + } + return err +} + +func (m *kvMeta) Read(ctx Context, inode Ino, indx uint32, chunks *[]Slice) syscall.Errno { + f := m.of.find(inode) + if f != nil { + f.RLock() + defer f.RUnlock() + } + if cs, ok := m.of.ReadChunk(inode, indx); ok { + *chunks = cs + return 0 + } + defer timeit(time.Now()) + val, err := m.get(m.chunkKey(inode, indx)) + if err != nil { + return errno(err) + } + ss := readSliceBuf(val) + if ss == nil { + return syscall.EIO + } + *chunks = buildSlice(ss) + m.of.CacheChunk(inode, indx, *chunks) + if !m.conf.ReadOnly && (len(val)/sliceBytes >= 5 || len(*chunks) >= 5) { + go m.compactChunk(inode, indx, false) + } + return 0 +} + +func (m *kvMeta) Write(ctx Context, inode Ino, indx uint32, off uint32, slice Slice) syscall.Errno { + defer timeit(time.Now()) + f := m.of.find(inode) + if f != nil { + f.Lock() + defer f.Unlock() + } + defer func() { m.of.InvalidateChunk(inode, indx) }() + var newSpace int64 + var needCompact bool + err := m.txn(func(tx kvTxn) error { + var attr Attr + a := tx.get(m.inodeKey(inode)) + if a == nil { + return syscall.ENOENT + } + m.parseAttr(a, &attr) + if attr.Typ != TypeFile { + return syscall.EPERM + } + newleng := uint64(indx)*ChunkSize + uint64(off) + uint64(slice.Len) + if newleng > attr.Length { + newSpace = align4K(newleng) - align4K(attr.Length) + attr.Length = newleng + } + if m.checkQuota(newSpace, 0) { + return syscall.ENOSPC + } + now := time.Now() + attr.Mtime = now.Unix() + attr.Mtimensec = uint32(now.Nanosecond()) + attr.Ctime = now.Unix() + attr.Ctimensec = uint32(now.Nanosecond()) + val := tx.append(m.chunkKey(inode, indx), marshalSlice(off, slice.Chunkid, slice.Size, slice.Off, slice.Len)) + tx.set(m.inodeKey(inode), m.marshal(&attr)) + needCompact = (len(val)/sliceBytes)%100 == 99 + return nil + }) + if err == nil { + if needCompact { + go m.compactChunk(inode, indx, false) + } + m.updateStats(newSpace, 0) + } + return errno(err) +} + +func (m *kvMeta) CopyFileRange(ctx Context, fin Ino, offIn uint64, fout Ino, offOut uint64, size uint64, flags uint32, copied *uint64) syscall.Errno { + defer timeit(time.Now()) + var newSpace int64 + f := m.of.find(fout) + if f != nil { + f.Lock() + defer f.Unlock() + } + defer func() { m.of.InvalidateChunk(fout, 0xFFFFFFFF) }() + err := 
m.txn(func(tx kvTxn) error { + rs := tx.gets(m.inodeKey(fin), m.inodeKey(fout)) + if rs[0] == nil || rs[1] == nil { + return syscall.ENOENT + } + var sattr Attr + m.parseAttr(rs[0], &sattr) + if sattr.Typ != TypeFile { + return syscall.EINVAL + } + if offIn >= sattr.Length { + *copied = 0 + return nil + } + if offIn+size > sattr.Length { + size = sattr.Length - offIn + } + var attr Attr + m.parseAttr(rs[1], &attr) + if attr.Typ != TypeFile { + return syscall.EINVAL + } + + newleng := offOut + size + if newleng > attr.Length { + newSpace = align4K(newleng) - align4K(attr.Length) + attr.Length = newleng + } + if m.checkQuota(newSpace, 0) { + return syscall.ENOSPC + } + now := time.Now() + attr.Mtime = now.Unix() + attr.Mtimensec = uint32(now.Nanosecond()) + attr.Ctime = now.Unix() + attr.Ctimensec = uint32(now.Nanosecond()) + + vals := tx.scanRange(m.chunkKey(fin, uint32(offIn/ChunkSize)), m.chunkKey(fin, uint32(offIn+size/ChunkSize)+1)) + chunks := make(map[uint32][]*slice) + for indx := uint32(offIn / ChunkSize); indx <= uint32((offIn+size)/ChunkSize); indx++ { + if v, ok := vals[string(m.chunkKey(fin, indx))]; ok { + chunks[indx] = readSliceBuf(v) + } + } + + coff := offIn / ChunkSize * ChunkSize + for coff < offIn+size { + if coff%ChunkSize != 0 { + panic("coff") + } + // Add a zero chunk for hole + ss := append([]*slice{{len: ChunkSize}}, chunks[uint32(coff/ChunkSize)]...) + cs := buildSlice(ss) + for _, s := range cs { + pos := coff + coff += uint64(s.Len) + if pos < offIn+size && pos+uint64(s.Len) > offIn { + if pos < offIn { + dec := offIn - pos + s.Off += uint32(dec) + pos += dec + s.Len -= uint32(dec) + } + if pos+uint64(s.Len) > offIn+size { + dec := pos + uint64(s.Len) - (offIn + size) + s.Len -= uint32(dec) + } + doff := pos - offIn + offOut + indx := uint32(doff / ChunkSize) + dpos := uint32(doff % ChunkSize) + if dpos+s.Len > ChunkSize { + tx.append(m.chunkKey(fout, indx), marshalSlice(dpos, s.Chunkid, s.Size, s.Off, ChunkSize-dpos)) + if s.Chunkid > 0 { + tx.incrBy(m.sliceKey(s.Chunkid, s.Size), 1) + } + skip := ChunkSize - dpos + tx.append(m.chunkKey(fout, indx+1), marshalSlice(0, s.Chunkid, s.Size, s.Off+skip, s.Len-skip)) + if s.Chunkid > 0 { + tx.incrBy(m.sliceKey(s.Chunkid, s.Size), 1) + } + } else { + tx.append(m.chunkKey(fout, indx), marshalSlice(dpos, s.Chunkid, s.Size, s.Off, s.Len)) + if s.Chunkid > 0 { + tx.incrBy(m.sliceKey(s.Chunkid, s.Size), 1) + } + } + } + } + } + tx.set(m.inodeKey(fout), m.marshal(&attr)) + *copied = size + return nil + }) + if err == nil { + m.updateStats(newSpace, 0) + } + return errno(err) +} + +func (m *kvMeta) cleanupDeletedFiles() { + for { + time.Sleep(time.Minute) + klen := 1 + 8 + 8 + now := time.Now().Unix() + vals, _ := m.scanValues(m.fmtKey("D"), func(k, v []byte) bool { + // filter out invalid ones + return len(k) == klen && len(v) == 8 && m.parseInt64(v)+60 < now + }) + for k := range vals { + rb := utils.FromBuffer([]byte(k)[1:]) + inode := m.decodeInode(rb.Get(8)) + length := rb.Get64() + logger.Debugf("cleanup chunks of inode %d with %d bytes", inode, length) + m.doDeleteFileData(inode, length) + } + } +} + +func (m *kvMeta) cleanupSlices() { + for { + time.Sleep(time.Hour) + + // once per hour + now := time.Now().Unix() + last, err := m.get(m.counterKey("nextCleanupSlices")) + if err != nil || m.parseInt64(last)+3600 > now { + continue + } + _ = m.setValue(m.counterKey("nextCleanupSlices"), m.packInt64(now)) + m.doCleanupSlices() + } +} + +func (m *kvMeta) doCleanupSlices() { + klen := 1 + 8 + 4 + vals, _ := 
m.scanValues(m.fmtKey("K"), func(k, v []byte) bool { + // filter out invalid ones + return len(k) == klen && len(v) == 8 && parseCounter(v) <= 0 + }) + for k, v := range vals { + rb := utils.FromBuffer([]byte(k)[1:]) + chunkid := rb.Get64() + size := rb.Get32() + refs := parseCounter(v) + if refs < 0 { + m.deleteSlice(chunkid, size) + } else { + m.cleanupZeroRef(chunkid, size) + } + } +} + +func (m *kvMeta) deleteChunk(inode Ino, indx uint32) error { + key := m.chunkKey(inode, indx) + var todel []*slice + err := m.txn(func(tx kvTxn) error { + buf := tx.get(key) + slices := readSliceBuf(buf) + tx.dels(key) + for _, s := range slices { + r := tx.incrBy(m.sliceKey(s.chunkid, s.size), -1) + if r < 0 { + todel = append(todel, s) + } + } + return nil + }) + if err != nil { + return err + } + for _, s := range todel { + m.deleteSlice(s.chunkid, s.size) + } + return nil +} + +func (r *kvMeta) cleanupZeroRef(chunkid uint64, size uint32) { + _ = r.txn(func(tx kvTxn) error { + v := tx.incrBy(r.sliceKey(chunkid, size), 0) + if v != 0 { + return syscall.EINVAL + } + tx.dels(r.sliceKey(chunkid, size)) + return nil + }) +} + +func (m *kvMeta) doDeleteFileData(inode Ino, length uint64) { + keys, err := m.scanKeys(m.fmtKey("A", inode, "C")) + if err != nil { + logger.Warnf("delete chunks of inode %d: %s", inode, err) + return + } + for i := range keys { + idx := binary.BigEndian.Uint32(keys[i][10:]) + err := m.deleteChunk(inode, idx) + if err != nil { + logger.Warnf("delete chunk %d:%d: %s", inode, idx, err) + return + } + } + _ = m.deleteKeys(m.delfileKey(inode, length)) +} + +func (m *kvMeta) compactChunk(inode Ino, indx uint32, force bool) { + if !force { + // avoid too many or duplicated compaction + m.Lock() + k := uint64(inode) + (uint64(indx) << 32) + if len(m.compacting) > 10 || m.compacting[k] { + m.Unlock() + return + } + m.compacting[k] = true + m.Unlock() + defer func() { + m.Lock() + delete(m.compacting, k) + m.Unlock() + }() + } + + buf, err := m.get(m.chunkKey(inode, indx)) + if err != nil { + return + } + + ss := readSliceBuf(buf) + skipped := skipSome(ss) + ss = ss[skipped:] + pos, size, chunks := compactChunk(ss) + if len(ss) < 2 || size == 0 { + return + } + + var chunkid uint64 + st := m.NewChunk(Background, &chunkid) + if st != 0 { + return + } + logger.Debugf("compact %d:%d: skipped %d slices (%d bytes) %d slices (%d bytes)", inode, indx, skipped, pos, len(ss), size) + err = m.newMsg(CompactChunk, chunks, chunkid) + if err != nil { + if !strings.Contains(err.Error(), "not exist") && !strings.Contains(err.Error(), "not found") { + logger.Warnf("compact %d %d with %d slices: %s", inode, indx, len(ss), err) + } + return + } + err = m.txn(func(tx kvTxn) error { + buf2 := tx.get(m.chunkKey(inode, indx)) + if len(buf2) < len(buf) || !bytes.Equal(buf, buf2[:len(buf)]) { + logger.Infof("chunk %d:%d was changed %d -> %d", inode, indx, len(buf), len(buf2)) + return syscall.EINVAL + } + + buf2 = append(append(buf2[:skipped*sliceBytes], marshalSlice(pos, chunkid, size, 0, size)...), buf2[len(buf):]...) 
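+ // rebuild the chunk value: keep the skipped slices, substitute the compacted range with the new slice, and preserve any slices appended after the snapshot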
+ tx.set(m.chunkKey(inode, indx), buf2) + // create the key to tracking it + tx.set(m.sliceKey(chunkid, size), make([]byte, 8)) + for _, s := range ss { + tx.incrBy(m.sliceKey(s.chunkid, s.size), -1) + } + return nil + }) + // there could be false-negative that the compaction is successful, double-check + if err != nil { + logger.Warnf("compact %d:%d failed: %s", inode, indx, err) + refs, e := m.get(m.sliceKey(chunkid, size)) + if e == nil { + if len(refs) > 0 { + err = nil + } else { + logger.Infof("compacted chunk %d was not used", chunkid) + err = syscall.EINVAL + } + } + } + + if errno, ok := err.(syscall.Errno); ok && errno == syscall.EINVAL { + logger.Infof("compaction for %d:%d is wasted, delete slice %d (%d bytes)", inode, indx, chunkid, size) + m.deleteSlice(chunkid, size) + } else if err == nil { + m.of.InvalidateChunk(inode, indx) + m.cleanupZeroRef(chunkid, size) + for _, s := range ss { + refs, err := m.getCounter(m.sliceKey(s.chunkid, s.size)) + if err == nil && refs < 0 { + m.deleteSlice(s.chunkid, s.size) + } + } + } else { + logger.Warnf("compact %d %d: %s", inode, indx, err) + } + go func() { + // wait for the current compaction to finish + time.Sleep(time.Millisecond * 10) + m.compactChunk(inode, indx, force) + }() +} + +func (r *kvMeta) CompactAll(ctx Context, bar *utils.Bar) syscall.Errno { + // AiiiiiiiiCnnnn file chunks + klen := 1 + 8 + 1 + 4 + result, err := r.scanValues(r.fmtKey("A"), func(k, v []byte) bool { + return len(k) == klen && k[1+8] == 'C' && len(v) > sliceBytes + }) + if err != nil { + logger.Warnf("scan chunks: %s", err) + return errno(err) + } + + bar.IncrTotal(int64(len(result))) + for k, value := range result { + key := []byte(k[1:]) + inode := r.decodeInode(key[:8]) + indx := binary.BigEndian.Uint32(key[9:]) + logger.Debugf("compact chunk %d:%d (%d slices)", inode, indx, len(value)/sliceBytes) + r.compactChunk(inode, indx, true) + bar.Increment() + } + return 0 +} + +func (m *kvMeta) ListSlices(ctx Context, slices map[Ino][]Slice, delete bool, showProgress func()) syscall.Errno { + if delete { + m.doCleanupSlices() + } + // AiiiiiiiiCnnnn file chunks + klen := 1 + 8 + 1 + 4 + result, err := m.scanValues(m.fmtKey("A"), func(k, v []byte) bool { + return len(k) == klen && k[1+8] == 'C' + }) + if err != nil { + logger.Warnf("scan chunks: %s", err) + return errno(err) + } + for key, value := range result { + inode := m.decodeInode([]byte(key)[1:9]) + ss := readSliceBuf(value) + for _, s := range ss { + if s.chunkid > 0 { + slices[inode] = append(slices[inode], Slice{Chunkid: s.chunkid, Size: s.size}) + if showProgress != nil { + showProgress() + } + } + } + } + return 0 +} + +func (m *kvMeta) GetXattr(ctx Context, inode Ino, name string, vbuff *[]byte) syscall.Errno { + defer timeit(time.Now()) + inode = m.checkRoot(inode) + buf, err := m.get(m.xattrKey(inode, name)) + if err != nil { + return errno(err) + } + if buf == nil { + return ENOATTR + } + *vbuff = buf + return 0 +} + +func (m *kvMeta) ListXattr(ctx Context, inode Ino, names *[]byte) syscall.Errno { + defer timeit(time.Now()) + inode = m.checkRoot(inode) + keys, err := m.scanKeys(m.xattrKey(inode, "")) + if err != nil { + return errno(err) + } + *names = nil + prefix := len(m.xattrKey(inode, "")) + for _, name := range keys { + *names = append(*names, name[prefix:]...) 
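+ // xattr names are returned as a NUL-separated list, so terminate each name with a zero byte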
+ *names = append(*names, 0) + } + return 0 +} + +func (m *kvMeta) SetXattr(ctx Context, inode Ino, name string, value []byte, flags uint32) syscall.Errno { + if name == "" { + return syscall.EINVAL + } + defer timeit(time.Now()) + inode = m.checkRoot(inode) + key := m.xattrKey(inode, name) + err := m.txn(func(tx kvTxn) error { + switch flags { + case XattrCreate: + v := tx.get(key) + if v != nil { + return syscall.EEXIST + } + case XattrReplace: + v := tx.get(key) + if v == nil { + return ENOATTR + } + } + tx.set(key, value) + return nil + }) + return errno(err) +} + +func (m *kvMeta) RemoveXattr(ctx Context, inode Ino, name string) syscall.Errno { + if name == "" { + return syscall.EINVAL + } + defer timeit(time.Now()) + inode = m.checkRoot(inode) + value, err := m.get(m.xattrKey(inode, name)) + if err != nil { + return errno(err) + } + if value == nil { + return ENOATTR + } + return errno(m.deleteKeys(m.xattrKey(inode, name))) +} + +func (m *kvMeta) dumpEntry(inode Ino) (*DumpedEntry, error) { + e := &DumpedEntry{} + f := func(tx kvTxn) error { + a := tx.get(m.inodeKey(inode)) + if a == nil { + return fmt.Errorf("inode %d not found", inode) + } + attr := &Attr{} + m.parseAttr(a, attr) + e.Attr = dumpAttr(attr) + e.Attr.Inode = inode + + vals := tx.scanValues(m.xattrKey(inode, ""), nil) + if len(vals) > 0 { + xattrs := make([]*DumpedXattr, 0, len(vals)) + for k, v := range vals { + xattrs = append(xattrs, &DumpedXattr{k[10:], string(v)}) // "A" + inode + "X" + } + sort.Slice(xattrs, func(i, j int) bool { return xattrs[i].Name < xattrs[j].Name }) + e.Xattrs = xattrs + } + + if attr.Typ == TypeFile { + vals = tx.scanRange(m.chunkKey(inode, 0), m.chunkKey(inode, uint32(attr.Length/ChunkSize)+1)) + for indx := uint32(0); uint64(indx)*ChunkSize < attr.Length; indx++ { + v, ok := vals[string(m.chunkKey(inode, indx))] + if !ok { + continue + } + ss := readSliceBuf(v) + slices := make([]*DumpedSlice, 0, len(ss)) + for _, s := range ss { + slices = append(slices, &DumpedSlice{Chunkid: s.chunkid, Pos: s.pos, Size: s.size, Off: s.off, Len: s.len}) + } + e.Chunks = append(e.Chunks, &DumpedChunk{indx, slices}) + } + } else if attr.Typ == TypeSymlink { + l := tx.get(m.symKey(inode)) + if l == nil { + return fmt.Errorf("no link target for inode %d", inode) + } + e.Symlink = string(l) + } + + return nil + } + if m.snap != nil { + return e, m.snap.txn(f) + } else { + return e, m.txn(f) + } +} + +func (m *kvMeta) dumpDir(inode Ino, tree *DumpedEntry, bw *bufio.Writer, depth int, showProgress func(totalIncr, currentIncr int64)) error { + bwWrite := func(s string) { + if _, err := bw.WriteString(s); err != nil { + panic(err) + } + } + var vals map[string][]byte + var err error + if m.snap != nil { + err = m.snap.txn(func(tx kvTxn) error { + vals = tx.scanValues(m.entryKey(inode, ""), nil) + return nil + }) + } else { + vals, err = m.scanValues(m.entryKey(inode, ""), nil) + } + if err != nil { + return err + } + if showProgress != nil { + showProgress(int64(len(vals)), 0) + } + if err = tree.writeJsonWithOutEntry(bw, depth); err != nil { + return err + } + var sortedName []string + for k := range vals { + sortedName = append(sortedName, k) + } + sort.Slice(sortedName, func(i, j int) bool { return sortedName[i][10:] < sortedName[j][10:] }) + + for idx, name := range sortedName { + typ, inode := m.parseEntry(vals[name]) + var entry *DumpedEntry + entry, err = m.dumpEntry(inode) + if err != nil { + return err + } + entry.Name = name[10:] + if typ == TypeDirectory { + err = m.dumpDir(inode, entry, bw, depth+2, 
showProgress) + } else { + err = entry.writeJSON(bw, depth+2) + } + if err != nil { + return err + } + if idx != len(vals)-1 { + bwWrite(",") + } + if showProgress != nil { + showProgress(0, 1) + } + } + bwWrite(fmt.Sprintf("\n%s}\n%s}", strings.Repeat(jsonIndent, depth+1), strings.Repeat(jsonIndent, depth))) + return nil +} + +func (m *kvMeta) DumpMeta(w io.Writer, root Ino) (err error) { + defer func() { + if p := recover(); p != nil { + if e, ok := p.(error); ok { + err = e + } else { + err = errors.Errorf("DumpMeta error: %v", p) + } + } + }() + vals, err := m.scanValues(m.fmtKey("D"), nil) + if err != nil { + return err + } + dels := make([]*DumpedDelFile, 0, len(vals)) + for k, v := range vals { + b := utils.FromBuffer([]byte(k[1:])) // "D" + if b.Len() != 16 { + return fmt.Errorf("invalid delfileKey: %s", k) + } + inode := m.decodeInode(b.Get(8)) + dels = append(dels, &DumpedDelFile{inode, b.Get64(), m.parseInt64(v)}) + } + + progress := utils.NewProgress(false, false) + var tree, trash *DumpedEntry + if root == 0 { + root = m.root + } + if root == 1 { // make snap + switch c := m.client.(type) { + case *memKV: + m.snap = c + default: + m.snap = &memKV{items: btree.New(2), temp: &kvItem{}} + bar := progress.AddCountBar("Snapshot keys", 0) + if err = m.txn(func(tx kvTxn) error { + used := parseCounter(tx.get(m.counterKey(usedSpace))) + inodeTotal := parseCounter(tx.get(m.counterKey(totalInodes))) + guessKeyTotal := int64(math.Ceil((float64(used/inodeTotal/(64*1024*1024)) + float64(3)) * float64(inodeTotal))) + bar.SetCurrent(0) // Reset + bar.SetTotal(guessKeyTotal) + threshold := 0.1 + tx.scan(nil, func(key, value []byte) { + m.snap.set(string(key), value) + if bar.Current() > int64(math.Ceil(float64(guessKeyTotal)*(1-threshold))) { + guessKeyTotal += int64(math.Ceil(float64(guessKeyTotal) * threshold)) + bar.SetTotal(guessKeyTotal) + } + bar.Increment() + }) + return nil + }); err != nil { + return err + } + bar.Done() + } + if trash, err = m.dumpEntry(TrashInode); err != nil { + trash = nil + } + } + if tree, err = m.dumpEntry(root); err != nil { + return err + } + if tree == nil { + return errors.New("The entry of the root inode was not found") + } + tree.Name = "FSTree" + format, err := m.Load() + if err != nil { + return err + } + + var rs [][]byte + err = m.txn(func(tx kvTxn) error { + rs = tx.gets(m.counterKey(usedSpace), + m.counterKey(totalInodes), + m.counterKey("nextInode"), + m.counterKey("nextChunk"), + m.counterKey("nextSession"), + m.counterKey("nextTrash")) + return nil + }) + if err != nil { + return err + } + cs := make([]int64, len(rs)) + for i, r := range rs { + if r != nil { + cs[i] = parseCounter(r) + } + } + + if root == 1 { + err = m.snap.txn(func(tx kvTxn) error { + vals = tx.scanValues(m.fmtKey("SS"), nil) + return nil + }) + } else { + vals, err = m.scanValues(m.fmtKey("SS"), nil) + } + if err != nil { + return err + } + ss := make(map[uint64][]Ino) + for k := range vals { + b := utils.FromBuffer([]byte(k[2:])) // "SS" + if b.Len() != 16 { + return fmt.Errorf("invalid sustainedKey: %s", k) + } + sid := b.Get64() + inode := m.decodeInode(b.Get(8)) + ss[sid] = append(ss[sid], inode) + } + sessions := make([]*DumpedSustained, 0, len(ss)) + for k, v := range ss { + sessions = append(sessions, &DumpedSustained{k, v}) + } + + dm := DumpedMeta{ + Setting: format, + Counters: &DumpedCounters{ + UsedSpace: cs[0], + UsedInodes: cs[1], + NextInode: cs[2], + NextChunk: cs[3], + NextSession: cs[4], + NextTrash: cs[5], + }, + Sustained: sessions, + DelFiles: dels, + } + 
bw, err := dm.writeJsonWithOutTree(w) + if err != nil { + return err + } + + bar := progress.AddCountBar("Dumped entries", 1) // with root + bar.Increment() + if trash != nil { + trash.Name = "Trash" + bar.IncrTotal(1) + bar.Increment() + } + showProgress := func(totalIncr, currentIncr int64) { + bar.IncrTotal(totalIncr) + bar.IncrInt64(currentIncr) + } + if err = m.dumpDir(root, tree, bw, 1, showProgress); err != nil { + return err + } + if trash != nil { + if _, err = bw.WriteString(","); err != nil { + return err + } + if err = m.dumpDir(TrashInode, trash, bw, 1, showProgress); err != nil { + return err + } + } + if _, err = bw.WriteString("\n}\n"); err != nil { + return err + } + progress.Done() + m.snap = nil + + return bw.Flush() +} + +func (m *kvMeta) loadEntry(e *DumpedEntry, cs *DumpedCounters, refs map[string]int64) error { + inode := e.Attr.Inode + logger.Debugf("Loading entry inode %d name %s", inode, e.Name) + attr := loadAttr(e.Attr) + attr.Parent = e.Parent + return m.txn(func(tx kvTxn) error { + if attr.Typ == TypeFile { + attr.Length = e.Attr.Length + for _, c := range e.Chunks { + if len(c.Slices) == 0 { + continue + } + slices := make([]byte, 0, sliceBytes*len(c.Slices)) + for _, s := range c.Slices { + slices = append(slices, marshalSlice(s.Pos, s.Chunkid, s.Size, s.Off, s.Len)...) + m.Lock() + refs[string(m.sliceKey(s.Chunkid, s.Size))]++ + m.Unlock() + if cs.NextChunk <= int64(s.Chunkid) { + cs.NextChunk = int64(s.Chunkid) + 1 + } + } + tx.set(m.chunkKey(inode, c.Index), slices) + } + } else if attr.Typ == TypeDirectory { + attr.Length = 4 << 10 + for _, c := range e.Entries { + tx.set(m.entryKey(inode, c.Name), m.packEntry(typeFromString(c.Attr.Type), c.Attr.Inode)) + } + } else if attr.Typ == TypeSymlink { + attr.Length = uint64(len(e.Symlink)) + tx.set(m.symKey(inode), []byte(e.Symlink)) + } + if inode > 1 && inode != TrashInode { + cs.UsedSpace += align4K(attr.Length) + cs.UsedInodes += 1 + } + if inode < TrashInode { + if cs.NextInode <= int64(inode) { + cs.NextInode = int64(inode) + 1 + } + } else { + if cs.NextTrash < int64(inode)-TrashInode { + cs.NextTrash = int64(inode) - TrashInode + } + } + + for _, x := range e.Xattrs { + tx.set(m.xattrKey(inode, x.Name), []byte(x.Value)) + } + tx.set(m.inodeKey(inode), m.marshal(attr)) + return nil + }) +} + +func (m *kvMeta) LoadMeta(r io.Reader) error { + var exist bool + err := m.txn(func(tx kvTxn) error { + exist = tx.exist(m.fmtKey()) + return nil + }) + if err != nil { + return err + } + if exist { + return fmt.Errorf("Database %s is not empty", m.Name()) + } + + dec := json.NewDecoder(r) + dm := &DumpedMeta{} + if err := dec.Decode(dm); err != nil { + return err + } + format, err := json.MarshalIndent(dm.Setting, "", "") + if err != nil { + return err + } + + progress := utils.NewProgress(false, false) + bar := progress.AddCountBar("Collected entries", 1) // with root + showProgress := func(totalIncr, currentIncr int64) { + bar.IncrTotal(totalIncr) + bar.IncrInt64(currentIncr) + } + dm.FSTree.Attr.Inode = 1 + entries := make(map[Ino]*DumpedEntry) + if err = collectEntry(dm.FSTree, entries, showProgress); err != nil { + return err + } + if dm.Trash != nil { + bar.IncrTotal(1) + if err = collectEntry(dm.Trash, entries, showProgress); err != nil { + return err + } + } + bar.Done() + + counters := &DumpedCounters{ + NextInode: 2, + NextChunk: 1, + } + refs := make(map[string]int64) + bar = progress.AddCountBar("Loaded entries", int64(len(entries))) + maxNum := 100 + pool := make(chan struct{}, maxNum) + errCh := 
make(chan error, 100) + done := make(chan struct{}, 1) + var wg sync.WaitGroup + for _, entry := range entries { + select { + case err = <-errCh: + return err + default: + } + pool <- struct{}{} + wg.Add(1) + go func(entry *DumpedEntry) { + defer func() { + wg.Done() + bar.Increment() + <-pool + }() + if err = m.loadEntry(entry, counters, refs); err != nil { + errCh <- err + } + }(entry) + } + + go func() { + wg.Wait() + close(done) + }() + + select { + case err = <-errCh: + return err + case <-done: + } + progress.Done() + logger.Infof("Dumped counters: %+v", *dm.Counters) + logger.Infof("Loaded counters: %+v", *counters) + + return m.txn(func(tx kvTxn) error { + tx.set(m.fmtKey("setting"), format) + tx.set(m.counterKey(usedSpace), packCounter(counters.UsedSpace)) + tx.set(m.counterKey(totalInodes), packCounter(counters.UsedInodes)) + tx.set(m.counterKey("nextInode"), packCounter(counters.NextInode)) + tx.set(m.counterKey("nextChunk"), packCounter(counters.NextChunk)) + tx.set(m.counterKey("nextSession"), packCounter(counters.NextSession)) + tx.set(m.counterKey("nextTrash"), packCounter(counters.NextTrash)) + for _, d := range dm.DelFiles { + tx.set(m.delfileKey(d.Inode, d.Length), m.packInt64(d.Expire)) + } + for k, v := range refs { + if v > 1 { + tx.set([]byte(k), packCounter(v-1)) + } + } + return nil + }) +} diff --git a/pkg/meta/tkv_badger.go b/pkg/meta/tkv_badger.go new file mode 100644 index 0000000..be5ca3c --- /dev/null +++ b/pkg/meta/tkv_badger.go @@ -0,0 +1,271 @@ +//go:build !nobadger +// +build !nobadger + +/* + * JuiceFS, Copyright 2022 Juicedata, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package meta + +import ( + "bytes" + "time" + + badger "github.com/dgraph-io/badger/v3" + "github.com/juicedata/juicefs/pkg/utils" +) + +type badgerTxn struct { + t *badger.Txn + c *badger.DB +} + +func (tx *badgerTxn) scan(prefix []byte, handler func(key []byte, value []byte)) { + it := tx.t.NewIterator(badger.IteratorOptions{ + Prefix: prefix, + PrefetchValues: true, + PrefetchSize: 1024, + }) + defer it.Close() + for it.Rewind(); it.Valid(); it.Next() { + item := it.Item() + value, err := item.ValueCopy(nil) + if err != nil { + panic(err) + } + handler(it.Item().Key(), value) + } +} + +func (tx *badgerTxn) get(key []byte) []byte { + item, err := tx.t.Get(key) + if err == badger.ErrKeyNotFound { + return nil + } + if err != nil { + panic(err) + } + value, err := item.ValueCopy(nil) + if err != nil { + panic(err) + } + return value +} + +func (tx *badgerTxn) gets(keys ...[]byte) [][]byte { + values := make([][]byte, len(keys)) + for i, key := range keys { + values[i] = tx.get(key) + } + return values +} + +func (tx *badgerTxn) scanRange(begin, end []byte) map[string][]byte { + it := tx.t.NewIterator(badger.IteratorOptions{ + PrefetchValues: true, + PrefetchSize: 1024, + }) + defer it.Close() + var ret = make(map[string][]byte) + for it.Seek(begin); it.Valid(); it.Next() { + item := it.Item() + key := item.Key() + if bytes.Compare(key, end) >= 0 { + break + } + var value []byte + value, err := item.ValueCopy(nil) + if err != nil { + panic(err) + } + ret[string(key)] = value + } + return ret +} + +func (tx *badgerTxn) scanKeys(prefix []byte) [][]byte { + it := tx.t.NewIterator(badger.IteratorOptions{ + PrefetchValues: false, + PrefetchSize: 1024, + Prefix: prefix, + }) + defer it.Close() + var ret [][]byte + for it.Rewind(); it.Valid(); it.Next() { + ret = append(ret, it.Item().KeyCopy(nil)) + } + return ret +} + +func (tx *badgerTxn) scanValues(prefix []byte, filter func(k, v []byte) bool) map[string][]byte { + it := tx.t.NewIterator(badger.IteratorOptions{ + PrefetchValues: true, + PrefetchSize: 1024, + Prefix: prefix, + }) + defer it.Close() + var ret = make(map[string][]byte) + for it.Rewind(); it.Valid(); it.Next() { + item := it.Item() + value, err := item.ValueCopy(nil) + if err != nil { + panic(err) + } + if filter == nil || filter(item.Key(), value) { + ret[string(item.Key())] = value + } + } + return ret +} + +func (tx *badgerTxn) exist(prefix []byte) bool { + it := tx.t.NewIterator(badger.IteratorOptions{ + Prefix: prefix, + PrefetchSize: 1, + }) + defer it.Close() + it.Rewind() + return it.Valid() +} + +func (tx *badgerTxn) set(key, value []byte) { + err := tx.t.Set(key, value) + if err == badger.ErrTxnTooBig { + logger.Warn("Current transaction is too big, commit it") + if er := tx.t.Commit(); er != nil { + panic(er) + } + tx.t = tx.c.NewTransaction(true) + err = tx.t.Set(key, value) + } + if err != nil { + panic(err) + } +} + +func (tx *badgerTxn) append(key []byte, value []byte) []byte { + list := append(tx.get(key), value...) 
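+ // Badger has no append primitive, so emulate it with a read-modify-write inside the transaction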
+ tx.set(key, list) + return list +} + +func (tx *badgerTxn) incrBy(key []byte, value int64) int64 { + var newCounter int64 + buf := tx.get(key) + if len(buf) > 0 { + newCounter = parseCounter(buf) + } + if value != 0 { + newCounter += value + tx.set(key, packCounter(newCounter)) + } + return newCounter +} + +func (tx *badgerTxn) dels(keys ...[]byte) { + for _, key := range keys { + if err := tx.t.Delete(key); err != nil { + panic(err) + } + } +} + +type badgerClient struct { + client *badger.DB + ticker *time.Ticker +} + +func (c *badgerClient) name() string { + return "badger" +} + +func (c *badgerClient) shouldRetry(err error) bool { + return err == badger.ErrConflict +} + +func (c *badgerClient) txn(f func(kvTxn) error) (err error) { + tx := c.client.NewTransaction(true) + defer tx.Discard() + defer func() { + if r := recover(); r != nil { + fe, ok := r.(error) + if ok { + err = fe + } else { + panic(r) + } + } + }() + err = f(&badgerTxn{tx, c.client}) + if err != nil { + return err + } + return tx.Commit() +} + +func (c *badgerClient) reset(prefix []byte) error { + for { + tx := c.client.NewTransaction(true) + defer tx.Discard() + it := tx.NewIterator(badger.IteratorOptions{ + Prefix: prefix, + PrefetchSize: 1024, + }) + it.Rewind() + if !it.Valid() { + it.Close() + return nil + } + for ; it.Valid(); it.Next() { + if err := tx.Delete(it.Item().Key()); err == badger.ErrTxnTooBig { + break + } else if err != nil { + it.Close() + return err + } + } + it.Close() + if err := tx.Commit(); err != nil { + return err + } + } +} + +func (c *badgerClient) close() error { + c.ticker.Stop() + return c.client.Close() +} + +func newBadgerClient(addr string) (tkvClient, error) { + opt := badger.DefaultOptions(addr) + opt.Logger = utils.GetLogger("badger") + client, err := badger.Open(opt) + if err != nil { + return nil, err + } + ticker := time.NewTicker(time.Hour) + go func() { + for range ticker.C { + for client.RunValueLogGC(0.7) == nil { + } + } + }() + return &badgerClient{client, ticker}, err +} + +func init() { + Register("badger", newKVMeta) + drivers["badger"] = newBadgerClient +} diff --git a/pkg/meta/tkv_lock.go b/pkg/meta/tkv_lock.go new file mode 100644 index 0000000..2e8873e --- /dev/null +++ b/pkg/meta/tkv_lock.go @@ -0,0 +1,223 @@ +/* + * JuiceFS, Copyright 2021 Juicedata, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package meta + +import ( + "syscall" + "time" + + "github.com/juicedata/juicefs/pkg/utils" +) + +type lockOwner struct { + sid uint64 + owner uint64 +} + +func marshalFlock(ls map[lockOwner]byte) []byte { + b := utils.NewBuffer(uint32(len(ls)) * 17) + for o, l := range ls { + b.Put64(o.sid) + b.Put64(o.owner) + b.Put8(l) + } + return b.Bytes() +} + +func unmarshalFlock(buf []byte) map[lockOwner]byte { + b := utils.FromBuffer(buf) + var ls = make(map[lockOwner]byte) + for b.HasMore() { + sid := b.Get64() + owner := b.Get64() + ltype := b.Get8() + ls[lockOwner{sid, owner}] = ltype + } + return ls +} + +func (m *kvMeta) Flock(ctx Context, inode Ino, owner uint64, ltype uint32, block bool) syscall.Errno { + ikey := m.flockKey(inode) + var err error + lkey := lockOwner{m.sid, owner} + for { + err = m.txn(func(tx kvTxn) error { + v := tx.get(ikey) + ls := unmarshalFlock(v) + switch ltype { + case F_UNLCK: + delete(ls, lkey) + case F_RDLCK: + for _, l := range ls { + if l == 'W' { + return syscall.EAGAIN + } + } + ls[lkey] = 'R' + case F_WRLCK: + delete(ls, lkey) + if len(ls) > 0 { + return syscall.EAGAIN + } + ls[lkey] = 'W' + default: + return syscall.EINVAL + } + if len(ls) == 0 { + tx.dels(ikey) + } else { + tx.set(ikey, marshalFlock(ls)) + } + return nil + }) + + if !block || err != syscall.EAGAIN { + break + } + if ltype == F_WRLCK { + time.Sleep(time.Millisecond * 1) + } else { + time.Sleep(time.Millisecond * 10) + } + if ctx.Canceled() { + return syscall.EINTR + } + } + return errno(err) +} + +func marshalPlock(ls map[lockOwner][]byte) []byte { + var size uint32 + for _, l := range ls { + size += 8 + 8 + 4 + uint32(len(l)) + } + b := utils.NewBuffer(size) + for k, records := range ls { + b.Put64(k.sid) + b.Put64(k.owner) + b.Put32(uint32(len(records))) + b.Put(records) + } + return b.Bytes() +} + +func unmarshalPlock(buf []byte) map[lockOwner][]byte { + b := utils.FromBuffer(buf) + var ls = make(map[lockOwner][]byte) + for b.HasMore() { + sid := b.Get64() + owner := b.Get64() + records := b.Get(int(b.Get32())) + ls[lockOwner{sid, owner}] = records + } + return ls +} + +func (m *kvMeta) Getlk(ctx Context, inode Ino, owner uint64, ltype *uint32, start, end *uint64, pid *uint32) syscall.Errno { + if *ltype == F_UNLCK { + *start = 0 + *end = 0 + *pid = 0 + return 0 + } + v, err := m.get(m.plockKey(inode)) + if err != nil { + return errno(err) + } + owners := unmarshalPlock(v) + delete(owners, lockOwner{m.sid, owner}) + for o, records := range owners { + ls := loadLocks(records) + for _, l := range ls { + // find conflicted locks + if (*ltype == F_WRLCK || l.ltype == F_WRLCK) && *end >= l.start && *start <= l.end { + *ltype = l.ltype + *start = l.start + *end = l.end + if o.sid == m.sid { + *pid = l.pid + } else { + *pid = 0 + } + return 0 + } + } + } + *ltype = F_UNLCK + *start = 0 + *end = 0 + *pid = 0 + return 0 +} + +func (m *kvMeta) Setlk(ctx Context, inode Ino, owner uint64, block bool, ltype uint32, start, end uint64, pid uint32) syscall.Errno { + ikey := m.plockKey(inode) + var err error + lock := plockRecord{ltype, pid, start, end} + lkey := lockOwner{m.sid, owner} + for { + err = m.txn(func(tx kvTxn) error { + owners := unmarshalPlock(tx.get(ikey)) + if ltype == F_UNLCK { + records := owners[lkey] + ls := loadLocks(records) + if len(ls) == 0 { + return nil // change nothing + } + ls = updateLocks(ls, lock) + if len(ls) == 0 { + delete(owners, lkey) + } else { + owners[lkey] = dumpLocks(ls) + } + } else { + ls := loadLocks(owners[lkey]) + delete(owners, lkey) + for _, d := 
range owners { + ls := loadLocks(d) + for _, l := range ls { + // find conflicted locks + if (ltype == F_WRLCK || l.ltype == F_WRLCK) && end >= l.start && start <= l.end { + return syscall.EAGAIN + } + } + } + ls = updateLocks(ls, lock) + owners[lkey] = dumpLocks(ls) + } + if len(owners) == 0 { + tx.dels(ikey) + } else { + tx.set(ikey, marshalPlock(owners)) + } + return nil + }) + + if !block || err != syscall.EAGAIN { + break + } + if ltype == F_WRLCK { + time.Sleep(time.Millisecond * 1) + } else { + time.Sleep(time.Millisecond * 10) + } + if ctx.Canceled() { + return syscall.EINTR + } + } + return errno(err) +} diff --git a/pkg/meta/tkv_mem.go b/pkg/meta/tkv_mem.go new file mode 100644 index 0000000..e1e08bb --- /dev/null +++ b/pkg/meta/tkv_mem.go @@ -0,0 +1,289 @@ +//go:build !fdb +// +build !fdb + +/* + * JuiceFS, Copyright 2021 Juicedata, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package meta + +import ( + "encoding/json" + "fmt" + "io/ioutil" + "strings" + "sync" + + "github.com/google/btree" +) + +func init() { + Register("memkv", newKVMeta) + drivers["memkv"] = newMockClient +} + +const settingPath = "/tmp/juicefs.memkv.setting.json" + +func newMockClient(addr string) (tkvClient, error) { + client := &memKV{items: btree.New(2), temp: &kvItem{}} + if d, err := ioutil.ReadFile(settingPath); err == nil { + var buffer map[string][]byte + if err = json.Unmarshal(d, &buffer); err == nil { + for k, v := range buffer { + client.set(k, v) // not locked + } + } + } + return client, nil +} + +type memTxn struct { + store *memKV + observed map[string]int + buffer map[string][]byte +} + +func (tx *memTxn) get(key []byte) []byte { + k := string(key) + if v, ok := tx.buffer[k]; ok { + return v + } + tx.store.Lock() + defer tx.store.Unlock() + it := tx.store.get(k) + if it != nil { + tx.observed[k] = it.ver + return it.value + } else { + tx.observed[k] = 0 + return nil + } +} + +func (tx *memTxn) gets(keys ...[]byte) [][]byte { + values := make([][]byte, len(keys)) + for i, key := range keys { + values[i] = tx.get(key) + } + return values +} + +func (tx *memTxn) scanRange(begin_, end_ []byte) map[string][]byte { + tx.store.Lock() + defer tx.store.Unlock() + begin := string(begin_) + end := string(end_) + ret := make(map[string][]byte) + tx.store.items.AscendGreaterOrEqual(&kvItem{key: begin}, func(i btree.Item) bool { + it := i.(*kvItem) + if end == "" || it.key < end { + tx.observed[it.key] = it.ver + ret[it.key] = it.value + return true + } + return false + }) + return ret +} + +func (tx *memTxn) scan(prefix []byte, handler func(key []byte, value []byte)) { + tx.store.Lock() + defer tx.store.Unlock() + begin := string(prefix) + end := string(nextKey(prefix)) + tx.store.items.AscendGreaterOrEqual(&kvItem{key: begin}, func(i btree.Item) bool { + it := i.(*kvItem) + if it.key >= end { + return false + } + tx.observed[it.key] = it.ver + handler([]byte(it.key), it.value) + return true + }) +} + +func nextKey(key []byte) []byte { + if len(key) == 0 { + return nil + } + next 
:= make([]byte, len(key)) + copy(next, key) + p := len(next) - 1 + for { + next[p]++ + if next[p] != 0 { + break + } + p-- + if p < 0 { + panic("can't scan keys for 0xFF") + } + } + return next +} + +func (tx *memTxn) scanKeys(prefix []byte) [][]byte { + var keys [][]byte + for k := range tx.scanValues(prefix, nil) { + keys = append(keys, []byte(k)) + } + return keys +} + +func (tx *memTxn) scanValues(prefix []byte, filter func(k, v []byte) bool) map[string][]byte { + res := tx.scanRange(prefix, nextKey(prefix)) + for k, v := range res { + if filter != nil && !filter([]byte(k), v) { + delete(res, k) + } + } + return res +} + +func (tx *memTxn) exist(prefix []byte) bool { + return len(tx.scanKeys(prefix)) > 0 +} + +func (tx *memTxn) set(key, value []byte) { + tx.buffer[string(key)] = value +} + +func (tx *memTxn) append(key []byte, value []byte) []byte { + new := append(tx.get(key), value...) + tx.set(key, new) + return new +} + +func (tx *memTxn) incrBy(key []byte, value int64) int64 { + var new int64 + buf := tx.get(key) + if len(buf) > 0 { + new = parseCounter(buf) + } + if value != 0 { + new += value + tx.set(key, packCounter(new)) + } + return new +} + +func (tx *memTxn) dels(keys ...[]byte) { + for _, key := range keys { + tx.buffer[string(key)] = nil + } +} + +type kvItem struct { + key string + ver int + value []byte +} + +func (it *kvItem) Less(o btree.Item) bool { + return it.key < o.(*kvItem).key +} + +type memKV struct { + sync.Mutex + items *btree.BTree + temp *kvItem +} + +func (c *memKV) name() string { + return "memkv" +} + +func (c *memKV) shouldRetry(err error) bool { + return strings.Contains(err.Error(), "write conflict") +} + +func (c *memKV) get(key string) *kvItem { + c.temp.key = key + it := c.items.Get(c.temp) + if it != nil { + return it.(*kvItem) + } + return nil +} + +func (c *memKV) set(key string, value []byte) { + c.temp.key = key + if value == nil { + c.items.Delete(c.temp) + return + } + it := c.items.Get(c.temp) + if it != nil { + it.(*kvItem).ver++ + it.(*kvItem).value = value + } else { + c.items.ReplaceOrInsert(&kvItem{key: key, ver: 1, value: value}) + } +} + +func (c *memKV) txn(f func(kvTxn) error) error { + tx := &memTxn{ + store: c, + observed: make(map[string]int), + buffer: make(map[string][]byte), + } + if err := f(tx); err != nil { + return err + } + + if len(tx.buffer) == 0 { + return nil + } + c.Lock() + defer c.Unlock() + for k, ver := range tx.observed { + it := c.get(k) + if it == nil && ver != 0 { + return fmt.Errorf("write conflict: %s was version %d, now deleted", k, ver) + } else if it != nil && it.ver > ver { + return fmt.Errorf("write conflict: %s %d > %d", k, it.ver, ver) + } + } + if _, ok := tx.buffer["setting"]; ok { + d, _ := json.Marshal(tx.buffer) + if err := ioutil.WriteFile(settingPath, d, 0644); err != nil { + return err + } + } + for k, value := range tx.buffer { + c.set(k, value) + } + return nil +} + +func (c *memKV) reset(prefix []byte) error { + if len(prefix) == 0 { + c.Lock() + c.items = btree.New(2) + c.temp = &kvItem{} + c.Unlock() + return nil + } + return c.txn(func(kt kvTxn) error { + kt.scan(prefix, func(key, value []byte) { + kt.dels(key) + }) + return nil + }) +} + +func (c *memKV) close() error { + return nil +} diff --git a/pkg/meta/tkv_prefix.go b/pkg/meta/tkv_prefix.go new file mode 100644 index 0000000..933d599 --- /dev/null +++ b/pkg/meta/tkv_prefix.go @@ -0,0 +1,124 @@ +/* + * JuiceFS, Copyright 2021 Juicedata, Inc. 
+ * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package meta + +import "fmt" + +type prefixTxn struct { + kvTxn + prefix []byte +} + +func (tx *prefixTxn) realKey(key []byte) []byte { + return append(tx.prefix, key...) +} + +func (tx *prefixTxn) origKey(key []byte) []byte { + return key[len(tx.prefix):] +} + +func (tx *prefixTxn) get(key []byte) []byte { + return tx.kvTxn.get(tx.realKey(key)) +} + +func (tx *prefixTxn) gets(keys ...[]byte) [][]byte { + for i, key := range keys { + keys[i] = tx.realKey(key) + } + return tx.kvTxn.gets(keys...) +} + +func (tx *prefixTxn) scanRange(begin_, end_ []byte) map[string][]byte { + r := tx.kvTxn.scanRange(tx.realKey(begin_), tx.realKey(end_)) + m := make(map[string][]byte, len(r)) + for k, v := range r { + m[k[len(tx.prefix):]] = v + } + return m +} +func (tx *prefixTxn) scan(prefix []byte, handler func(key, value []byte)) { + tx.kvTxn.scan(tx.realKey(prefix), func(key, value []byte) { + key = tx.origKey(key) + handler(key, value) + }) +} +func (tx *prefixTxn) scanKeys(prefix []byte) [][]byte { + keys := tx.kvTxn.scanKeys(tx.realKey(prefix)) + for i, k := range keys { + keys[i] = tx.origKey(k) + } + return keys +} + +func (tx *prefixTxn) scanValues(prefix []byte, filter func(k, v []byte) bool) map[string][]byte { + r := tx.kvTxn.scanValues(tx.realKey(prefix), func(k, v []byte) bool { + if filter == nil { + return true + } + return filter(tx.origKey(k), v) + }) + m := make(map[string][]byte, len(r)) + for k, v := range r { + m[k[len(tx.prefix):]] = v + } + return m +} + +func (tx *prefixTxn) exist(prefix []byte) bool { + return tx.kvTxn.exist(tx.realKey(prefix)) +} + +func (tx *prefixTxn) set(key, value []byte) { + tx.kvTxn.set(tx.realKey(key), value) +} + +func (tx *prefixTxn) append(key []byte, value []byte) []byte { + return tx.kvTxn.append(tx.realKey(key), value) +} + +func (tx *prefixTxn) incrBy(key []byte, value int64) int64 { + return tx.kvTxn.incrBy(tx.realKey(key), value) +} + +func (tx *prefixTxn) dels(keys ...[]byte) { + for i, key := range keys { + keys[i] = tx.realKey(key) + } + tx.kvTxn.dels(keys...) +} + +type prefixClient struct { + tkvClient + prefix []byte +} + +func (c *prefixClient) txn(f func(kvTxn) error) error { + return c.tkvClient.txn(func(tx kvTxn) error { + return f(&prefixTxn{tx, c.prefix}) + }) +} + +func (c *prefixClient) reset(prefix []byte) error { + if prefix != nil { + return fmt.Errorf("prefix must be nil") + } + return c.tkvClient.reset(c.prefix) +} + +func withPrefix(client tkvClient, prefix []byte) tkvClient { + return &prefixClient{client, prefix} +} diff --git a/pkg/meta/tkv_test.go b/pkg/meta/tkv_test.go new file mode 100644 index 0000000..58eb692 --- /dev/null +++ b/pkg/meta/tkv_test.go @@ -0,0 +1,171 @@ +/* + * JuiceFS, Copyright 2021 Juicedata, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +//nolint:errcheck +package meta + +import ( + "bytes" + "os" + "testing" +) + +func TestMemKVClient(t *testing.T) { + _ = os.Remove(settingPath) + m, err := newKVMeta("memkv", "jfs-unit-test", &Config{MaxDeletes: 1}) + if err != nil || m.Name() != "memkv" { + t.Fatalf("create meta: %s", err) + } + testMeta(t, m) +} + +func TestTiKVClient(t *testing.T) { + m, err := newKVMeta("tikv", "127.0.0.1:2379/jfs-unit-test", &Config{MaxDeletes: 1}) + if err != nil || m.Name() != "tikv" { + t.Fatalf("create meta: %s", err) + } + testMeta(t, m) +} + +func TestBadgerClient(t *testing.T) { + m, err := newKVMeta("badger", "badger", &Config{MaxDeletes: 1}) + if err != nil || m.Name() != "badger" { + t.Fatalf("create meta: %s", err) + } + testMeta(t, m) +} + +func testTKV(t *testing.T, c tkvClient) { + txn := func(f func(kt kvTxn)) { + if err := c.txn(func(kt kvTxn) error { + f(kt) + return nil + }); err != nil { + t.Fatal(err) + } + } + // basic + err := c.reset(nil) + if err != nil { + t.Fatalf("reset: %s", err) + } + var hasKey bool + txn(func(kt kvTxn) { hasKey = kt.exist(nil) }) + if hasKey { + t.Fatalf("has key after reset") + } + k := []byte("k") + v := []byte("value") + + txn(func(kt kvTxn) { + kt.set(k, v) + kt.append(k, v) + }) + var r []byte + txn(func(kt kvTxn) { r = kt.get(k) }) + if !bytes.Equal(r, []byte("valuevalue")) { + t.Fatalf("expect 'valuevalue', but got %v", string(r)) + } + txn(func(kt kvTxn) { + kt.set([]byte("k2"), v) + kt.set([]byte("v"), k) + }) + var ks [][]byte + txn(func(kt kvTxn) { ks = kt.gets([]byte("k1"), []byte("k2")) }) + if ks[0] != nil || string(ks[1]) != "value" { + t.Fatalf("gets k1,k2: %+v", ks) + } + + var keys [][]byte + txn(func(kt kvTxn) { + kt.scan([]byte("k"), func(key, value []byte) { + keys = append(keys, key) + }) + }) + if len(keys) != 2 || string(keys[0]) != "k" || string(keys[1]) != "k2" { + t.Fatalf("keys: %+v", keys) + } + txn(func(kt kvTxn) { keys = kt.scanKeys(nil) }) + if len(keys) != 3 || string(keys[0]) != "k" || string(keys[1]) != "k2" || string(keys[2]) != "v" { + t.Fatalf("keys: %+v", keys) + } + var values map[string][]byte + txn(func(kt kvTxn) { values = kt.scanValues([]byte("k"), func(k, v []byte) bool { return len(v) == 5 }) }) + if len(values) != 1 || string(values["k2"]) != "value" { + t.Fatalf("scan values: %+v", values) + } + txn(func(kt kvTxn) { values = kt.scanRange([]byte("k2"), []byte("v")) }) + if len(values) != 1 || string(values["k2"]) != "value" { + t.Fatalf("scanRange: %+v", values) + } + + // exists + txn(func(kt kvTxn) { hasKey = kt.exist([]byte("k")) }) + if !hasKey { + t.Fatalf("has key k*") + } + txn(func(kt kvTxn) { kt.dels(keys...) 
}) + txn(func(kt kvTxn) { r = kt.get(k) }) + if r != nil { + t.Fatalf("expect nil, but got %v", string(r)) + } + txn(func(kt kvTxn) { keys = kt.scanKeys(nil) }) + if len(keys) != 0 { + t.Fatalf("no keys: %+v", keys) + } + txn(func(kt kvTxn) { hasKey = kt.exist(nil) }) + if hasKey { + t.Fatalf("has not keys") + } + + // counters + var count int64 + c.txn(func(tx kvTxn) error { + count = tx.incrBy([]byte("counter"), -1) + return nil + }) + if count != -1 { + t.Fatalf("counter should be -1, but got %d", count) + } + c.txn(func(tx kvTxn) error { + count = tx.incrBy([]byte("counter"), 0) + return nil + }) + if count != -1 { + t.Fatalf("counter should be -1, but got %d", count) + } + c.txn(func(tx kvTxn) error { + count = tx.incrBy([]byte("counter"), 2) + return nil + }) + if count != 1 { + t.Fatalf("counter should be 1, but got %d", count) + } +} + +func TestBadgerKV(t *testing.T) { + c, err := newBadgerClient("test_badger") + if err != nil { + t.Fatal(err) + } + testTKV(t, c) +} + +func TestMemKV(t *testing.T) { + c, _ := newTkvClient("memkv", "") + c = withPrefix(c, []byte("jfs")) + testTKV(t, c) +} diff --git a/pkg/meta/tkv_tikv.go b/pkg/meta/tkv_tikv.go new file mode 100644 index 0000000..205939c --- /dev/null +++ b/pkg/meta/tkv_tikv.go @@ -0,0 +1,242 @@ +//go:build !notikv +// +build !notikv + +/* + * JuiceFS, Copyright 2021 Juicedata, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package meta + +import ( + "context" + "strings" + + plog "github.com/pingcap/log" + "github.com/pkg/errors" + "github.com/sirupsen/logrus" + tikverr "github.com/tikv/client-go/v2/error" + "github.com/tikv/client-go/v2/tikv" +) + +func init() { + Register("tikv", newKVMeta) + drivers["tikv"] = newTikvClient + +} + +func newTikvClient(addr string) (tkvClient, error) { + var plvl string // TiKV (PingCap) uses uber-zap logging, make it less verbose + switch logger.Level { + case logrus.TraceLevel: + plvl = "debug" + case logrus.DebugLevel: + plvl = "info" + case logrus.InfoLevel, logrus.WarnLevel: + plvl = "warn" + case logrus.ErrorLevel: + plvl = "error" + default: + plvl = "dpanic" + } + l, prop, _ := plog.InitLogger(&plog.Config{Level: plvl}) + plog.ReplaceGlobals(l, prop) + + p := strings.Index(addr, "/") + var prefix string + if p > 0 { + prefix = addr[p+1:] + addr = addr[:p] + } + pds := strings.Split(addr, ",") + client, err := tikv.NewTxnClient(pds) + if err != nil { + return nil, err + } + return withPrefix(&tikvClient{client}, append([]byte(prefix), 0xFD)), nil +} + +type tikvTxn struct { + *tikv.KVTxn +} + +func (tx *tikvTxn) get(key []byte) []byte { + value, err := tx.Get(context.TODO(), key) + if tikverr.IsErrNotFound(err) { + return nil + } + if err != nil { + panic(err) + } + return value +} + +func (tx *tikvTxn) gets(keys ...[]byte) [][]byte { + ret, err := tx.BatchGet(context.TODO(), keys) + if err != nil { + panic(err) + } + values := make([][]byte, len(keys)) + for i, key := range keys { + values[i] = ret[string(key)] + } + return values +} + +func (tx *tikvTxn) scanRange0(begin, end []byte, filter func(k, v []byte) bool) map[string][]byte { + it, err := tx.Iter(begin, end) + if err != nil { + panic(err) + } + defer it.Close() + var ret = make(map[string][]byte) + for it.Valid() { + key := it.Key() + value := it.Value() + if filter == nil || filter(key, value) { + ret[string(key)] = value + } + if err = it.Next(); err != nil { + panic(err) + } + } + return ret +} + +func (tx *tikvTxn) scanRange(begin, end []byte) map[string][]byte { + return tx.scanRange0(begin, end, nil) +} + +func (tx *tikvTxn) scan(prefix []byte, handler func(key, value []byte)) { + it, err := tx.Iter(prefix, nil) //nolint:typecheck + if err != nil { + panic(err) + } + defer it.Close() + for it.Valid() { + handler(it.Key(), it.Value()) + if err = it.Next(); err != nil { + panic(err) + } + } +} + +func (tx *tikvTxn) scanKeys(prefix []byte) [][]byte { + it, err := tx.Iter(prefix, nextKey(prefix)) + if err != nil { + panic(err) + } + defer it.Close() + var ret [][]byte + for it.Valid() { + ret = append(ret, it.Key()) + if err = it.Next(); err != nil { + panic(err) + } + } + return ret +} + +func (tx *tikvTxn) scanValues(prefix []byte, filter func(k, v []byte) bool) map[string][]byte { + return tx.scanRange0(prefix, nextKey(prefix), filter) +} + +func (tx *tikvTxn) exist(prefix []byte) bool { + it, err := tx.Iter(prefix, nextKey(prefix)) + if err != nil { + panic(err) + } + defer it.Close() + return it.Valid() +} + +func (tx *tikvTxn) set(key, value []byte) { + if err := tx.Set(key, value); err != nil { + panic(err) + } +} + +func (tx *tikvTxn) append(key []byte, value []byte) []byte { + new := append(tx.get(key), value...) 
+ tx.set(key, new) + return new +} + +func (tx *tikvTxn) incrBy(key []byte, value int64) int64 { + var new int64 + buf := tx.get(key) + if len(buf) > 0 { + new = parseCounter(buf) + } + if value != 0 { + new += value + tx.set(key, packCounter(new)) + } + return new +} + +func (tx *tikvTxn) dels(keys ...[]byte) { + for _, key := range keys { + if err := tx.Delete(key); err != nil { + panic(err) + } + } +} + +type tikvClient struct { + client *tikv.KVStore +} + +func (c *tikvClient) name() string { + return "tikv" +} + +func (c *tikvClient) shouldRetry(err error) bool { + return strings.Contains(err.Error(), "write conflict") || strings.Contains(err.Error(), "TxnLockNotFound") +} + +func (c *tikvClient) txn(f func(kvTxn) error) (err error) { + tx, err := c.client.Begin() + if err != nil { + return err + } + defer func() { + if r := recover(); r != nil { + fe, ok := r.(error) + if ok { + err = fe + } else { + err = errors.Errorf("tikv client txn func error: %v", r) + } + } + }() + if err = f(&tikvTxn{tx}); err != nil { + return err + } + if !tx.IsReadOnly() { + tx.SetEnable1PC(true) + tx.SetEnableAsyncCommit(true) + err = tx.Commit(context.Background()) + } + return err +} + +func (c *tikvClient) reset(prefix []byte) error { + _, err := c.client.DeleteRange(context.Background(), prefix, nextKey(prefix), 1) + return err +} + +func (c *tikvClient) close() error { + return c.client.Close() +} diff --git a/pkg/meta/utils.go b/pkg/meta/utils.go new file mode 100644 index 0000000..dab9cdc --- /dev/null +++ b/pkg/meta/utils.go @@ -0,0 +1,308 @@ +/* + * JuiceFS, Copyright 2021 Juicedata, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package meta + +import ( + "bytes" + "fmt" + "runtime/debug" + "sort" + "strings" + "sync" + "syscall" + + "github.com/go-redis/redis/v8" + "github.com/juicedata/juicefs/pkg/utils" +) + +const ( + usedSpace = "usedSpace" + totalInodes = "totalInodes" + delfiles = "delfiles" + allSessions = "sessions" + sessionInfos = "sessionInfos" + sliceRefs = "sliceRef" +) + +const ( + // fallocate + fallocKeepSize = 0x01 + fallocPunchHole = 0x02 + // RESERVED: fallocNoHideStale = 0x04 + fallocCollapesRange = 0x08 + fallocZeroRange = 0x10 + fallocInsertRange = 0x20 +) + +type msgCallbacks struct { + sync.Mutex + callbacks map[uint32]MsgCallback +} + +type freeID struct { + next uint64 + maxid uint64 +} + +var logger = utils.GetLogger("juicefs") + +func errno(err error) syscall.Errno { + if err == nil { + return 0 + } + if eno, ok := err.(syscall.Errno); ok { + return eno + } + if err == redis.Nil { + return syscall.ENOENT + } + if strings.HasPrefix(err.Error(), "OOM") { + return syscall.ENOSPC + } + logger.Errorf("error: %s\n%s", err, debug.Stack()) + return syscall.EIO +} + +func accessMode(attr *Attr, uid uint32, gids []uint32) uint8 { + if uid == 0 { + return 0x7 + } + mode := attr.Mode + if uid == attr.Uid { + return uint8(mode>>6) & 7 + } + for _, gid := range gids { + if gid == attr.Gid { + return uint8(mode>>3) & 7 + } + } + return uint8(mode & 7) +} + +func align4K(length uint64) int64 { + if length == 0 { + return 1 << 12 + } + return int64((((length - 1) >> 12) + 1) << 12) +} + +func lookupSubdir(m Meta, subdir string) (Ino, error) { + var root Ino = 1 + for subdir != "" { + ps := strings.SplitN(subdir, "/", 2) + if ps[0] != "" { + var attr Attr + r := m.Lookup(Background, root, ps[0], &root, &attr) + if r != 0 { + return 0, fmt.Errorf("lookup subdir %s: %s", ps[0], r) + } + if attr.Typ != TypeDirectory { + return 0, fmt.Errorf("%s is not a redirectory", ps[0]) + } + } + if len(ps) == 1 { + break + } + subdir = ps[1] + } + return root, nil +} + +type plockRecord struct { + ltype uint32 + pid uint32 + start uint64 + end uint64 +} + +func loadLocks(d []byte) []plockRecord { + var ls []plockRecord + rb := utils.FromBuffer(d) + for rb.HasMore() { + ls = append(ls, plockRecord{rb.Get32(), rb.Get32(), rb.Get64(), rb.Get64()}) + } + return ls +} + +func dumpLocks(ls []plockRecord) []byte { + wb := utils.NewBuffer(uint32(len(ls)) * 24) + for _, l := range ls { + wb.Put32(l.ltype) + wb.Put32(l.pid) + wb.Put64(l.start) + wb.Put64(l.end) + } + return wb.Bytes() +} + +func updateLocks(ls []plockRecord, nl plockRecord) []plockRecord { + // ls is ordered by l.start without overlap + size := len(ls) + for i := 0; i < size && nl.start <= nl.end; i++ { + l := ls[i] + if nl.start < l.start && nl.end >= l.start { + // split nl + ls = append(ls, nl) + ls[len(ls)-1].end = l.start - 1 + nl.start = l.start + } + if nl.start > l.start && nl.start <= l.end { + // split l + l.end = nl.start - 1 + ls = append(ls, l) + ls[i].start = nl.start + l = ls[i] + } + if nl.start == l.start { + ls[i].ltype = nl.ltype // update l + ls[i].pid = nl.pid + if l.end > nl.end { + // split l + ls[i].end = nl.end + l.start = nl.end + 1 + ls = append(ls, l) + } + nl.start = ls[i].end + 1 + } + } + if nl.start <= nl.end { + ls = append(ls, nl) + } + sort.Slice(ls, func(i, j int) bool { return ls[i].start < ls[j].start }) + for i := 0; i < len(ls); { + if ls[i].ltype == F_UNLCK || ls[i].start > ls[i].end { + // remove empty one + copy(ls[i:], ls[i+1:]) + ls = ls[:len(ls)-1] + } else { + if i+1 < len(ls) && ls[i].ltype == 
ls[i+1].ltype && ls[i].pid == ls[i+1].pid && ls[i].end+1 == ls[i+1].start { + // combine continuous range + ls[i].end = ls[i+1].end + ls[i+1].start = ls[i+1].end + 1 + } + i++ + } + } + return ls +} + +func emptyDir(r Meta, ctx Context, inode Ino, concurrent chan int) syscall.Errno { + if st := r.Access(ctx, inode, 3, nil); st != 0 { + return st + } + var entries []*Entry + if st := r.Readdir(ctx, inode, 0, &entries); st != 0 { + return st + } + var wg sync.WaitGroup + var status syscall.Errno + for _, e := range entries { + if e.Inode == inode || len(e.Name) == 2 && string(e.Name) == ".." { + continue + } + if e.Attr.Typ == TypeDirectory { + select { + case concurrent <- 1: + wg.Add(1) + go func(child Ino, name string) { + defer wg.Done() + e := emptyEntry(r, ctx, inode, name, child, concurrent) + if e != 0 { + status = e + } + <-concurrent + }(e.Inode, string(e.Name)) + default: + if st := emptyEntry(r, ctx, inode, string(e.Name), e.Inode, concurrent); st != 0 { + return st + } + } + } else { + if st := r.Unlink(ctx, inode, string(e.Name)); st != 0 { + return st + } + } + } + wg.Wait() + return status +} + +func emptyEntry(r Meta, ctx Context, parent Ino, name string, inode Ino, concurrent chan int) syscall.Errno { + st := emptyDir(r, ctx, inode, concurrent) + if st == 0 { + st = r.Rmdir(ctx, parent, name) + if st == syscall.ENOTEMPTY { + st = emptyEntry(r, ctx, parent, name, inode, concurrent) + } + } + return st +} + +func Remove(r Meta, ctx Context, parent Ino, name string) syscall.Errno { + if st := r.Access(ctx, parent, 3, nil); st != 0 { + return st + } + var inode Ino + var attr Attr + if st := r.Lookup(ctx, parent, name, &inode, &attr); st != 0 { + return st + } + if attr.Typ != TypeDirectory { + return r.Unlink(ctx, parent, name) + } + concurrent := make(chan int, 50) + return emptyEntry(r, ctx, parent, name, inode, concurrent) +} + +func GetSummary(r Meta, ctx Context, inode Ino, summary *Summary, recursive bool) syscall.Errno { + var attr Attr + if st := r.GetAttr(ctx, inode, &attr); st != 0 { + return st + } + if attr.Typ == TypeDirectory { + var entries []*Entry + if st := r.Readdir(ctx, inode, 1, &entries); st != 0 { + return st + } + for _, e := range entries { + if e.Inode == inode || len(e.Name) == 2 && bytes.Equal(e.Name, []byte("..")) { + continue + } + if e.Attr.Typ == TypeDirectory { + if recursive { + if st := GetSummary(r, ctx, e.Inode, summary, recursive); st != 0 { + return st + } + } else { + summary.Dirs++ + summary.Size += 4096 + } + } else { + summary.Files++ + summary.Length += e.Attr.Length + summary.Size += uint64(align4K(e.Attr.Length)) + } + } + summary.Dirs++ + summary.Size += 4096 + } else { + summary.Files++ + summary.Length += attr.Length + summary.Size += uint64(align4K(attr.Length)) + } + return 0 +} diff --git a/pkg/meta/utils_darwin.go b/pkg/meta/utils_darwin.go new file mode 100644 index 0000000..30b1ccb --- /dev/null +++ b/pkg/meta/utils_darwin.go @@ -0,0 +1,20 @@ +package meta + +import ( + "syscall" + + sys "golang.org/x/sys/unix" +) + +const ENOATTR = syscall.ENOATTR +const ( + F_UNLCK = syscall.F_UNLCK + F_RDLCK = syscall.F_RDLCK + F_WRLCK = syscall.F_WRLCK +) + +const ( + XattrCreateOrReplace = 0 + XattrCreate = sys.XATTR_CREATE + XattrReplace = sys.XATTR_REPLACE +) diff --git a/pkg/meta/utils_linux.go b/pkg/meta/utils_linux.go new file mode 100644 index 0000000..39d2670 --- /dev/null +++ b/pkg/meta/utils_linux.go @@ -0,0 +1,20 @@ +package meta + +import ( + "syscall" + + sys "golang.org/x/sys/unix" +) + +const ENOATTR = syscall.ENODATA 
+const ( + F_UNLCK = syscall.F_UNLCK + F_RDLCK = syscall.F_RDLCK + F_WRLCK = syscall.F_WRLCK +) + +const ( + XattrCreateOrReplace = 0 + XattrCreate = sys.XATTR_CREATE + XattrReplace = sys.XATTR_REPLACE +) diff --git a/pkg/meta/utils_windows.go b/pkg/meta/utils_windows.go new file mode 100644 index 0000000..5157b99 --- /dev/null +++ b/pkg/meta/utils_windows.go @@ -0,0 +1,33 @@ +/* + * JuiceFS, Copyright 2021 Juicedata, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package meta + +import "syscall" + +const ENOATTR = syscall.ENODATA + +const ( + F_UNLCK = 1 + F_RDLCK = 2 + F_WRLCK = 3 +) + +const ( + XattrCreateOrReplace = 0 + XattrCreate = 1 + XattrReplace = 2 +) diff --git a/pkg/metric/metrics.go b/pkg/metric/metrics.go new file mode 100644 index 0000000..b96b320 --- /dev/null +++ b/pkg/metric/metrics.go @@ -0,0 +1,151 @@ +/* + * JuiceFS, Copyright 2020 Juicedata, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package metric + +import ( + "fmt" + "net" + "os" + "strconv" + "time" + + consulapi "github.com/hashicorp/consul/api" + "github.com/hashicorp/go-hclog" + + "github.com/juicedata/juicefs/pkg/meta" + "github.com/juicedata/juicefs/pkg/utils" + "github.com/prometheus/client_golang/prometheus" +) + +var logger = utils.GetLogger("juicefs") + +var ( + start = time.Now() + cpu = prometheus.NewGaugeFunc(prometheus.GaugeOpts{ + Name: "cpu_usage", + Help: "Accumulated CPU usage in seconds.", + }, func() float64 { + ru := utils.GetRusage() + return ru.GetStime() + ru.GetUtime() + }) + memory = prometheus.NewGaugeFunc(prometheus.GaugeOpts{ + Name: "memory", + Help: "Used memory in bytes.", + }, func() float64 { + _, rss := utils.MemoryUsage() + return float64(rss) + }) + uptime = prometheus.NewGaugeFunc(prometheus.GaugeOpts{ + Name: "uptime", + Help: "Total running time in seconds.", + }, func() float64 { + return time.Since(start).Seconds() + }) + usedSpace = prometheus.NewGauge(prometheus.GaugeOpts{ + Name: "used_space", + Help: "Total used space in bytes.", + }) + usedInodes = prometheus.NewGauge(prometheus.GaugeOpts{ + Name: "used_inodes", + Help: "Total number of inodes.", + }) +) + +func UpdateMetrics(m meta.Meta) { + prometheus.MustRegister(cpu) + prometheus.MustRegister(memory) + prometheus.MustRegister(uptime) + prometheus.MustRegister(usedSpace) + prometheus.MustRegister(usedInodes) + + ctx := meta.Background + for { + var totalSpace, availSpace, iused, iavail uint64 + err := m.StatFS(ctx, &totalSpace, &availSpace, &iused, &iavail) + if err == 0 { + usedSpace.Set(float64(totalSpace - availSpace)) + usedInodes.Set(float64(iused)) + } + time.Sleep(time.Second * 10) + } +} + +func RegisterToConsul(consulAddr, metricsAddr, mountPoint string) { + if metricsAddr == "" { + logger.Errorf("Metrics server start err,so can't register to consul") + return + } + localIp, portStr, err := net.SplitHostPort(metricsAddr) + if err != nil { + logger.Errorf("Metrics url format err:%s", err) + return + } + + // Don't register 0.0.0.0 to consul + if localIp == "0.0.0.0" || localIp == "::" { + localIp, err = utils.GetLocalIp(consulAddr) + if err != nil { + logger.Errorf("Get local ip failed: %v", err) + return + } + } + port, err := strconv.Atoi(portStr) + if err != nil { + logger.Errorf("Metrics port set err:%s", err) + return + } + config := consulapi.DefaultConfigWithLogger(hclog.New(&hclog.LoggerOptions{ //nolint:typecheck + Name: "consul-api", + Output: logger.Out, + })) + config.Address = consulAddr + client, err := consulapi.NewClient(config) + if err != nil { + logger.Errorf("Creat consul client failed:%s", err) + return + } + + localMeta := make(map[string]string) + hostname, err := os.Hostname() + if err != nil { + logger.Errorf("Get hostname failed:%s", err) + return + } + localMeta["hostName"] = hostname + localMeta["mountPoint"] = mountPoint + + check := &consulapi.AgentServiceCheck{ + HTTP: fmt.Sprintf("http://%s:%d/metrics", localIp, port), + Timeout: "5s", + Interval: "5s", + DeregisterCriticalServiceAfter: "30s", + } + + registration := consulapi.AgentServiceRegistration{ + ID: fmt.Sprintf("%s:%s", localIp, mountPoint), + Name: "juicefs", + Port: port, + Address: localIp, + Meta: localMeta, + Check: check, + } + if err = client.Agent().ServiceRegister(®istration); err != nil { + logger.Errorf("Service register failed:%s", err) + } else { + logger.Info("Juicefs register to consul success") + } +} diff --git a/pkg/object/README.md b/pkg/object/README.md new file mode 100644 index 
0000000..ce9824c
--- /dev/null
+++ b/pkg/object/README.md
@@ -0,0 +1,73 @@
+
+The following object stores are supported:
+
+- file: local files
+- sftp: FTP via SSH
+- s3: Amazon S3
+- hdfs: Hadoop File System (HDFS)
+- gcs: Google Cloud Storage
+- wasb: Windows Azure Blob Storage
+- oss: Aliyun OSS
+- cos: Tencent Cloud COS
+- ks3: KSYun KS3
+- ufile: UCloud UFile
+- qingstor: Qingcloud QingStor
+- bos: Baidu Cloud Object Storage
+- jss: JCloud Object Storage
+- qiniu: Qiniu
+- b2: Backblaze B2
+- space: Digital Ocean Space
+- obs: Huawei Object Storage Service
+- oos: CTYun OOS
+- scw: Scaleway Object Storage
+- minio: MinIO
+- scs: Sina Cloud Storage
+- eos: ECloud (China Mobile Cloud) Object Storage
+
+They should be specified in the following format:
+
+[NAME://][ACCESS_KEY:SECRET_KEY@]BUCKET[.ENDPOINT][/PREFIX]
+
+Some examples:
+
+- local/path
+- user@host:port:path
+- file:///Users/me/code/
+- hdfs://hdfs@namenode1:9000,namenode2:9000/user/
+- s3://my-bucket/
+- s3://access-key:secret-key-id@my-bucket/prefix
+- wasb://account-name:account-key@my-container/prefix
+- gcs://my-bucket.us-west1.googleapi.com/
+- oss://test
+- cos://test-1234
+- obs://my-bucket
+- bos://my-bucket
+- minio://myip:9000/bucket
+- scs://access-key:secret-key-id@my-bucket.sinacloud.net/prefix
+
+Note:
+
+- It's recommended to run in the target region for better performance.
+- The endpoint of an S3, OSS, COS, OBS or BOS bucket can be discovered automatically, so `SRC` and `DST` can use the format `NAME://[ACCESS_KEY:SECRET_KEY@]BUCKET[/PREFIX]`. `ACCESS_KEY` and `SECRET_KEY` can be provided by the corresponding environment variables (see below).
+- S3:
+  * The access key and secret key for S3 can be provided by `AWS_ACCESS_KEY_ID` and `AWS_SECRET_ACCESS_KEY`, or by an *IAM* role.
+- WASB (Windows Azure Storage Blob):
+  * The account name and account key can be provided as a [connection string](https://docs.microsoft.com/en-us/azure/storage/common/storage-configure-connection-string#configure-a-connection-string-for-an-azure-storage-account) via `AZURE_STORAGE_CONNECTION_STRING`.
+- GCS: The machine should be authorized to access Google Cloud Storage.
+- OSS:
+  * The credentials can be provided by the environment variables `ALICLOUD_ACCESS_KEY_ID` and `ALICLOUD_ACCESS_KEY_SECRET`, by a RAM role, or by [EMR MetaService](https://help.aliyun.com/document_detail/43966.html).
+- COS:
+  * The AppID should be part of the bucket name.
+  * The credentials can be provided by the environment variables `COS_SECRETID` and `COS_SECRETKEY`.
+- OBS:
+  * The credentials can be provided by the environment variables `HWCLOUD_ACCESS_KEY` and `HWCLOUD_SECRET_KEY`.
+- BOS:
+  * The credentials can be provided by the environment variables `BDCLOUD_ACCESS_KEY` and `BDCLOUD_SECRET_KEY`.
+- Qiniu:
+  * The S3 endpoint should be used for Qiniu, for example `abc.cn-north-1-s3.qiniu.com`.
+  * If there are keys starting with "/", the domain should be provided via `QINIU_DOMAIN`.
+- sftp: If your target machine uses SSH certificates instead of a password, pass the path to your private key file via the environment variable `SSH_PRIVATE_KEY_PATH`, e.g. `SSH_PRIVATE_KEY_PATH=/home/someuser/.ssh/id_rsa juicefs sync [src] [dst]`.
+- Scaleway:
+  * The credentials can be provided by the environment variables `SCW_ACCESS_KEY` and `SCW_SECRET_KEY`.
+- MinIO:
+  * The credentials can be provided by the environment variables `MINIO_ACCESS_KEY` and `MINIO_SECRET_KEY`.
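
To make the address format above concrete, the following is a minimal, hypothetical Go sketch of how a `NAME://ACCESS_KEY:SECRET_KEY@BUCKET.ENDPOINT/PREFIX` storage URI could be split into its parts with only the standard library. The names `storageSpec` and `parseStorageURI` are made up for this illustration and are not part of the JuiceFS API; schemes with their own syntax (local paths, `file://`, `hdfs://`, sftp-style `user@host:port:path`) are deliberately left out.

```go
package main

import (
	"fmt"
	"net/url"
	"strings"
)

// storageSpec is a hypothetical container for the pieces of a storage address.
type storageSpec struct {
	Name      string // scheme, e.g. "s3", "oss", "wasb"
	AccessKey string
	SecretKey string
	Bucket    string
	Endpoint  string // empty means the endpoint should be discovered automatically
	Prefix    string
}

// parseStorageURI splits NAME://ACCESS_KEY:SECRET_KEY@BUCKET.ENDPOINT/PREFIX.
func parseStorageURI(raw string) (*storageSpec, error) {
	u, err := url.Parse(raw)
	if err != nil {
		return nil, fmt.Errorf("invalid storage URI %q: %v", raw, err)
	}
	spec := &storageSpec{Name: u.Scheme, Prefix: strings.TrimPrefix(u.Path, "/")}
	if u.User != nil {
		spec.AccessKey = u.User.Username()
		spec.SecretKey, _ = u.User.Password()
	}
	// The first label of the host is the bucket; anything after the first dot
	// is treated as an explicit endpoint.
	parts := strings.SplitN(u.Host, ".", 2)
	spec.Bucket = parts[0]
	if len(parts) > 1 {
		spec.Endpoint = parts[1]
	}
	return spec, nil
}

func main() {
	spec, err := parseStorageURI("s3://access-key:secret-key-id@my-bucket.s3.us-west-2.amazonaws.com/prefix")
	if err != nil {
		panic(err)
	}
	// Prints the scheme, credentials, bucket, endpoint and prefix fields.
	fmt.Printf("%+v\n", *spec)
}
```

When the endpoint part is omitted (for example `s3://my-bucket/`), `Endpoint` stays empty, which matches the note above about auto-discovering the bucket's endpoint; credentials left out of the URI would then have to come from the environment variables listed for each store.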
diff --git a/pkg/object/azure.go b/pkg/object/azure.go new file mode 100644 index 0000000..2bab181 --- /dev/null +++ b/pkg/object/azure.go @@ -0,0 +1,211 @@ +//go:build !noazure +// +build !noazure + +/* + * JuiceFS, Copyright 2018 Juicedata, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package object + +import ( + "fmt" + "io" + "log" + "net/url" + "os" + "strings" + "time" + + "github.com/Azure/azure-sdk-for-go/storage" +) + +type wasb struct { + DefaultObjectStorage + container *storage.Container + marker string +} + +func (b *wasb) String() string { + return fmt.Sprintf("wasb://%s/", b.container.Name) +} + +func (b *wasb) Create() error { + _, err := b.container.CreateIfNotExists(&storage.CreateContainerOptions{}) + return err +} + +func (b *wasb) Head(key string) (Object, error) { + blob := b.container.GetBlobReference(key) + err := blob.GetProperties(nil) + if err != nil { + return nil, err + } + + return &obj{ + blob.Name, + blob.Properties.ContentLength, + time.Time(blob.Properties.LastModified), + strings.HasSuffix(blob.Name, "/"), + }, nil +} + +func (b *wasb) Get(key string, off, limit int64) (io.ReadCloser, error) { + blob := b.container.GetBlobReference(key) + var end int64 + if limit > 0 { + end = off + limit - 1 + } + return blob.GetRange(&storage.GetBlobRangeOptions{ + Range: &storage.BlobRange{ + Start: uint64(off), + End: uint64(end), + }, + }) +} + +func (b *wasb) Put(key string, data io.Reader) error { + return b.container.GetBlobReference(key).CreateBlockBlobFromReader(data, nil) +} + +func (b *wasb) Copy(dst, src string) error { + uri := b.container.GetBlobReference(src).GetURL() + return b.container.GetBlobReference(dst).Copy(uri, nil) +} + +func (b *wasb) Delete(key string) error { + return b.container.GetBlobReference(key).Delete(nil) +} + +func (b *wasb) List(prefix, marker string, limit int64) ([]Object, error) { + if marker != "" { + if b.marker == "" { + // last page + return nil, nil + } + marker = b.marker + } + resp, err := b.container.ListBlobs(storage.ListBlobsParameters{ + Prefix: prefix, + Marker: marker, + MaxResults: uint(limit), + }) + if err != nil { + b.marker = "" + return nil, err + } + b.marker = resp.NextMarker + n := len(resp.Blobs) + objs := make([]Object, n) + for i := 0; i < n; i++ { + blob := resp.Blobs[i] + mtime := time.Time(blob.Properties.LastModified) + objs[i] = &obj{ + blob.Name, + blob.Properties.ContentLength, + mtime, + strings.HasSuffix(blob.Name, "/"), + } + } + return objs, nil +} + +// TODO: support multipart upload + +func autoWasbEndpoint(containerName, accountName, accountKey string, useHTTPS bool) (string, error) { + baseURLs := []string{"core.windows.net", "core.chinacloudapi.cn"} + endpoint := "" + for _, baseURL := range baseURLs { + client, err := storage.NewClient(accountName, accountKey, baseURL, "2017-04-17", useHTTPS) + if err != nil { + log.Fatalf("Failed to create client: %v", err) + } + blobService := client.GetBlobService() + resp, err := 
blobService.ListContainers(storage.ListContainersParameters{Prefix: containerName, MaxResults: 1}) + if err != nil { + logger.Debugf("Try to list containers at %s failed: %s", baseURL, err) + continue + } + if len(resp.Containers) == 1 { + endpoint = baseURL + break + } + } + + if endpoint == "" { + return "", fmt.Errorf("fail to get endpoint for container %s", containerName) + } + return endpoint, nil +} + +func newWabs(endpoint, accountName, accountKey string) (ObjectStorage, error) { + if !strings.Contains(endpoint, "://") { + endpoint = fmt.Sprintf("https://%s", endpoint) + } + uri, err := url.ParseRequestURI(endpoint) + if err != nil { + return nil, fmt.Errorf("Invalid endpoint: %v, error: %v", endpoint, err) + } + hostParts := strings.SplitN(uri.Host, ".", 2) + + scheme := "" + domain := "" + // Connection string support: DefaultEndpointsProtocol=[http|https];AccountName=***;AccountKey=***;EndpointSuffix=[core.windows.net|core.chinacloudapi.cn] + if connString := os.Getenv("AZURE_STORAGE_CONNECTION_STRING"); connString != "" { + items := strings.Split(connString, ";") + for _, item := range items { + if item = strings.TrimSpace(item); item == "" { + continue + } + parts := strings.SplitN(item, "=", 2) + if len(parts) != 2 { + return nil, fmt.Errorf("Invalid connection string item: %s", item) + } + // Arguments from command line take precedence + if parts[0] == "DefaultEndpointsProtocol" && scheme == "" { + scheme = parts[1] + } else if parts[0] == "AccountName" && accountName == "" { + accountName = parts[1] + } else if parts[0] == "AccountKey" && accountKey == "" { + accountKey = parts[1] + } else if parts[0] == "EndpointSuffix" && domain == "" { + domain = parts[1] + } + } + } + if scheme == "" { + scheme = "https" + } + name := hostParts[0] + if len(hostParts) > 1 { + // Arguments from command line take precedence + domain = hostParts[1] + } else if domain == "" { + if domain, err = autoWasbEndpoint(name, accountName, accountKey, scheme == "https"); err != nil { + return nil, fmt.Errorf("Unable to get endpoint of container %s: %s", name, err) + } + } + + client, err := storage.NewClient(accountName, accountKey, domain, "2017-04-17", scheme == "https") + if err != nil { + log.Fatalf("Failed to create client: %v", err) + } + service := client.GetBlobService() + container := service.GetContainerReference(name) + return &wasb{container: container}, nil +} + +func init() { + Register("wasb", newWabs) +} diff --git a/pkg/object/b2.go b/pkg/object/b2.go new file mode 100644 index 0000000..5885951 --- /dev/null +++ b/pkg/object/b2.go @@ -0,0 +1,170 @@ +//go:build !nob2 +// +build !nob2 + +/* + * JuiceFS, Copyright 2018 Juicedata, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package object + +import ( + "fmt" + "io" + "net/url" + "strings" + "time" + + "gopkg.in/kothar/go-backblaze.v0" +) + +type b2client struct { + DefaultObjectStorage + bucket *backblaze.Bucket + nextMarker string +} + +func (c *b2client) String() string { + return fmt.Sprintf("b2://%s/", c.bucket.Name) +} + +func (c *b2client) Create() error { + return nil +} + +func (c *b2client) getFileInfo(key string) (*backblaze.File, error) { + f, r, err := c.bucket.DownloadFileRangeByName(key, &backblaze.FileRange{Start: 0, End: 1}) + if err != nil { + return nil, err + } + var buf [2]byte + _, _ = r.Read(buf[:]) + _ = r.Close() + return f, nil +} + +func (c *b2client) Head(key string) (Object, error) { + f, err := c.getFileInfo(key) + if err != nil { + return nil, err + } + return &obj{ + f.Name, + f.ContentLength, + time.Unix(f.UploadTimestamp/1000, 0), + strings.HasSuffix(f.Name, "/"), + }, nil +} + +func (c *b2client) Get(key string, off, limit int64) (io.ReadCloser, error) { + if off == 0 && limit == -1 { + _, r, err := c.bucket.DownloadFileByName(key) + return r, err + } + if limit == -1 { + limit = 1 << 50 + } + rang := &backblaze.FileRange{Start: off, End: off + limit - 1} + _, r, err := c.bucket.DownloadFileRangeByName(key, rang) + return r, err +} + +func (c *b2client) Put(key string, data io.Reader) error { + _, err := c.bucket.UploadFile(key, nil, data) + return err +} + +func (c *b2client) Copy(dst, src string) error { + f, err := c.getFileInfo(src) + if err != nil { + return err + } + _, err = c.bucket.CopyFile(f.ID, dst, "", backblaze.FileMetaDirectiveCopy) + return err +} + +func (c *b2client) Delete(key string) error { + f, err := c.getFileInfo(key) + if err != nil { + if strings.HasPrefix(err.Error(), "not_found") { + return nil + } + return err + } + _, err = c.bucket.DeleteFileVersion(key, f.ID) + return err +} + +func (c *b2client) List(prefix, marker string, limit int64) ([]Object, error) { + if limit > 1000 { + limit = 1000 + } + if marker == "" && c.nextMarker != "" { + marker = c.nextMarker + c.nextMarker = "" + } + resp, err := c.bucket.ListFileNamesWithPrefix(marker, int(limit), prefix, "") + if err != nil { + return nil, err + } + + n := len(resp.Files) + objs := make([]Object, n) + for i := 0; i < n; i++ { + f := resp.Files[i] + objs[i] = &obj{ + f.Name, + f.ContentLength, + time.Unix(f.UploadTimestamp/1000, 0), + strings.HasSuffix(f.Name, "/"), + } + } + c.nextMarker = resp.NextFileName + return objs, nil +} + +// TODO: support multipart upload using S3 client + +func newB2(endpoint, keyID, applicationKey string) (ObjectStorage, error) { + if !strings.Contains(endpoint, "://") { + endpoint = fmt.Sprintf("https://%s", endpoint) + } + uri, err := url.ParseRequestURI(endpoint) + if err != nil { + return nil, fmt.Errorf("Invalid endpoint: %v, error: %v", endpoint, err) + } + hostParts := strings.Split(uri.Host, ".") + name := hostParts[0] + client, err := backblaze.NewB2(backblaze.Credentials{ + KeyID: keyID, + ApplicationKey: applicationKey, + }) + if err != nil { + return nil, fmt.Errorf("create B2 client: %s", err) + } + client.MaxIdleUploads = 20 + bucket, err := client.Bucket(name) + if err != nil { + logger.Warnf("access bucket %s: %s", name, err) + bucket, err = client.CreateBucket(name, "allPrivate") + if err != nil { + return nil, fmt.Errorf("create bucket %s: %s", name, err) + } + } + return &b2client{bucket: bucket}, nil +} + +func init() { + Register("b2", newB2) +} diff --git a/pkg/object/bos.go b/pkg/object/bos.go new file mode 100644 index 
0000000..bd04eda --- /dev/null +++ b/pkg/object/bos.go @@ -0,0 +1,232 @@ +//go:build !nobos +// +build !nobos + +/* + * JuiceFS, Copyright 2018 Juicedata, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package object + +import ( + "fmt" + "io" + "net/url" + "os" + "strings" + "time" + + "github.com/baidubce/bce-sdk-go/bce" + "github.com/baidubce/bce-sdk-go/services/bos" + "github.com/baidubce/bce-sdk-go/services/bos/api" +) + +const bosDefaultRegion = "bj" + +type bosclient struct { + DefaultObjectStorage + bucket string + c *bos.Client +} + +func (q *bosclient) String() string { + return fmt.Sprintf("bos://%s/", q.bucket) +} + +func (q *bosclient) Create() error { + _, err := q.c.PutBucket(q.bucket) + if err != nil && isExists(err) { + err = nil + } + return err +} + +func (q *bosclient) Head(key string) (Object, error) { + r, err := q.c.GetObjectMeta(q.bucket, key) + if err != nil { + return nil, err + } + mtime, _ := time.Parse(time.RFC1123, r.LastModified) + return &obj{ + key, + r.ContentLength, + mtime, + strings.HasSuffix(key, "/"), + }, nil +} + +func (q *bosclient) Get(key string, off, limit int64) (io.ReadCloser, error) { + var r *api.GetObjectResult + var err error + if limit > 0 { + r, err = q.c.GetObject(q.bucket, key, nil, off, off+limit-1) + } else if off > 0 { + r, err = q.c.GetObject(q.bucket, key, nil, off) + } else { + r, err = q.c.GetObject(q.bucket, key, nil) + } + if err != nil { + return nil, err + } + return r.Body, nil +} + +func (q *bosclient) Put(key string, in io.Reader) error { + b, vlen, err := findLen(in) + if err != nil { + return err + } + body, err := bce.NewBodyFromSizedReader(b, vlen) + if err != nil { + return err + } + _, err = q.c.BasicPutObject(q.bucket, key, body) + return err +} + +func (q *bosclient) Copy(dst, src string) error { + _, err := q.c.BasicCopyObject(q.bucket, dst, q.bucket, src) + return err +} + +func (q *bosclient) Delete(key string) error { + return q.c.DeleteObject(q.bucket, key) +} + +func (q *bosclient) List(prefix, marker string, limit int64) ([]Object, error) { + if limit > 1000 { + limit = 1000 + } + limit_ := int(limit) + out, err := q.c.SimpleListObjects(q.bucket, prefix, limit_, marker, "") + if err != nil { + return nil, err + } + n := len(out.Contents) + objs := make([]Object, n) + for i := 0; i < n; i++ { + k := out.Contents[i] + mod, _ := time.Parse("2006-01-02T15:04:05Z", k.LastModified) + objs[i] = &obj{k.Key, int64(k.Size), mod, strings.HasSuffix(k.Key, "/")} + } + return objs, nil +} + +func (q *bosclient) CreateMultipartUpload(key string) (*MultipartUpload, error) { + r, err := q.c.BasicInitiateMultipartUpload(q.bucket, key) + if err != nil { + return nil, err + } + return &MultipartUpload{UploadID: r.UploadId, MinPartSize: 4 << 20, MaxCount: 10000}, nil +} + +func (q *bosclient) UploadPart(key string, uploadID string, num int, data []byte) (*Part, error) { + body, _ := bce.NewBodyFromBytes(data) + etag, err := q.c.BasicUploadPart(q.bucket, key, uploadID, num, body) + if err != nil { + return 
nil, err + } + return &Part{Num: num, Size: len(data), ETag: etag}, nil +} + +func (q *bosclient) AbortUpload(key string, uploadID string) { + _ = q.c.AbortMultipartUpload(q.bucket, key, uploadID) +} + +func (q *bosclient) CompleteUpload(key string, uploadID string, parts []*Part) error { + oparts := make([]api.UploadInfoType, len(parts)) + for i := range parts { + oparts[i] = api.UploadInfoType{ + PartNumber: parts[i].Num, + ETag: parts[i].ETag, + } + } + ps := api.CompleteMultipartUploadArgs{Parts: oparts} + _, err := q.c.CompleteMultipartUploadFromStruct(q.bucket, key, uploadID, &ps) + return err +} + +func (q *bosclient) ListUploads(marker string) ([]*PendingPart, string, error) { + result, err := q.c.ListMultipartUploads(q.bucket, &api.ListMultipartUploadsArgs{ + MaxUploads: 1000, + KeyMarker: marker, + }) + if err != nil { + return nil, "", err + } + parts := make([]*PendingPart, len(result.Uploads)) + for i, u := range result.Uploads { + parts[i] = &PendingPart{u.Key, u.UploadId, time.Time{}} + } + return parts, result.NextKeyMarker, nil +} + +func autoBOSEndpoint(bucketName, accessKey, secretKey string) (string, error) { + region := bosDefaultRegion + if r := os.Getenv("BDCLOUD_DEFAULT_REGION"); r != "" { + region = r + } + + endpoint := fmt.Sprintf("https://%s.bcebos.com", region) + bosCli, err := bos.NewClient(accessKey, secretKey, endpoint) + if err != nil { + return "", err + } + + if location, err := bosCli.GetBucketLocation(bucketName); err != nil { + return "", err + } else { + return fmt.Sprintf("%s.bcebos.com", location), nil + } +} + +func newBOS(endpoint, accessKey, secretKey string) (ObjectStorage, error) { + if !strings.Contains(endpoint, "://") { + endpoint = fmt.Sprintf("https://%s", endpoint) + } + uri, err := url.ParseRequestURI(endpoint) + if err != nil { + return nil, fmt.Errorf("Invalid endpoint: %v, error: %v", endpoint, err) + } + hostParts := strings.SplitN(uri.Host, ".", 2) + bucketName := hostParts[0] + if len(hostParts) > 1 { + endpoint = fmt.Sprintf("https://%s", hostParts[1]) + } + + if accessKey == "" { + accessKey = os.Getenv("BDCLOUD_ACCESS_KEY") + secretKey = os.Getenv("BDCLOUD_SECRET_KEY") + } + + if len(hostParts) == 1 { + if endpoint, err = autoBOSEndpoint(bucketName, accessKey, secretKey); err != nil { + return nil, fmt.Errorf("Fail to get location of bucket %q: %s", bucketName, err) + } + if !strings.HasPrefix(endpoint, "http") { + endpoint = fmt.Sprintf("%s://%s", uri.Scheme, endpoint) + } + logger.Debugf("Use endpoint: %s", endpoint) + } + + bosClient, err := bos.NewClient(accessKey, secretKey, endpoint) + if err != nil { + return nil, err + } + return &bosclient{bucket: bucketName, c: bosClient}, nil +} + +func init() { + Register("bos", newBOS) +} diff --git a/pkg/object/ceph.go b/pkg/object/ceph.go new file mode 100644 index 0000000..801eef1 --- /dev/null +++ b/pkg/object/ceph.go @@ -0,0 +1,233 @@ +//go:build ceph +// +build ceph + +/* + * JuiceFS, Copyright 2020 Juicedata, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package object + +import ( + "bytes" + "fmt" + "io" + "net/url" + "os" + "reflect" + "sort" + "strings" + "sync" + + "github.com/ceph/go-ceph/rados" +) + +type ceph struct { + DefaultObjectStorage + name string + conn *rados.Conn + free chan *rados.IOContext +} + +func (c *ceph) String() string { + return fmt.Sprintf("ceph://%s/", c.name) +} + +func (c *ceph) Create() error { + names, err := c.conn.ListPools() + if err != nil { + return err + } + for _, name := range names { + if name == c.name { + return nil + } + } + return c.conn.MakePool(c.name) +} + +func (c *ceph) newContext() (*rados.IOContext, error) { + select { + case ctx := <-c.free: + return ctx, nil + default: + return c.conn.OpenIOContext(c.name) + } +} + +func (c *ceph) release(ctx *rados.IOContext) { + select { + case c.free <- ctx: + default: + ctx.Destroy() + } +} + +func (c *ceph) do(f func(ctx *rados.IOContext) error) (err error) { + ctx, err := c.newContext() + if err != nil { + return err + } + err = f(ctx) + if err != nil { + ctx.Destroy() + } else { + c.release(ctx) + } + return +} + +type cephReader struct { + c *ceph + ctx *rados.IOContext + key string + off int64 + limit int64 +} + +func (r *cephReader) Read(buf []byte) (n int, err error) { + if r.limit > 0 && int64(len(buf)) > r.limit { + buf = buf[:r.limit] + } + n, err = r.ctx.Read(r.key, buf, uint64(r.off)) + r.off += int64(n) + if r.limit > 0 { + r.limit -= int64(n) + } + if err == nil && n < len(buf) { + err = io.EOF + } + return +} + +func (r *cephReader) Close() error { + if r.ctx != nil { + r.c.release(r.ctx) + r.ctx = nil + } + return nil +} + +func (c *ceph) Get(key string, off, limit int64) (io.ReadCloser, error) { + ctx, err := c.newContext() + if err != nil { + return nil, err + } + return &cephReader{c, ctx, key, off, limit}, nil +} + +var cephPool = sync.Pool{ + New: func() interface{} { + return make([]byte, 1<<20) + }, +} + +func (c *ceph) Put(key string, in io.Reader) error { + return c.do(func(ctx *rados.IOContext) error { + if b, ok := in.(*bytes.Reader); ok { + v := reflect.ValueOf(b) + data := v.Elem().Field(0).Bytes() + return ctx.WriteFull(key, data) + } + buf := cephPool.Get().([]byte) + defer cephPool.Put(buf) + var off uint64 + for { + n, err := in.Read(buf) + if n > 0 { + if err = ctx.Write(key, buf[:n], off); err != nil { + return err + } + off += uint64(n) + } else { + if err == io.EOF { + return nil + } + return err + } + } + }) +} + +func (c *ceph) Delete(key string) error { + return c.do(func(ctx *rados.IOContext) error { + return ctx.Delete(key) + }) +} + +func (c *ceph) ListAll(prefix, marker string) (<-chan Object, error) { + var objs = make(chan Object, 1000) + err := c.do(func(ctx *rados.IOContext) error { + defer close(objs) + iter, err := ctx.Iter() + if err != nil { + return err + } + defer iter.Close() + + // FIXME: this will be really slow for many objects + keys := make([]string, 0, 1000) + for iter.Next() { + key := iter.Value() + if key <= marker || !strings.HasPrefix(key, prefix) { + continue + } + keys = append(keys, key) + } + // the keys are not ordered, sort them first + sort.Strings(keys) + // TODO: parallel + for _, key := range keys { + st, err := ctx.Stat(key) + if err != nil { + continue // FIXME + } + objs <- &obj{key, int64(st.Size), st.ModTime, strings.HasSuffix(key, "/")} + } + return nil + }) + return objs, err +} + +func newCeph(endpoint, cluster, user string) (ObjectStorage, error) { + if !strings.Contains(endpoint, "://") { + endpoint = fmt.Sprintf("ceph://%s", endpoint) + } + uri, err := 
url.ParseRequestURI(endpoint) + if err != nil { + return nil, fmt.Errorf("Invalid endpoint %s: %s", endpoint, err) + } + name := uri.Host + conn, err := rados.NewConnWithClusterAndUser(cluster, user) + if err != nil { + return nil, fmt.Errorf("Can't create connection to cluster %s for user %s: %s", cluster, user, err) + } + if os.Getenv("JFS_NO_CHECK_OBJECT_STORAGE") == "" { + if err := conn.ReadDefaultConfigFile(); err != nil { + return nil, fmt.Errorf("Can't read default config file: %s", err) + } + if err := conn.Connect(); err != nil { + return nil, fmt.Errorf("Can't connect to cluster %s: %s", cluster, err) + } + } + return &ceph{ + name: name, + conn: conn, + free: make(chan *rados.IOContext, 50), + }, nil +} + +func init() { + Register("ceph", newCeph) +} diff --git a/pkg/object/checksum.go b/pkg/object/checksum.go new file mode 100644 index 0000000..8ef4f27 --- /dev/null +++ b/pkg/object/checksum.go @@ -0,0 +1,80 @@ +/* + * JuiceFS, Copyright 2020 Juicedata, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package object + +import ( + "bytes" + "fmt" + "hash/crc32" + "io" + "reflect" + "strconv" +) + +const checksumAlgr = "Crc32c" + +var crc32c = crc32.MakeTable(crc32.Castagnoli) + +func generateChecksum(in io.ReadSeeker) string { + if b, ok := in.(*bytes.Reader); ok { + v := reflect.ValueOf(b) + data := v.Elem().Field(0).Bytes() + return strconv.Itoa(int(crc32.Update(0, crc32c, data))) + } + var hash uint32 + crcBuffer := bufPool.Get().(*[]byte) + defer bufPool.Put(crcBuffer) + defer func() { _, _ = in.Seek(0, io.SeekStart) }() + for { + n, err := in.Read(*crcBuffer) + hash = crc32.Update(hash, crc32c, (*crcBuffer)[:n]) + if err != nil { + if err != io.EOF { + return "" + } + break + } + } + return strconv.Itoa(int(hash)) +} + +type checksumReader struct { + io.ReadCloser + expected uint32 + checksum uint32 +} + +func (c *checksumReader) Read(buf []byte) (n int, err error) { + n, err = c.ReadCloser.Read(buf) + c.checksum = crc32.Update(c.checksum, crc32c, buf[:n]) + if err == io.EOF && c.checksum != c.expected { + return 0, fmt.Errorf("verify checksum failed: %d != %d", c.checksum, c.expected) + } + return +} + +func verifyChecksum(in io.ReadCloser, checksum string) io.ReadCloser { + if checksum == "" { + return in + } + expected, err := strconv.Atoi(checksum) + if err != nil { + logger.Errorf("invalid crc32c: %s", checksum) + return in + } + return &checksumReader{in, uint32(expected), 0} +} diff --git a/pkg/object/checksum_test.go b/pkg/object/checksum_test.go new file mode 100644 index 0000000..636917e --- /dev/null +++ b/pkg/object/checksum_test.go @@ -0,0 +1,40 @@ +/* + * JuiceFS, Copyright 2020 Juicedata, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package object + +import ( + "bytes" + "hash/crc32" + "strconv" + "testing" +) + +func TestChecksum(t *testing.T) { + b := []byte("hello") + expected := crc32.Update(0, crc32c, b) + actual := generateChecksum(bytes.NewReader(b)) + if actual != strconv.Itoa(int(expected)) { + t.Errorf("expect %d but got %s", expected, actual) + t.FailNow() + } + + actual = generateChecksum(bytes.NewReader(b)) + if actual != strconv.Itoa(int(expected)) { + t.Errorf("expect %d but got %s", expected, actual) + t.FailNow() + } +} diff --git a/pkg/object/cos.go b/pkg/object/cos.go new file mode 100644 index 0000000..13343d7 --- /dev/null +++ b/pkg/object/cos.go @@ -0,0 +1,255 @@ +//go:build !nocos +// +build !nocos + +/* + * JuiceFS, Copyright 2018 Juicedata, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package object + +import ( + "bytes" + "fmt" + "io" + "net/http" + "net/url" + "os" + "strconv" + "strings" + "time" + + "github.com/tencentyun/cos-go-sdk-v5" +) + +const cosChecksumKey = "x-cos-meta-" + checksumAlgr + +type COS struct { + c *cos.Client + endpoint string +} + +func (c *COS) String() string { + return fmt.Sprintf("cos://%s/", strings.Split(c.endpoint, ".")[0]) +} + +func (c *COS) Create() error { + _, err := c.c.Bucket.Put(ctx, nil) + if err != nil && isExists(err) { + err = nil + } + return err +} + +func (c *COS) Head(key string) (Object, error) { + resp, err := c.c.Object.Head(ctx, key, nil) + if err != nil { + return nil, err + } + + header := resp.Header + var size int64 + if val, ok := header["Content-Length"]; ok { + if length, err := strconv.ParseInt(val[0], 10, 64); err == nil { + size = length + } + } + var mtime time.Time + if val, ok := header["Last-Modified"]; ok { + mtime, _ = time.Parse(time.RFC1123, val[0]) + } + + return &obj{key, size, mtime, strings.HasSuffix(key, "/")}, nil +} + +func (c *COS) Get(key string, off, limit int64) (io.ReadCloser, error) { + params := &cos.ObjectGetOptions{} + if off > 0 || limit > 0 { + var r string + if limit > 0 { + r = fmt.Sprintf("bytes=%d-%d", off, off+limit-1) + } else { + r = fmt.Sprintf("bytes=%d-", off) + } + params.Range = r + } + resp, err := c.c.Object.Get(ctx, key, params) + if err != nil { + return nil, err + } + if off == 0 && limit == -1 { + resp.Body = verifyChecksum(resp.Body, resp.Header.Get(cosChecksumKey)) + } + return resp.Body, nil +} + +func (c *COS) Put(key string, in io.Reader) error { + var options *cos.ObjectPutOptions + if ins, ok := in.(io.ReadSeeker); ok { + header := http.Header(map[string][]string{ + cosChecksumKey: {generateChecksum(ins)}, + }) + options = 
&cos.ObjectPutOptions{ObjectPutHeaderOptions: &cos.ObjectPutHeaderOptions{XCosMetaXXX: &header}} + } + _, err := c.c.Object.Put(ctx, key, in, options) + return err +} + +func (c *COS) Copy(dst, src string) error { + source := fmt.Sprintf("%s/%s", c.endpoint, src) + _, _, err := c.c.Object.Copy(ctx, dst, source, nil) + return err +} + +func (c *COS) Delete(key string) error { + _, err := c.c.Object.Delete(ctx, key) + return err +} + +func (c *COS) List(prefix, marker string, limit int64) ([]Object, error) { + param := cos.BucketGetOptions{ + Prefix: prefix, + Marker: marker, + MaxKeys: int(limit), + } + resp, _, err := c.c.Bucket.Get(ctx, ¶m) + for err == nil && len(resp.Contents) == 0 && resp.IsTruncated { + param.Marker = resp.NextMarker + resp, _, err = c.c.Bucket.Get(ctx, ¶m) + } + if err != nil { + return nil, err + } + n := len(resp.Contents) + objs := make([]Object, n) + for i := 0; i < n; i++ { + o := resp.Contents[i] + t, _ := time.Parse(time.RFC3339, o.LastModified) + objs[i] = &obj{o.Key, int64(o.Size), t, strings.HasSuffix(o.Key, "/")} + } + return objs, nil +} + +func (c *COS) ListAll(prefix, marker string) (<-chan Object, error) { + return nil, notSupported +} + +func (c *COS) CreateMultipartUpload(key string) (*MultipartUpload, error) { + resp, _, err := c.c.Object.InitiateMultipartUpload(ctx, key, nil) + if err != nil { + return nil, err + } + return &MultipartUpload{UploadID: resp.UploadID, MinPartSize: 5 << 20, MaxCount: 10000}, nil +} + +func (c *COS) UploadPart(key string, uploadID string, num int, body []byte) (*Part, error) { + resp, err := c.c.Object.UploadPart(ctx, key, uploadID, num, bytes.NewReader(body), nil) + if err != nil { + return nil, err + } + return &Part{Num: num, ETag: resp.Header.Get("Etag")}, nil +} + +func (c *COS) AbortUpload(key string, uploadID string) { + _, _ = c.c.Object.AbortMultipartUpload(ctx, key, uploadID) +} + +func (c *COS) CompleteUpload(key string, uploadID string, parts []*Part) error { + var cosParts []cos.Object + for i := range parts { + cosParts = append(cosParts, cos.Object{Key: key, ETag: parts[i].ETag, PartNumber: parts[i].Num}) + } + _, _, err := c.c.Object.CompleteMultipartUpload(ctx, key, uploadID, &cos.CompleteMultipartUploadOptions{Parts: cosParts}) + return err +} + +func (c *COS) ListUploads(marker string) ([]*PendingPart, string, error) { + input := &cos.ListMultipartUploadsOptions{ + KeyMarker: marker, + } + result, _, err := c.c.Bucket.ListMultipartUploads(ctx, input) + if err != nil { + return nil, "", err + } + parts := make([]*PendingPart, len(result.Uploads)) + for i, u := range result.Uploads { + t, _ := time.Parse(time.RFC3339, u.Initiated) + parts[i] = &PendingPart{u.Key, u.UploadID, t} + } + return parts, result.NextKeyMarker, nil +} + +func autoCOSEndpoint(bucketName, accessKey, secretKey string) (string, error) { + client := cos.NewClient(nil, &http.Client{ + Transport: &cos.AuthorizationTransport{ + SecretID: accessKey, + SecretKey: secretKey, + }, + }) + client.UserAgent = UserAgent + s, _, err := client.Service.Get(ctx) + if err != nil { + return "", err + } + + for _, b := range s.Buckets { + // fmt.Printf("%#v\n", b) + if b.Name == bucketName { + return fmt.Sprintf("https://%s.cos.%s.myqcloud.com", b.Name, b.Region), nil + } + } + + return "", fmt.Errorf("bucket %q doesnot exist", bucketName) +} + +func newCOS(endpoint, accessKey, secretKey string) (ObjectStorage, error) { + if !strings.Contains(endpoint, "://") { + endpoint = fmt.Sprintf("https://%s", endpoint) + } + uri, err := 
url.ParseRequestURI(endpoint) +	if err != nil { +		return nil, fmt.Errorf("Invalid endpoint %s: %s", endpoint, err) +	} +	hostParts := strings.SplitN(uri.Host, ".", 2) + +	if accessKey == "" { +		accessKey = os.Getenv("COS_SECRETID") +		secretKey = os.Getenv("COS_SECRETKEY") +	} + +	if len(hostParts) == 1 { +		if endpoint, err = autoCOSEndpoint(hostParts[0], accessKey, secretKey); err != nil { +			return nil, fmt.Errorf("Unable to get endpoint of bucket %s: %s", hostParts[0], err) +		} +		if uri, err = url.ParseRequestURI(endpoint); err != nil { +			return nil, fmt.Errorf("Invalid endpoint %s: %s", endpoint, err) +		} +		logger.Debugf("Use endpoint %q", endpoint) +	} + +	b := &cos.BaseURL{BucketURL: uri} +	client := cos.NewClient(b, &http.Client{ +		Transport: &cos.AuthorizationTransport{ +			SecretID: accessKey, +			SecretKey: secretKey, +			Transport: httpClient.Transport, +		}, +	}) +	client.UserAgent = UserAgent +	return &COS{client, uri.Host}, nil +} + +func init() { +	Register("cos", newCOS) +} diff --git a/pkg/object/encrypt.go b/pkg/object/encrypt.go new file mode 100644 index 0000000..f8d9999 --- /dev/null +++ b/pkg/object/encrypt.go @@ -0,0 +1,239 @@ +/* + * JuiceFS, Copyright 2020 Juicedata, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package object + +import ( +	"bytes" +	"crypto/aes" +	"crypto/cipher" +	"crypto/rand" +	"crypto/rsa" +	"crypto/sha256" +	"crypto/x509" +	"encoding/pem" +	"errors" +	"fmt" +	"io" +	"io/ioutil" +	"strings" +) + +type Encryptor interface { +	Encrypt(plaintext []byte) ([]byte, error) +	Decrypt(ciphertext []byte) ([]byte, error) +} + +type rsaEncryptor struct { +	privKey *rsa.PrivateKey +	label []byte +} + +func ExportRsaPrivateKeyToPem(key *rsa.PrivateKey, passphrase string) string { +	buf := x509.MarshalPKCS1PrivateKey(key) +	block := &pem.Block{ +		Type: "RSA PRIVATE KEY", +		Bytes: buf, +	} +	if passphrase != "" { +		var err error +		// nolint:staticcheck +		block, err = x509.EncryptPEMBlock(rand.Reader, block.Type, buf, []byte(passphrase), x509.PEMCipherAES256) +		if err != nil { +			panic(err) +		} +	} +	privPEM := pem.EncodeToMemory(block) +	return string(privPEM) +} + +func ParseRsaPrivateKeyFromPem(privPEM string, passphrase string) (*rsa.PrivateKey, error) { +	block, _ := pem.Decode([]byte(privPEM)) +	if block == nil { +		return nil, errors.New("failed to parse PEM block containing the key") +	} + +	buf := block.Bytes +	// nolint:staticcheck +	if strings.Contains(block.Headers["Proc-Type"], "ENCRYPTED") && +		x509.IsEncryptedPEMBlock(block) { +		if passphrase == "" { +			return nil, fmt.Errorf("passphrase is required to decrypt the private key") +		} +		var err error +		// nolint:staticcheck +		buf, err = x509.DecryptPEMBlock(block, []byte(passphrase)) +		if err != nil { +			if err == x509.IncorrectPasswordError { +				return nil, err +			} +			return nil, fmt.Errorf("cannot decode encrypted private keys: %v", err) +		} +	} else if passphrase != "" { +		logger.Warningf("passphrase is not used, because private key is not encrypted") +	} + +	priv, err := 
x509.ParsePKCS1PrivateKey(buf) +	if err != nil { +		return nil, err +	} + +	return priv, nil +} + +func ParseRsaPrivateKeyFromPath(path, passphrase string) (*rsa.PrivateKey, error) { +	b, err := ioutil.ReadFile(path) +	if err != nil { +		return nil, err +	} +	return ParseRsaPrivateKeyFromPem(string(b), passphrase) +} + +func NewRSAEncryptor(privKey *rsa.PrivateKey) Encryptor { +	return &rsaEncryptor{privKey, []byte("keys")} +} + +func (e *rsaEncryptor) Encrypt(plaintext []byte) ([]byte, error) { +	return rsa.EncryptOAEP(sha256.New(), rand.Reader, &e.privKey.PublicKey, plaintext, e.label) +} + +func (e *rsaEncryptor) Decrypt(ciphertext []byte) ([]byte, error) { +	return rsa.DecryptOAEP(sha256.New(), rand.Reader, e.privKey, ciphertext, e.label) +} + +type aesEncryptor struct { +	keyEncryptor Encryptor +	keyLen int +} + +func NewAESEncryptor(keyEncryptor Encryptor) Encryptor { +	return &aesEncryptor{keyEncryptor, 32} // AES-256-GCM +} + +// Encrypt seals the plaintext with a fresh random AES-256-GCM data key and returns +// a self-describing blob: a 3-byte header (2-byte cipherkey length, 1-byte nonce length), +// then the key-encrypted data key, the nonce, and the GCM ciphertext with its tag. +func (e *aesEncryptor) Encrypt(plaintext []byte) ([]byte, error) { +	key := make([]byte, e.keyLen) +	if _, err := io.ReadFull(rand.Reader, key); err != nil { +		return nil, err +	} +	cipherkey, err := e.keyEncryptor.Encrypt(key) +	if err != nil { +		return nil, err +	} +	block, err := aes.NewCipher(key) +	if err != nil { +		return nil, err +	} +	aesgcm, err := cipher.NewGCM(block) +	if err != nil { +		return nil, err +	} +	nonce := make([]byte, aesgcm.NonceSize()) +	if _, err := io.ReadFull(rand.Reader, nonce); err != nil { +		return nil, err +	} + +	headerSize := 3 + len(cipherkey) + len(nonce) +	buf := make([]byte, headerSize+len(plaintext)+aesgcm.Overhead()) +	buf[0] = byte(len(cipherkey) >> 8) +	buf[1] = byte(len(cipherkey) & 0xFF) +	buf[2] = byte(len(nonce)) +	p := buf[3:] +	copy(p, cipherkey) +	p = p[len(cipherkey):] +	copy(p, nonce) +	p = p[len(nonce):] +	ciphertext := aesgcm.Seal(p[:0], nonce, plaintext, nil) +	return buf[:headerSize+len(ciphertext)], nil +} + +func (e *aesEncryptor) Decrypt(ciphertext []byte) ([]byte, error) { +	keyLen := int(ciphertext[0])<<8 + int(ciphertext[1]) +	nonceLen := int(ciphertext[2]) +	if 3+keyLen+nonceLen >= len(ciphertext) { +		return nil, fmt.Errorf("malformed ciphertext: %d %d", keyLen, nonceLen) +	} +	ciphertext = ciphertext[3:] +	cipherkey := ciphertext[:keyLen] +	nonce := ciphertext[keyLen : keyLen+nonceLen] +	ciphertext = ciphertext[keyLen+nonceLen:] + +	key, err := e.keyEncryptor.Decrypt(cipherkey) +	if err != nil { +		return nil, errors.New("decrypt key: " + err.Error()) +	} +	block, err := aes.NewCipher(key) +	if err != nil { +		return nil, err +	} +	aesgcm, err := cipher.NewGCM(block) +	if err != nil { +		return nil, err +	} +	return aesgcm.Open(ciphertext[:0], nonce, ciphertext, nil) +} + +type encrypted struct { +	ObjectStorage +	enc Encryptor +} + +// NewEncrypted returns an encrypted object storage +func NewEncrypted(o ObjectStorage, enc Encryptor) ObjectStorage { +	return &encrypted{o, enc} +} + +func (e *encrypted) String() string { +	return fmt.Sprintf("%s(encrypted)", e.ObjectStorage) +} + +func (e *encrypted) Get(key string, off, limit int64) (io.ReadCloser, error) { +	r, err := e.ObjectStorage.Get(key, 0, -1) +	if err != nil { +		return nil, err +	} +	defer r.Close() +	ciphertext, err := ioutil.ReadAll(r) +	if err != nil { +		return nil, err +	} +	plain, err := e.enc.Decrypt(ciphertext) +	if err != nil { +		return nil, fmt.Errorf("Decrypt: %s", err) +	} +	l := int64(len(plain)) +	if off > l { +		return nil, io.EOF +	} +	if limit == -1 || off+limit > l { +		limit = l - off +	} +	data := plain[off : off+limit] +	return 
ioutil.NopCloser(bytes.NewBuffer(data)), nil +} + +func (e *encrypted) Put(key string, in io.Reader) error { + plain, err := ioutil.ReadAll(in) + if err != nil { + return err + } + ciphertext, err := e.enc.Encrypt(plain) + if err != nil { + return err + } + return e.ObjectStorage.Put(key, bytes.NewReader(ciphertext)) +} + +var _ ObjectStorage = &encrypted{} diff --git a/pkg/object/encrypt_test.go b/pkg/object/encrypt_test.go new file mode 100644 index 0000000..18716ef --- /dev/null +++ b/pkg/object/encrypt_test.go @@ -0,0 +1,155 @@ +/* + * JuiceFS, Copyright 2020 Juicedata, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package object + +import ( + "bytes" + "crypto/rand" + "crypto/rsa" + "crypto/x509" + "encoding/pem" + "io/ioutil" + "path/filepath" + "testing" +) + +var testkey = GenerateRsaKeyPair() + +func GenerateRsaKeyPair() *rsa.PrivateKey { + privkey, _ := rsa.GenerateKey(rand.Reader, 2048) + return privkey +} + +func TestRSA(t *testing.T) { + c1 := NewRSAEncryptor(testkey) + ciphertext, _ := c1.Encrypt([]byte("hello")) + + privPEM := ExportRsaPrivateKeyToPem(testkey, "abc") + key2, _ := ParseRsaPrivateKeyFromPem(privPEM, "abc") + c2 := NewRSAEncryptor(key2) + plaintext, _ := c2.Decrypt(ciphertext) + if string(plaintext) != "hello" { + t.Fail() + } + + _, err := ParseRsaPrivateKeyFromPem(privPEM, "") + if err == nil { + t.Errorf("parse without passphrase should fail") + t.Fail() + } + _, err = ParseRsaPrivateKeyFromPem(privPEM, "ab") + if err != x509.IncorrectPasswordError { + t.Errorf("parse without passphrase should return IncorrectPasswordError") + t.Fail() + } + + dir := t.TempDir() + + if err := genrsa(filepath.Join(dir, "private.pem"), ""); err != nil { + t.Error(err) + t.Fail() + } + if _, err = ParseRsaPrivateKeyFromPath(filepath.Join(dir, "private.pem"), ""); err != nil { + t.Error(err) + t.Fail() + } + + if err := genrsa(filepath.Join(dir, "private.pem"), "abcd"); err != nil { + t.Error(err) + t.Fail() + } + if _, err = ParseRsaPrivateKeyFromPath(filepath.Join(dir, "private.pem"), "abcd"); err != nil { + t.Error(err) + t.Fail() + } +} + +func genrsa(path string, password string) error { + key, err := rsa.GenerateKey(rand.Reader, 2048) + if err != nil { + return err + } + block := &pem.Block{ + Type: "RSA PRIVATE KEY", + Bytes: x509.MarshalPKCS1PrivateKey(key), + } + if password != "" { + // nolint:staticcheck + block, err = x509.EncryptPEMBlock(rand.Reader, block.Type, block.Bytes, []byte(password), x509.PEMCipherAES256) + if err != nil { + return err + } + } + if err := ioutil.WriteFile(path, pem.EncodeToMemory(block), 0755); err != nil { + return err + } + return nil +} + +func BenchmarkRSA4096Encrypt(b *testing.B) { + secret := make([]byte, 32) + kc := NewRSAEncryptor(testkey) + b.ResetTimer() + for n := 0; n < b.N; n++ { + _, _ = kc.Encrypt(secret) + } +} + +func BenchmarkRSA4096Decrypt(b *testing.B) { + secret := make([]byte, 32) + kc := NewRSAEncryptor(testkey) + ciphertext, _ := kc.Encrypt(secret) + b.ResetTimer() + for n := 0; n < b.N; n++ { 
+ _, _ = kc.Decrypt(ciphertext) + } +} + +func TestAESGCM(t *testing.T) { + kc := NewRSAEncryptor(testkey) + dc := NewAESEncryptor(kc) + data := []byte("hello") + ciphertext, _ := dc.Encrypt(data) + plaintext, _ := dc.Decrypt(ciphertext) + if !bytes.Equal(data, plaintext) { + t.Errorf("decrypt fail") + t.Fail() + } +} + +func TestEncryptedStore(t *testing.T) { + s, _ := CreateStorage("mem", "", "", "") + kc := NewRSAEncryptor(testkey) + dc := NewAESEncryptor(kc) + es := NewEncrypted(s, dc) + _ = es.Put("a", bytes.NewReader([]byte("hello"))) + r, err := es.Get("a", 1, 2) + if err != nil { + t.Errorf("Get a: %s", err) + t.Fail() + } + d, _ := ioutil.ReadAll(r) + if string(d) != "el" { + t.Fail() + } + + r, _ = es.Get("a", 0, -1) + d, _ = ioutil.ReadAll(r) + if string(d) != "hello" { + t.Fail() + } +} diff --git a/pkg/object/eos.go b/pkg/object/eos.go new file mode 100644 index 0000000..c6cc2cf --- /dev/null +++ b/pkg/object/eos.go @@ -0,0 +1,82 @@ +//go:build !nos3 +// +build !nos3 + +/* + * JuiceFS, Copyright 2018 Juicedata, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package object + +import ( + "fmt" + "net/url" + "os" + "strings" + + "github.com/aws/aws-sdk-go/aws" + "github.com/aws/aws-sdk-go/aws/credentials" + "github.com/aws/aws-sdk-go/aws/session" + "github.com/aws/aws-sdk-go/service/s3" +) + +type eos struct { + s3client +} + +func (s *eos) String() string { + return fmt.Sprintf("eos://%s/", s.s3client.bucket) +} + +func newEos(endpoint, accessKey, secretKey string) (ObjectStorage, error) { + if !strings.Contains(endpoint, "://") { + endpoint = fmt.Sprintf("https://%s", endpoint) + } + uri, err := url.ParseRequestURI(endpoint) + if err != nil { + return nil, fmt.Errorf("invalid endpoint %s: %s", endpoint, err) + } + ssl := strings.ToLower(uri.Scheme) == "https" + hostParts := strings.Split(uri.Host, ".") + bucket := hostParts[0] + endpoint = uri.Host[len(bucket)+1:] + region := "us-east-1" + + if accessKey == "" { + accessKey = os.Getenv("EOS_ACCESS_KEY") + } + if secretKey == "" { + secretKey = os.Getenv("EOS_SECRET_KEY") + } + + awsConfig := &aws.Config{ + Endpoint: &endpoint, + Region: ®ion, + DisableSSL: aws.Bool(!ssl), + S3ForcePathStyle: aws.Bool(false), + HTTPClient: httpClient, + Credentials: credentials.NewStaticCredentials(accessKey, secretKey, ""), + } + + ses, err := session.NewSession(awsConfig) + if err != nil { + return nil, fmt.Errorf("aws session: %s", err) + } + ses.Handlers.Build.PushFront(disableSha256Func) + return &eos{s3client{bucket, s3.New(ses), ses}}, nil +} + +func init() { + Register("eos", newEos) +} diff --git a/pkg/object/file.go b/pkg/object/file.go new file mode 100644 index 0000000..b9901fc --- /dev/null +++ b/pkg/object/file.go @@ -0,0 +1,380 @@ +/* + * JuiceFS, Copyright 2018 Juicedata, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package object + +import ( + "bytes" + "fmt" + "io" + "io/ioutil" + "math/rand" + "os" + "path" + "path/filepath" + "runtime" + "sort" + "strconv" + "strings" + "time" +) + +const ( + dirSuffix = "/" +) + +type filestore struct { + DefaultObjectStorage + root string +} + +func (d *filestore) String() string { + if runtime.GOOS == "windows" { + return "file:///" + d.root + } + return "file://" + d.root +} + +func (d *filestore) path(key string) string { + if strings.HasSuffix(d.root, dirSuffix) { + return filepath.Join(d.root, key) + } + return d.root + key +} + +func (d *filestore) Head(key string) (Object, error) { + p := d.path(key) + + fi, err := os.Stat(p) + if err != nil { + return nil, err + } + size := fi.Size() + if fi.IsDir() { + size = 0 + } + return &obj{ + key, + size, + fi.ModTime(), + fi.IsDir(), + }, nil +} + +func (d *filestore) Get(key string, off, limit int64) (io.ReadCloser, error) { + p := d.path(key) + + f, err := os.Open(p) + if err != nil { + return nil, err + } + + finfo, err := f.Stat() + if err != nil { + _ = f.Close() + return nil, err + } + if finfo.IsDir() { + _ = f.Close() + return ioutil.NopCloser(bytes.NewBuffer([]byte{})), nil + } + + if off > 0 { + if _, err := f.Seek(off, 0); err != nil { + _ = f.Close() + return nil, err + } + } + if limit > 0 { + defer f.Close() + buf := make([]byte, limit) + if n, err := f.Read(buf); err != nil { + return nil, err + } else { + return ioutil.NopCloser(bytes.NewBuffer(buf[:n])), nil + } + } + return f, nil +} + +func (d *filestore) Put(key string, in io.Reader) error { + p := d.path(key) + + if strings.HasSuffix(key, dirSuffix) || key == "" && strings.HasSuffix(d.root, dirSuffix) { + return os.MkdirAll(p, os.FileMode(0755)) + } + + tmp := filepath.Join(filepath.Dir(p), "."+filepath.Base(p)+".tmp"+strconv.Itoa(rand.Int())) + f, err := os.OpenFile(tmp, os.O_CREATE|os.O_WRONLY|os.O_TRUNC, 0644) + if err != nil && os.IsNotExist(err) { + if err := os.MkdirAll(filepath.Dir(p), os.FileMode(0755)); err != nil { + return err + } + f, err = os.OpenFile(tmp, os.O_CREATE|os.O_WRONLY|os.O_TRUNC, 0644) + } + if err != nil { + return err + } + defer func() { + if err != nil { + _ = os.Remove(tmp) + } + }() + buf := bufPool.Get().(*[]byte) + defer bufPool.Put(buf) + _, err = io.CopyBuffer(f, in, *buf) + if err != nil { + _ = f.Close() + return err + } + err = f.Close() + if err != nil { + return err + } + err = os.Rename(tmp, p) + return err +} + +func (d *filestore) Copy(dst, src string) error { + r, err := d.Get(src, 0, -1) + if err != nil { + return err + } + defer r.Close() + return d.Put(dst, r) +} + +func (d *filestore) Delete(key string) error { + err := os.Remove(d.path(key)) + if err != nil && os.IsNotExist(err) { + err = nil + } + return err +} + +// walk recursively descends path, calling w. 
+func walk(path string, info os.FileInfo, walkFn filepath.WalkFunc) error { + err := walkFn(path, info, nil) + if err != nil { + if info.IsDir() && err == filepath.SkipDir { + return nil + } + return err + } + + if !info.IsDir() { + return nil + } + + entries, err := readDirSorted(path) + if err != nil { + return walkFn(path, info, err) + } + + for _, e := range entries { + p := filepath.Join(path, e.Name()) + if e.IsDir() { + p = filepath.ToSlash(p + "/") + } + in, err := e.Info() + if err == nil { + err = walk(p, in, walkFn) + } + if err != nil && err != filepath.SkipDir && !os.IsNotExist(err) { + return err + } + } + return nil +} + +// Walk walks the file tree rooted at root, calling walkFn for each file or +// directory in the tree, including root. All errors that arise visiting files +// and directories are filtered by walkFn. The files are walked in lexical +// order, which makes the output deterministic but means that for very +// large directories Walk can be inefficient. +// Walk always follow symbolic links. +func Walk(root string, walkFn filepath.WalkFunc) error { + info, err := os.Stat(root) + if err != nil { + err = walkFn(root, nil, err) + } else { + err = walk(root, info, walkFn) + } + if err == filepath.SkipDir { + return nil + } + return err +} + +type mEntry struct { + os.DirEntry + name string + fi os.FileInfo +} + +func (m *mEntry) Name() string { + return m.name +} + +func (m *mEntry) Info() (os.FileInfo, error) { + if m.fi != nil { + return m.fi, nil + } + return m.DirEntry.Info() +} + +// readDirSorted reads the directory named by dirname and returns +// a sorted list of directory entries. +func readDirSorted(dirname string) ([]os.DirEntry, error) { + f, err := os.Open(dirname) + if err != nil { + return nil, err + } + defer f.Close() + entries, err := f.ReadDir(-1) + for i, e := range entries { + if e.IsDir() { + entries[i] = &mEntry{e, e.Name() + dirSuffix, nil} + } else if !e.Type().IsRegular() { + // follow symlink + fi, err := os.Stat(filepath.Join(dirname, e.Name())) + if err != nil { + logger.Warnf("skip broken symlink %s: %s", filepath.Join(dirname, e.Name()), err) + continue + } + name := e.Name() + if fi.IsDir() { + name = e.Name() + dirSuffix + } + entries[i] = &mEntry{e, name, fi} + } + } + sort.Slice(entries, func(i, j int) bool { return entries[i].Name() < entries[j].Name() }) + return entries, err +} + +func (d *filestore) List(prefix, marker string, limit int64) ([]Object, error) { + return nil, notSupported +} + +func (d *filestore) ListAll(prefix, marker string) (<-chan Object, error) { + listed := make(chan Object, 10240) + go func() { + var walkRoot string + if strings.HasSuffix(d.root, dirSuffix) { + walkRoot = d.root + } else { + // If the root is not ends with `/`, we'll list the directory root resides. 
+ walkRoot = path.Dir(d.root) + } + + _ = Walk(walkRoot, func(path string, info os.FileInfo, err error) error { + if runtime.GOOS == "windows" { + path = strings.Replace(path, "\\", "/", -1) + } + + if err != nil { + // skip broken symbolic link + if fi, err1 := os.Lstat(path); err1 == nil && fi.Mode()&os.ModeSymlink != 0 { + logger.Warnf("skip unreachable symlink: %s (%s)", path, err) + return nil + } + if os.IsNotExist(err) { + logger.Warnf("skip not exist file or directory: %s", path) + return nil + } + listed <- nil + logger.Errorf("list %s: %s", path, err) + return err + } + + if !strings.HasPrefix(path, d.root) { + if info.IsDir() && path != walkRoot { + return filepath.SkipDir + } + return nil + } + + key := path[len(d.root):] + if !strings.HasPrefix(key, prefix) || (marker != "" && key <= marker) { + if info.IsDir() && !strings.HasPrefix(prefix, key) && !strings.HasPrefix(marker, key) { + return filepath.SkipDir + } + return nil + } + owner, group := getOwnerGroup(info) + f := &file{ + obj{ + key, + info.Size(), + info.ModTime(), + info.IsDir(), + }, + owner, + group, + info.Mode(), + } + if info.IsDir() { + f.size = 0 + } + listed <- f + return nil + }) + close(listed) + }() + return listed, nil +} + +func (d *filestore) Chtimes(path string, mtime time.Time) error { + p := d.path(path) + return os.Chtimes(p, mtime, mtime) +} + +func (d *filestore) Chmod(path string, mode os.FileMode) error { + p := d.path(path) + return os.Chmod(p, mode) +} + +func (d *filestore) Chown(path string, owner, group string) error { + p := d.path(path) + uid := lookupUser(owner) + gid := lookupGroup(group) + return os.Chown(p, uid, gid) +} + +func newDisk(root, accesskey, secretkey string) (ObjectStorage, error) { + // For Windows, the path looks like /C:/a/b/c/ + if runtime.GOOS == "windows" && strings.HasPrefix(root, "/") { + root = root[1:] + } + if strings.HasSuffix(root, dirSuffix) { + logger.Debugf("Ensure directory %s", root) + if err := os.MkdirAll(root, 0755); err != nil { + return nil, fmt.Errorf("Creating directory %s failed: %q", root, err) + } + } else { + dir := path.Dir(root) + logger.Debugf("Ensure directory %s", dir) + if err := os.MkdirAll(dir, 0755); err != nil { + return nil, fmt.Errorf("Creating directory %s failed: %q", dir, err) + } + } + return &filestore{root: root}, nil +} + +func init() { + Register("file", newDisk) +} diff --git a/pkg/object/file_unix.go b/pkg/object/file_unix.go new file mode 100644 index 0000000..a5a1fab --- /dev/null +++ b/pkg/object/file_unix.go @@ -0,0 +1,101 @@ +//go:build !windows +// +build !windows + +/* + * JuiceFS, Copyright 2020 Juicedata, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package object + +import ( + "os" + "os/user" + "strconv" + "sync" + "syscall" + + "github.com/pkg/sftp" +) + +var uids = make(map[int]string) +var gids = make(map[int]string) +var users = make(map[string]int) +var groups = make(map[string]int) +var mutex sync.Mutex + +func userName(uid int) string { + name, ok := uids[uid] + if !ok { + if u, err := user.LookupId(strconv.Itoa(uid)); err == nil { + name = u.Username + uids[uid] = name + } + } + return name +} + +func groupName(gid int) string { + name, ok := gids[gid] + if !ok { + if g, err := user.LookupGroupId(strconv.Itoa(gid)); err == nil { + name = g.Name + gids[gid] = name + } + } + return name +} + +func getOwnerGroup(info os.FileInfo) (string, string) { + mutex.Lock() + defer mutex.Unlock() + var owner, group string + switch st := info.Sys().(type) { + case *syscall.Stat_t: + owner = userName(int(st.Uid)) + group = groupName(int(st.Gid)) + case *sftp.FileStat: + owner = userName(int(st.UID)) + group = groupName(int(st.GID)) + } + return owner, group +} + +func lookupUser(name string) int { + mutex.Lock() + defer mutex.Unlock() + if u, ok := users[name]; ok { + return u + } + var uid = -1 + if u, err := user.Lookup(name); err == nil { + uid, _ = strconv.Atoi(u.Uid) + } + users[name] = uid + return uid +} + +func lookupGroup(name string) int { + mutex.Lock() + defer mutex.Unlock() + if u, ok := groups[name]; ok { + return u + } + var gid = -1 + if u, err := user.LookupGroup(name); err == nil { + gid, _ = strconv.Atoi(u.Gid) + } + groups[name] = gid + return gid +} diff --git a/pkg/object/file_windows.go b/pkg/object/file_windows.go new file mode 100644 index 0000000..da09985 --- /dev/null +++ b/pkg/object/file_windows.go @@ -0,0 +1,31 @@ +/* + * JuiceFS, Copyright 2020 Juicedata, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package object + +import "os" + +func getOwnerGroup(info os.FileInfo) (string, string) { + return "", "" +} + +func lookupUser(name string) int { + return 0 +} + +func lookupGroup(name string) int { + return 0 +} diff --git a/pkg/object/filesystem_test.go b/pkg/object/filesystem_test.go new file mode 100644 index 0000000..42490a8 --- /dev/null +++ b/pkg/object/filesystem_test.go @@ -0,0 +1,117 @@ +/* + * JuiceFS, Copyright 2020 Juicedata, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package object + +import ( + "bytes" + "fmt" + "os" + "strings" + "testing" +) + +func testKeysEqual(objs []Object, expectedKeys []string) error { + gottenKeys := make([]string, len(objs)) + for idx, obj := range objs { + gottenKeys[idx] = obj.Key() + } + if len(gottenKeys) != len(expectedKeys) { + return fmt.Errorf("Expected {%s}, got {%s}", strings.Join(expectedKeys, ", "), + strings.Join(gottenKeys, ", ")) + } + + for idx, key := range gottenKeys { + if key != expectedKeys[idx] { + return fmt.Errorf("Expected {%s}, got {%s}", strings.Join(expectedKeys, ", "), + strings.Join(gottenKeys, ", ")) + } + } + return nil +} + +func TestDisk2(t *testing.T) { + s, _ := newDisk("/tmp/abc/", "", "") + testFileSystem(t, s) +} + +func TestSftp2(t *testing.T) { + if os.Getenv("SFTP_HOST") == "" { + t.SkipNow() + } + sftp, _ := newSftp(os.Getenv("SFTP_HOST"), os.Getenv("SFTP_USER"), os.Getenv("SFTP_PASS")) + testFileSystem(t, sftp) +} + +func TestHDFS2(t *testing.T) { + if os.Getenv("HDFS_ADDR") == "" { + t.Skip() + } + dfs, _ := newHDFS(os.Getenv("HDFS_ADDR"), "", "") + testFileSystem(t, dfs) +} + +func testFileSystem(t *testing.T, s ObjectStorage) { + keys := []string{ + "x/", + "x/x.txt", + "xy.txt", + "xyz/", + "xyz/xyz.txt", + } + // initialize directory tree + for _, key := range keys { + if err := s.Put(key, bytes.NewReader([]byte{})); err != nil { + t.Fatalf("PUT object `%s` failed: %q", key, err) + } + } + // cleanup + defer func() { + // delete reversely, directory only can be deleted when it's empty + idx := len(keys) - 1 + for ; idx >= 0; idx-- { + if err := s.Delete(keys[idx]); err != nil { + t.Fatalf("DELETE object `%s` failed: %q", keys[idx], err) + } + } + }() + objs, err := listAll(s, "x/", "", 100) + if err != nil { + t.Fatalf("list failed: %s", err) + } + expectedKeys := []string{"x/", "x/x.txt"} + if err = testKeysEqual(objs, expectedKeys); err != nil { + t.Fatalf("testKeysEqual fail: %s", err) + } + + objs, err = listAll(s, "x", "", 100) + if err != nil { + t.Fatalf("list failed: %s", err) + } + expectedKeys = []string{"x/", "x/x.txt", "xy.txt", "xyz/", "xyz/xyz.txt"} + if err = testKeysEqual(objs, expectedKeys); err != nil { + t.Fatalf("testKeysEqual fail: %s", err) + } + + objs, err = listAll(s, "xy", "", 100) + if err != nil { + t.Fatalf("list failed: %s", err) + } + expectedKeys = []string{"xy.txt", "xyz/", "xyz/xyz.txt"} + if err = testKeysEqual(objs, expectedKeys); err != nil { + t.Fatalf("testKeysEqual fail: %s", err) + } +} diff --git a/pkg/object/gs.go b/pkg/object/gs.go new file mode 100644 index 0000000..8bb94e6 --- /dev/null +++ b/pkg/object/gs.go @@ -0,0 +1,183 @@ +//go:build !nogs +// +build !nogs + +/* + * JuiceFS, Copyright 2018 Juicedata, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package object + +import ( + "context" + "fmt" + "io" + "net/url" + "os" + "strings" + + "github.com/pkg/errors" + + "google.golang.org/api/iterator" + + "cloud.google.com/go/compute/metadata" + "cloud.google.com/go/storage" + "golang.org/x/oauth2/google" +) + +type gs struct { + DefaultObjectStorage + client *storage.Client + bucket string + region string + pageToken string +} + +func (g *gs) String() string { + return fmt.Sprintf("gs://%s/", g.bucket) +} + +func (g *gs) Create() error { + // check if the bucket is already exists + if objs, err := g.List("", "", 1); err == nil && len(objs) > 0 { + return nil + } + + projectID := os.Getenv("GOOGLE_CLOUD_PROJECT") + if projectID == "" { + projectID, _ = metadata.ProjectID() + } + if projectID == "" { + cred, err := google.FindDefaultCredentials(context.Background()) + if err == nil { + projectID = cred.ProjectID + } + } + if projectID == "" { + return errors.New("GOOGLE_CLOUD_PROJECT environment variable must be set") + } + // Guess region when region is not provided + if g.region == "" { + zone, err := metadata.Zone() + if err == nil && len(zone) > 2 { + g.region = zone[:len(zone)-2] + } + if g.region == "" { + return errors.New("Could not guess region to create bucket") + } + } + + err := g.client.Bucket(g.bucket).Create(ctx, projectID, &storage.BucketAttrs{ + Name: g.bucket, + StorageClass: "regional", + Location: g.region, + }) + if err != nil && strings.Contains(err.Error(), "You already own this bucket") { + return nil + } + return err +} + +func (g *gs) Head(key string) (Object, error) { + attrs, err := g.client.Bucket(g.bucket).Object(key).Attrs(ctx) + if err != nil { + return nil, err + } + + return &obj{ + key, + attrs.Size, + attrs.Updated, + strings.HasSuffix(key, "/"), + }, nil +} + +func (g *gs) Get(key string, off, limit int64) (io.ReadCloser, error) { + reader, err := g.client.Bucket(g.bucket).Object(key).NewRangeReader(ctx, off, limit) + if err != nil { + return nil, err + } + return reader, nil +} + +func (g *gs) Put(key string, data io.Reader) error { + writer := g.client.Bucket(g.bucket).Object(key).NewWriter(ctx) + _, err := io.Copy(writer, data) + if err != nil { + return err + } + return writer.Close() +} + +func (g *gs) Copy(dst, src string) error { + srcObj := g.client.Bucket(g.bucket).Object(src) + dstObj := g.client.Bucket(g.bucket).Object(dst) + _, err := dstObj.CopierFrom(srcObj).Run(ctx) + return err +} + +func (g *gs) Delete(key string) error { + if err := g.client.Bucket(g.bucket).Object(key).Delete(ctx); err != storage.ErrObjectNotExist { + return err + } + return nil +} + +func (g *gs) List(prefix, marker string, limit int64) ([]Object, error) { + if marker != "" && g.pageToken == "" { + // last page + return nil, nil + } + objectIterator := g.client.Bucket(g.bucket).Objects(ctx, &storage.Query{Prefix: prefix}) + pager := iterator.NewPager(objectIterator, int(limit), g.pageToken) + var entries []*storage.ObjectAttrs + nextPageToken, err := pager.NextPage(&entries) + if err != nil { + return nil, err + } + g.pageToken = nextPageToken + n := len(entries) + objs := make([]Object, n) + for i := 0; i < n; i++ { + item := entries[i] + objs[i] = &obj{item.Name, item.Size, item.Updated, strings.HasSuffix(item.Name, "/")} + } + return objs, nil +} + +func newGS(endpoint, accessKey, secretKey string) (ObjectStorage, error) { + if !strings.Contains(endpoint, "://") { + endpoint = fmt.Sprintf("gs://%s", endpoint) + } + uri, err := url.ParseRequestURI(endpoint) + if err != nil { + return nil, 
errors.Errorf("Invalid endpoint: %v, error: %v", endpoint, err) + } + hostParts := strings.Split(uri.Host, ".") + bucket := hostParts[0] + var region string + if len(hostParts) > 1 { + region = hostParts[1] + } + + client, err := storage.NewClient(ctx) + if err != nil { + return nil, err + } + return &gs{client: client, bucket: bucket, region: region}, nil +} + +func init() { + Register("gs", newGS) +} diff --git a/pkg/object/hdfs.go b/pkg/object/hdfs.go new file mode 100644 index 0000000..afd1f3d --- /dev/null +++ b/pkg/object/hdfs.go @@ -0,0 +1,362 @@ +//go:build !nohdfs +// +build !nohdfs + +/* + * JuiceFS, Copyright 2020 Juicedata, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package object + +import ( + "bytes" + "fmt" + "io" + "io/ioutil" + "math/rand" + "os" + "os/user" + "path/filepath" + "sort" + "strings" + "time" + + "github.com/colinmarc/hdfs/v2" + "github.com/colinmarc/hdfs/v2/hadoopconf" +) + +var superuser = "hdfs" +var supergroup = "supergroup" + +type hdfsclient struct { + DefaultObjectStorage + addr string + c *hdfs.Client +} + +func (h *hdfsclient) String() string { + return fmt.Sprintf("hdfs://%s/", h.addr) +} + +func (h *hdfsclient) path(key string) string { + return "/" + key +} + +func (h *hdfsclient) Head(key string) (Object, error) { + info, err := h.c.Stat(h.path(key)) + if err != nil { + return nil, err + } + + hinfo := info.(*hdfs.FileInfo) + f := &file{ + obj{ + key, + info.Size(), + info.ModTime(), + info.IsDir(), + }, + hinfo.Owner(), + hinfo.OwnerGroup(), + info.Mode(), + } + if f.owner == superuser { + f.owner = "root" + } + if f.group == supergroup { + f.group = "root" + } + // stickybit from HDFS is different than golang + if f.mode&01000 != 0 { + f.mode &= ^os.FileMode(01000) + f.mode |= os.ModeSticky + } + if info.IsDir() { + f.size = 0 + if !strings.HasSuffix(f.key, "/") { + f.key += "/" + } + } + return f, nil +} + +type withCloser struct { + io.Reader + io.Closer +} + +func (h *hdfsclient) Get(key string, off, limit int64) (io.ReadCloser, error) { + f, err := h.c.Open(h.path(key)) + if err != nil { + return nil, err + } + + finfo := f.Stat() + if finfo.IsDir() { + return ioutil.NopCloser(bytes.NewBuffer([]byte{})), nil + } + + if off > 0 { + if _, err := f.Seek(off, io.SeekStart); err != nil { + _ = f.Close() + return nil, err + } + } + if limit > 0 { + return withCloser{io.LimitReader(f, limit), f}, nil + } + return f, nil +} + +const abcException = "org.apache.hadoop.hdfs.protocol.AlreadyBeingCreatedException" + +func (h *hdfsclient) Put(key string, in io.Reader) error { + path := h.path(key) + if strings.HasSuffix(path, dirSuffix) { + return h.c.MkdirAll(path, os.FileMode(0755)) + } + tmp := filepath.Join(filepath.Dir(path), fmt.Sprintf(".%s.tmp.%d", filepath.Base(path), rand.Int())) + f, err := h.c.CreateFile(tmp, 3, 128<<20, 0755) + defer func() { _ = h.c.Remove(tmp) }() + if err != nil { + if pe, ok := err.(*os.PathError); ok && pe.Err == os.ErrNotExist { + _ = h.c.MkdirAll(filepath.Dir(path), 0755) + f, err = 
h.c.CreateFile(tmp, 3, 128<<20, 0755) + } + if pe, ok := err.(*os.PathError); ok { + if remoteErr, ok := pe.Err.(hdfs.Error); ok && remoteErr.Exception() == abcException { + pe.Err = os.ErrExist + } + if pe.Err == os.ErrExist { + _ = h.c.Remove(tmp) + f, err = h.c.CreateFile(tmp, 3, 128<<20, 0755) + } + } + if err != nil { + return err + } + } + buf := bufPool.Get().(*[]byte) + defer bufPool.Put(buf) + _, err = io.CopyBuffer(f, in, *buf) + if err != nil { + _ = f.Close() + return err + } + err = f.Close() + if err != nil { + return err + } + return h.c.Rename(tmp, path) +} + +func (h *hdfsclient) Delete(key string) error { + err := h.c.Remove(h.path(key)) + if err != nil && os.IsNotExist(err) { + err = nil + } + return err +} + +func (h *hdfsclient) List(prefix, marker string, limit int64) ([]Object, error) { + return nil, notSupported +} + +func (h *hdfsclient) walk(path string, walkFn filepath.WalkFunc) error { + file, err := h.c.Open(path) + var info os.FileInfo + if file != nil { + info = file.Stat() + } + + err = walkFn(path, info, err) + if err != nil { + if info != nil && info.IsDir() && err == filepath.SkipDir { + return nil + } + + return err + } + + if info == nil || !info.IsDir() { + return nil + } + + infos, err := file.Readdir(0) + if err != nil { + return walkFn(path, info, err) + } + + // make sure they are ordered in full path + names := make([]string, len(infos)) + for i, info := range infos { + if info.IsDir() { + names[i] = info.Name() + "/" + } else { + names[i] = info.Name() + } + } + sort.Strings(names) + + for _, name := range names { + name = strings.TrimSuffix(name, "/") + err = h.walk(filepath.ToSlash(filepath.Join(path, name)), walkFn) + if err != nil { + return err + } + } + + return nil +} + +func (h *hdfsclient) ListAll(prefix, marker string) (<-chan Object, error) { + listed := make(chan Object, 10240) + root := h.path(prefix) + _, err := h.c.Stat(root) + if err != nil && err.(*os.PathError).Err == os.ErrNotExist || !strings.HasSuffix(prefix, "/") { + root = filepath.Dir(root) + } + _, err = h.c.Stat(root) + if err != nil && err.(*os.PathError).Err == os.ErrNotExist { + close(listed) + return listed, nil // return empty list + } + go func() { + _ = h.walk(root, func(path string, info os.FileInfo, err error) error { + if err != nil { + if err == io.EOF { + err = nil // ignore + } else { + logger.Errorf("list %s: %s", path, err) + listed <- nil + } + return err + } + + if !strings.HasSuffix(prefix, "/") && !strings.HasPrefix(info.Name(), prefix) { + if info.IsDir() && root != path { + return filepath.SkipDir + } + return nil + } + + key := path[1:] + if !strings.HasPrefix(key, prefix) || key < marker { + if info.IsDir() && !strings.HasPrefix(prefix, key) && !strings.HasPrefix(marker, key) { + return filepath.SkipDir + } + return nil + } + hinfo := info.(*hdfs.FileInfo) + f := &file{ + obj{ + key, + info.Size(), + info.ModTime(), + info.IsDir(), + }, + hinfo.Owner(), + hinfo.OwnerGroup(), + info.Mode(), + } + if f.owner == superuser { + f.owner = "root" + } + if f.group == supergroup { + f.group = "root" + } + // stickybit from HDFS is different than golang + if f.mode&01000 != 0 { + f.mode &= ^os.FileMode(01000) + f.mode |= os.ModeSticky + } + if info.IsDir() { + f.size = 0 + if path != root || !strings.HasSuffix(root, "/") { + f.key += "/" + } + } + listed <- f + return nil + }) + close(listed) + }() + return listed, nil +} + +func (h *hdfsclient) Chtimes(key string, mtime time.Time) error { + return h.c.Chtimes(h.path(key), mtime, mtime) +} + +func (h 
*hdfsclient) Chmod(key string, mode os.FileMode) error { + return h.c.Chmod(h.path(key), mode) +} + +func (h *hdfsclient) Chown(key string, owner, group string) error { + if owner == "root" { + owner = superuser + } + if group == "root" { + group = supergroup + } + return h.c.Chown(h.path(key), owner, group) +} + +func newHDFS(addr, username, sk string) (ObjectStorage, error) { + conf, err := hadoopconf.LoadFromEnvironment() + if err != nil { + return nil, fmt.Errorf("Problem loading configuration: %s", err) + } + + options := hdfs.ClientOptionsFromConf(conf) + if addr != "" { + options.Addresses = strings.Split(addr, ",") + } + + if options.KerberosClient != nil { + options.KerberosClient, err = getKerberosClient() + if err != nil { + return nil, fmt.Errorf("Problem with kerberos authentication: %s", err) + } + } else { + if username == "" { + username = os.Getenv("HADOOP_USER_NAME") + } + if username == "" { + current, err := user.Current() + if err != nil { + return nil, fmt.Errorf("get current user: %s", err) + } + username = current.Username + } + options.User = username + } + + c, err := hdfs.NewClient(options) + if err != nil { + return nil, fmt.Errorf("new HDFS client %s: %s", addr, err) + } + if os.Getenv("HADOOP_SUPER_USER") != "" { + superuser = os.Getenv("HADOOP_SUPER_USER") + } + if os.Getenv("HADOOP_SUPER_GROUP") != "" { + supergroup = os.Getenv("HADOOP_SUPER_GROUP") + } + + return &hdfsclient{addr: addr, c: c}, nil +} + +func init() { + Register("hdfs", newHDFS) +} diff --git a/pkg/object/hdfs_kerberos.go b/pkg/object/hdfs_kerberos.go new file mode 100644 index 0000000..34e520d --- /dev/null +++ b/pkg/object/hdfs_kerberos.go @@ -0,0 +1,60 @@ +//go:build !nohdfs +// +build !nohdfs + +// Copyright 2014 Colin Marc (colinmarc@gmail.com) +// borrowed from https://github.com/colinmarc/hdfs/blob/master/cmd/hdfs/kerberos.go + +package object + +import ( + "fmt" + "os" + "os/user" + "strings" + + krb "github.com/jcmturner/gokrb5/v8/client" + "github.com/jcmturner/gokrb5/v8/config" + "github.com/jcmturner/gokrb5/v8/credentials" +) + +func getKerberosClient() (*krb.Client, error) { + configPath := os.Getenv("KRB5_CONFIG") + if configPath == "" { + configPath = "/etc/krb5.conf" + } + + cfg, err := config.Load(configPath) + if err != nil { + return nil, err + } + + // Determine the ccache location from the environment, falling back to the + // default location. + ccachePath := os.Getenv("KRB5CCNAME") + if strings.Contains(ccachePath, ":") { + if strings.HasPrefix(ccachePath, "FILE:") { + ccachePath = strings.SplitN(ccachePath, ":", 2)[1] + } else { + return nil, fmt.Errorf("unusable ccache: %s", ccachePath) + } + } else if ccachePath == "" { + u, err := user.Current() + if err != nil { + return nil, err + } + + ccachePath = fmt.Sprintf("/tmp/krb5cc_%s", u.Uid) + } + + ccache, err := credentials.LoadCCache(ccachePath) + if err != nil { + return nil, err + } + + client, err := krb.NewFromCCache(ccache, cfg) + if err != nil { + return nil, err + } + + return client, nil +} diff --git a/pkg/object/ibmcos.go b/pkg/object/ibmcos.go new file mode 100644 index 0000000..1833db6 --- /dev/null +++ b/pkg/object/ibmcos.go @@ -0,0 +1,250 @@ +//go:build !noibmcos +// +build !noibmcos + +/* + * JuiceFS, Copyright 2020 Juicedata, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package object + +import ( + "bytes" + "fmt" + "io" + "io/ioutil" + "net/url" + "strings" + + "github.com/IBM/ibm-cos-sdk-go/aws" + "github.com/IBM/ibm-cos-sdk-go/aws/credentials/ibmiam" + "github.com/IBM/ibm-cos-sdk-go/aws/session" + "github.com/IBM/ibm-cos-sdk-go/service/s3" +) + +type ibmcos struct { + bucket string + s3 *s3.S3 +} + +func (s *ibmcos) String() string { + return fmt.Sprintf("ibmcos://%s/", s.bucket) +} + +func (s *ibmcos) Create() error { + _, err := s.s3.CreateBucket(&s3.CreateBucketInput{Bucket: &s.bucket}) + if err != nil && isExists(err) { + err = nil + } + return err +} + +func (s *ibmcos) Get(key string, off, limit int64) (io.ReadCloser, error) { + params := &s3.GetObjectInput{Bucket: &s.bucket, Key: &key} + if off > 0 || limit > 0 { + var r string + if limit > 0 { + r = fmt.Sprintf("bytes=%d-%d", off, off+limit-1) + } else { + r = fmt.Sprintf("bytes=%d-", off) + } + params.Range = &r + } + resp, err := s.s3.GetObject(params) + if err != nil { + return nil, err + } + return resp.Body, nil +} + +func (s *ibmcos) Put(key string, in io.Reader) error { + var body io.ReadSeeker + if b, ok := in.(io.ReadSeeker); ok { + body = b + } else { + data, err := ioutil.ReadAll(in) + if err != nil { + return err + } + body = bytes.NewReader(data) + } + params := &s3.PutObjectInput{ + Bucket: &s.bucket, + Key: &key, + Body: body, + } + _, err := s.s3.PutObject(params) + return err +} + +func (s *ibmcos) Copy(dst, src string) error { + src = s.bucket + "/" + src + params := &s3.CopyObjectInput{ + Bucket: &s.bucket, + Key: &dst, + CopySource: &src, + } + _, err := s.s3.CopyObject(params) + return err +} + +func (s *ibmcos) Head(key string) (Object, error) { + param := s3.HeadObjectInput{ + Bucket: &s.bucket, + Key: &key, + } + r, err := s.s3.HeadObject(¶m) + if err != nil { + return nil, err + } + return &obj{ + key, + *r.ContentLength, + *r.LastModified, + strings.HasSuffix(key, "/"), + }, nil +} + +func (s *ibmcos) Delete(key string) error { + param := s3.DeleteObjectInput{ + Bucket: &s.bucket, + Key: &key, + } + _, err := s.s3.DeleteObject(¶m) + return err +} + +func (s *ibmcos) List(prefix, marker string, limit int64) ([]Object, error) { + param := s3.ListObjectsInput{ + Bucket: &s.bucket, + Prefix: &prefix, + Marker: &marker, + MaxKeys: &limit, + } + resp, err := s.s3.ListObjects(¶m) + if err != nil { + return nil, err + } + n := len(resp.Contents) + objs := make([]Object, n) + for i := 0; i < n; i++ { + o := resp.Contents[i] + objs[i] = &obj{*o.Key, *o.Size, *o.LastModified, strings.HasSuffix(*o.Key, "/")} + } + return objs, nil +} + +func (s *ibmcos) ListAll(prefix, marker string) (<-chan Object, error) { + return nil, notSupported +} + +func (s *ibmcos) CreateMultipartUpload(key string) (*MultipartUpload, error) { + params := &s3.CreateMultipartUploadInput{ + Bucket: &s.bucket, + Key: &key, + } + resp, err := s.s3.CreateMultipartUpload(params) + if err != nil { + return nil, err + } + return &MultipartUpload{UploadID: *resp.UploadId, MinPartSize: 5 << 20, MaxCount: 10000}, nil +} + +func (s *ibmcos) UploadPart(key string, uploadID string, num int, body 
[]byte) (*Part, error) { + n := int64(num) + params := &s3.UploadPartInput{ + Bucket: &s.bucket, + Key: &key, + UploadId: &uploadID, + Body: bytes.NewReader(body), + PartNumber: &n, + } + resp, err := s.s3.UploadPart(params) + if err != nil { + return nil, err + } + return &Part{Num: num, ETag: *resp.ETag}, nil +} + +func (s *ibmcos) AbortUpload(key string, uploadID string) { + params := &s3.AbortMultipartUploadInput{ + Bucket: &s.bucket, + Key: &key, + UploadId: &uploadID, + } + _, _ = s.s3.AbortMultipartUpload(params) +} + +func (s *ibmcos) CompleteUpload(key string, uploadID string, parts []*Part) error { + var s3Parts []*s3.CompletedPart + for i := range parts { + n := new(int64) + *n = int64(parts[i].Num) + s3Parts = append(s3Parts, &s3.CompletedPart{ETag: &parts[i].ETag, PartNumber: n}) + } + params := &s3.CompleteMultipartUploadInput{ + Bucket: &s.bucket, + Key: &key, + UploadId: &uploadID, + MultipartUpload: &s3.CompletedMultipartUpload{Parts: s3Parts}, + } + _, err := s.s3.CompleteMultipartUpload(params) + return err +} + +func (s *ibmcos) ListUploads(marker string) ([]*PendingPart, string, error) { + input := &s3.ListMultipartUploadsInput{ + Bucket: aws.String(s.bucket), + KeyMarker: aws.String(marker), + } + // FIXME: parsing time "2018-08-23T12:23:26.046+08:00" as "2006-01-02T15:04:05Z" + result, err := s.s3.ListMultipartUploads(input) + if err != nil { + return nil, "", err + } + parts := make([]*PendingPart, len(result.Uploads)) + for i, u := range result.Uploads { + parts[i] = &PendingPart{*u.Key, *u.UploadId, *u.Initiated} + } + var nextMarker string + if result.NextKeyMarker != nil { + nextMarker = *result.NextKeyMarker + } + return parts, nextMarker, nil +} + +func newIBMCOS(endpoint, apiKey, serviceInstanceID string) (ObjectStorage, error) { + if !strings.Contains(endpoint, "://") { + endpoint = fmt.Sprintf("https://%s", endpoint) + } + uri, _ := url.ParseRequestURI(endpoint) + hostParts := strings.Split(uri.Host, ".") + bucket := hostParts[0] + region := hostParts[2] + authEndpoint := "https://iam.cloud.ibm.com/identity/token" + serviceEndpoint := "https://" + strings.SplitN(uri.Host, ".", 2)[1] + conf := aws.NewConfig(). + WithRegion(region). + WithEndpoint(serviceEndpoint). + WithCredentials(ibmiam.NewStaticCredentials(aws.NewConfig(), + authEndpoint, apiKey, serviceInstanceID)). + WithS3ForcePathStyle(true) + sess := session.Must(session.NewSession()) + client := s3.New(sess, conf) + return &ibmcos{bucket, client}, nil +} + +func init() { + Register("ibmcos", newIBMCOS) +} diff --git a/pkg/object/interface.go b/pkg/object/interface.go new file mode 100644 index 0000000..27cbf92 --- /dev/null +++ b/pkg/object/interface.go @@ -0,0 +1,92 @@ +/* + * JuiceFS, Copyright 2020 Juicedata, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package object + +import ( + "io" + "time" +) + +type Object interface { + Key() string + Size() int64 + Mtime() time.Time + IsDir() bool +} + +type obj struct { + key string + size int64 + mtime time.Time + isDir bool +} + +func (o *obj) Key() string { return o.key } +func (o *obj) Size() int64 { return o.size } +func (o *obj) Mtime() time.Time { return o.mtime } +func (o *obj) IsDir() bool { return o.isDir } + +type MultipartUpload struct { + MinPartSize int + MaxCount int + UploadID string +} + +type Part struct { + Num int + Size int + ETag string +} + +type PendingPart struct { + Key string + UploadID string + Created time.Time +} + +// ObjectStorage is the interface for object storage. +// all of these API should be idempotent. +type ObjectStorage interface { + // Description of the object storage. + String() string + // Create the bucket if not existed. + Create() error + // Get the data for the given object specified by key. + Get(key string, off, limit int64) (io.ReadCloser, error) + // Put data read from a reader to an object specified by key. + Put(key string, in io.Reader) error + // Delete a object. + Delete(key string) error + + // Head returns some information about the object or an error if not found. + Head(key string) (Object, error) + // List returns a list of objects. + List(prefix, marker string, limit int64) ([]Object, error) + // ListAll returns all the objects as an channel. + ListAll(prefix, marker string) (<-chan Object, error) + + // CreateMultipartUpload starts to upload a large object part by part. + CreateMultipartUpload(key string) (*MultipartUpload, error) + // UploadPart upload a part of an object. + UploadPart(key string, uploadID string, num int, body []byte) (*Part, error) + // AbortUpload abort a multipart upload. + AbortUpload(key string, uploadID string) + // CompleteUpload finish an multipart upload. + CompleteUpload(key string, uploadID string, parts []*Part) error + // ListUploads lists existing multipart uploads. + ListUploads(marker string) ([]*PendingPart, string, error) +} diff --git a/pkg/object/jss.go b/pkg/object/jss.go new file mode 100644 index 0000000..bb36bc3 --- /dev/null +++ b/pkg/object/jss.go @@ -0,0 +1,82 @@ +//go:build !nos3 +// +build !nos3 + +/* + * JuiceFS, Copyright 2018 Juicedata, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package object + +import ( + "fmt" + "net/url" + "strings" + + "github.com/aws/aws-sdk-go/aws" + "github.com/aws/aws-sdk-go/aws/credentials" + "github.com/aws/aws-sdk-go/aws/session" + "github.com/aws/aws-sdk-go/service/s3" +) + +type jss struct { + s3client +} + +func (j *jss) String() string { + return fmt.Sprintf("jss://%s/", j.s3client.bucket) +} + +func (j *jss) Copy(dst, src string) error { + src = "/" + j.s3client.bucket + "/" + src + params := &s3.CopyObjectInput{ + Bucket: &j.s3client.bucket, + Key: &dst, + CopySource: &src, + } + _, err := j.s3client.s3.CopyObject(params) + return err +} + +func newJSS(endpoint, accessKey, secretKey string) (ObjectStorage, error) { + if !strings.Contains(endpoint, "://") { + endpoint = fmt.Sprintf("https://%s", endpoint) + } + uri, _ := url.ParseRequestURI(endpoint) + ssl := strings.ToLower(uri.Scheme) == "https" + hostParts := strings.Split(uri.Host, ".") + bucket := hostParts[0] + region := hostParts[2] + endpoint = uri.Host[len(bucket)+1:] + + awsConfig := &aws.Config{ + Region: ®ion, + Endpoint: &endpoint, + DisableSSL: aws.Bool(!ssl), + S3ForcePathStyle: aws.Bool(true), + HTTPClient: httpClient, + Credentials: credentials.NewStaticCredentials(accessKey, secretKey, ""), + } + + ses, err := session.NewSession(awsConfig) + if err != nil { + return nil, err + } + ses.Handlers.Build.PushFront(disableSha256Func) + return &jss{s3client{bucket, s3.New(ses), ses}}, nil +} + +func init() { + Register("jss", newJSS) +} diff --git a/pkg/object/ks3.go b/pkg/object/ks3.go new file mode 100644 index 0000000..16c5cf4 --- /dev/null +++ b/pkg/object/ks3.go @@ -0,0 +1,279 @@ +//go:build !nos3 +// +build !nos3 + +/* + * JuiceFS, Copyright 2018 Juicedata, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package object + +import ( + "bytes" + "fmt" + "io" + "io/ioutil" + "net/url" + "strings" + + "github.com/aws/aws-sdk-go/aws/session" + "github.com/ks3sdklib/aws-sdk-go/aws" + "github.com/ks3sdklib/aws-sdk-go/aws/credentials" + "github.com/ks3sdklib/aws-sdk-go/service/s3" +) + +type ks3 struct { + bucket string + s3 *s3.S3 + ses *session.Session +} + +func (s *ks3) String() string { + return fmt.Sprintf("ks3://%s/", s.bucket) +} +func (s *ks3) Create() error { + _, err := s.s3.CreateBucket(&s3.CreateBucketInput{Bucket: &s.bucket}) + if err != nil && isExists(err) { + err = nil + } + return err +} + +func (s *ks3) Head(key string) (Object, error) { + param := s3.HeadObjectInput{ + Bucket: &s.bucket, + Key: &key, + } + + r, err := s.s3.HeadObject(¶m) + if err != nil { + return nil, err + } + + return &obj{ + key, + *r.ContentLength, + *r.LastModified, + strings.HasSuffix(key, "/"), + }, nil +} + +func (s *ks3) Get(key string, off, limit int64) (io.ReadCloser, error) { + params := &s3.GetObjectInput{Bucket: &s.bucket, Key: &key} + if off > 0 || limit > 0 { + var r string + if limit > 0 { + r = fmt.Sprintf("bytes=%d-%d", off, off+limit-1) + } else { + r = fmt.Sprintf("bytes=%d-", off) + } + params.Range = &r + } + resp, err := s.s3.GetObject(params) + if err != nil { + return nil, err + } + return resp.Body, nil +} + +func (s *ks3) Put(key string, in io.Reader) error { + var body io.ReadSeeker + if b, ok := in.(io.ReadSeeker); ok { + body = b + } else { + data, err := ioutil.ReadAll(in) + if err != nil { + return err + } + body = bytes.NewReader(data) + } + params := &s3.PutObjectInput{ + Bucket: &s.bucket, + Key: &key, + Body: body, + } + _, err := s.s3.PutObject(params) + return err +} +func (s *ks3) Copy(dst, src string) error { + src = s.bucket + "/" + src + params := &s3.CopyObjectInput{ + Bucket: &s.bucket, + Key: &dst, + CopySource: &src, + } + _, err := s.s3.CopyObject(params) + return err +} + +func (s *ks3) Delete(key string) error { + param := s3.DeleteObjectInput{ + Bucket: &s.bucket, + Key: &key, + } + _, err := s.s3.DeleteObject(¶m) + return err +} + +func (s *ks3) List(prefix, marker string, limit int64) ([]Object, error) { + param := s3.ListObjectsInput{ + Bucket: &s.bucket, + Prefix: &prefix, + Marker: &marker, + MaxKeys: &limit, + } + resp, err := s.s3.ListObjects(¶m) + if err != nil { + return nil, err + } + n := len(resp.Contents) + objs := make([]Object, n) + for i := 0; i < n; i++ { + o := resp.Contents[i] + objs[i] = &obj{*o.Key, *o.Size, *o.LastModified, strings.HasSuffix(*o.Key, "/")} + } + return objs, nil +} + +func (s *ks3) ListAll(prefix, marker string) (<-chan Object, error) { + return nil, notSupported +} + +func (s *ks3) CreateMultipartUpload(key string) (*MultipartUpload, error) { + params := &s3.CreateMultipartUploadInput{ + Bucket: &s.bucket, + Key: &key, + } + resp, err := s.s3.CreateMultipartUpload(params) + if err != nil { + return nil, err + } + return &MultipartUpload{UploadID: *resp.UploadID, MinPartSize: 5 << 20, MaxCount: 10000}, nil +} + +func (s *ks3) UploadPart(key string, uploadID string, num int, body []byte) (*Part, error) { + n := int64(num) + params := &s3.UploadPartInput{ + Bucket: &s.bucket, + Key: &key, + UploadID: &uploadID, + Body: bytes.NewReader(body), + PartNumber: &n, + } + resp, err := s.s3.UploadPart(params) + if err != nil { + return nil, err + } + return &Part{Num: num, ETag: *resp.ETag}, nil +} + +func (s *ks3) AbortUpload(key string, uploadID string) { + params := &s3.AbortMultipartUploadInput{ + Bucket: &s.bucket, + 
Key: &key, + UploadID: &uploadID, + } + _, _ = s.s3.AbortMultipartUpload(params) +} + +func (s *ks3) CompleteUpload(key string, uploadID string, parts []*Part) error { + var s3Parts []*s3.CompletedPart + for i := range parts { + n := new(int64) + *n = int64(parts[i].Num) + s3Parts = append(s3Parts, &s3.CompletedPart{ETag: &parts[i].ETag, PartNumber: n}) + } + params := &s3.CompleteMultipartUploadInput{ + Bucket: &s.bucket, + Key: &key, + UploadID: &uploadID, + MultipartUpload: &s3.CompletedMultipartUpload{Parts: s3Parts}, + } + _, err := s.s3.CompleteMultipartUpload(params) + return err +} + +func (s *ks3) ListUploads(marker string) ([]*PendingPart, string, error) { + input := &s3.ListMultipartUploadsInput{ + Bucket: aws.String(s.bucket), + KeyMarker: aws.String(marker), + } + // FIXME: parsing time "2018-08-23T12:23:26.046+08:00" as "2006-01-02T15:04:05Z" + result, err := s.s3.ListMultipartUploads(input) + if err != nil { + return nil, "", err + } + parts := make([]*PendingPart, len(result.Uploads)) + for i, u := range result.Uploads { + parts[i] = &PendingPart{*u.Key, *u.UploadID, *u.Initiated} + } + var nextMarker string + if result.NextKeyMarker != nil { + nextMarker = *result.NextKeyMarker + } + return parts, nextMarker, nil +} + +var ks3Regions = map[string]string{ + "cn-beijing": "BEIJING", + "cn-shanghai": "SHANGHAI", + "cn-guangzhou": "GUANGZHOU", + "cn-qingdao": "QINGDAO", + "jr-beijing": "JR_BEIJING", + "jr-shanghai": "JR_SHANGHAI", + "": "HANGZHOU", + "cn-hk-1": "HONGKONG", + "rus": "RUSSIA", + "sgp": "SINGAPORE", +} + +func newKS3(endpoint, accessKey, secretKey string) (ObjectStorage, error) { + if !strings.Contains(endpoint, "://") { + endpoint = fmt.Sprintf("https://%s", endpoint) + } + uri, _ := url.ParseRequestURI(endpoint) + ssl := strings.ToLower(uri.Scheme) == "https" + hostParts := strings.Split(uri.Host, ".") + bucket := hostParts[0] + region := hostParts[1][3:] + region = strings.TrimLeft(region, "-") + if strings.HasSuffix(uri.Host, "ksyun.com") { + region = strings.TrimSuffix(region, "-internal") + region = ks3Regions[region] + } + + var err error + accessKey, err = url.PathUnescape(accessKey) + if err != nil { + return nil, fmt.Errorf("unescape access key: %s", err) + } + secretKey, err = url.PathUnescape(secretKey) + if err != nil { + return nil, fmt.Errorf("unescape secret key: %s", err) + } + awsConfig := &aws.Config{ + Region: region, + Endpoint: strings.SplitN(uri.Host, ".", 2)[1], + DisableSSL: !ssl, + HTTPClient: httpClient, + S3ForcePathStyle: true, + Credentials: credentials.NewStaticCredentials(accessKey, secretKey, ""), + } + + return &ks3{bucket, s3.New(awsConfig), nil}, nil +} + +func init() { + Register("ks3", newKS3) +} diff --git a/pkg/object/mem.go b/pkg/object/mem.go new file mode 100644 index 0000000..e5a93e7 --- /dev/null +++ b/pkg/object/mem.go @@ -0,0 +1,171 @@ +/* + * JuiceFS, Copyright 2018 Juicedata, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package object + +import ( + "bytes" + "errors" + "fmt" + "io" + "io/ioutil" + "os" + "sort" + "strings" + "sync" + "time" +) + +type mobj struct { + data []byte + mtime time.Time + mode os.FileMode + owner string + group string +} + +type memStore struct { + sync.Mutex + DefaultObjectStorage + name string + objects map[string]*mobj +} + +func (m *memStore) String() string { + return fmt.Sprintf("mem://%s/", m.name) +} + +func (m *memStore) Head(key string) (Object, error) { + m.Lock() + defer m.Unlock() + // Minimum length is 1. + if key == "" { + return nil, errors.New("object key cannot be empty") + } + o, ok := m.objects[key] + if !ok { + return nil, errors.New("not exists") + } + f := &file{ + obj{ + key, + int64(len(o.data)), + o.mtime, + strings.HasSuffix(key, "/"), + }, + o.owner, + o.group, + o.mode, + } + return f, nil +} + +func (m *memStore) Get(key string, off, limit int64) (io.ReadCloser, error) { + m.Lock() + defer m.Unlock() + // Minimum length is 1. + if key == "" { + return nil, errors.New("object key cannot be empty") + } + d, ok := m.objects[key] + if !ok { + return nil, errors.New("not exists") + } + data := d.data[off:] + if limit > 0 && limit < int64(len(data)) { + data = data[:limit] + } + return ioutil.NopCloser(bytes.NewBuffer(data)), nil +} + +func (m *memStore) Put(key string, in io.Reader) error { + m.Lock() + defer m.Unlock() + // Minimum length is 1. + if key == "" { + return errors.New("object key cannot be empty") + } + _, ok := m.objects[key] + if ok { + logger.Debugf("overwrite %s", key) + } + data, err := ioutil.ReadAll(in) + if err != nil { + return err + } + m.objects[key] = &mobj{data: data, mtime: time.Now()} + return nil +} + +func (m *memStore) Copy(dst, src string) error { + d, err := m.Get(src, 0, -1) + if err != nil { + return err + } + return m.Put(dst, d) +} + +func (m *memStore) Delete(key string) error { + m.Lock() + defer m.Unlock() + delete(m.objects, key) + return nil +} + +func (m *memStore) List(prefix, marker string, limit int64) ([]Object, error) { + m.Lock() + defer m.Unlock() + + objs := make([]Object, 0) + for k := range m.objects { + if strings.HasPrefix(k, prefix) && k > marker { + o := m.objects[k] + f := &file{ + obj{ + k, + int64(len(o.data)), + o.mtime, + strings.HasSuffix(k, "/"), + }, + o.owner, + o.group, + o.mode, + } + objs = append(objs, f) + } + } + sort.Slice(objs, func(i, j int) bool { + return objs[i].Key() < objs[j].Key() + }) + if int64(len(objs)) > limit { + objs = objs[:limit] + } + return objs, nil +} + +func (m *memStore) ListAll(prefix, marker string) (<-chan Object, error) { + return nil, notSupported +} + +func newMem(endpoint, accesskey, secretkey string) (ObjectStorage, error) { + store := &memStore{name: endpoint} + store.objects = make(map[string]*mobj) + return store, nil +} + +func init() { + Register("mem", newMem) +} diff --git a/pkg/object/minio.go b/pkg/object/minio.go new file mode 100644 index 0000000..8c627e2 --- /dev/null +++ b/pkg/object/minio.go @@ -0,0 +1,87 @@ +//go:build !nos3 +// +build !nos3 + +/* + * JuiceFS, Copyright 2018 Juicedata, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package object + +import ( + "fmt" + "net/url" + "os" + "strings" + + "github.com/aws/aws-sdk-go/aws" + "github.com/aws/aws-sdk-go/aws/credentials" + "github.com/aws/aws-sdk-go/aws/session" + "github.com/aws/aws-sdk-go/service/s3" +) + +type minio struct { + s3client +} + +func (m *minio) String() string { + return fmt.Sprintf("minio://%s/%s/", *m.s3client.ses.Config.Endpoint, m.s3client.bucket) +} + +func newMinio(endpoint, accessKey, secretKey string) (ObjectStorage, error) { + if !strings.Contains(endpoint, "://") { + endpoint = fmt.Sprintf("http://%s", endpoint) + } + uri, err := url.ParseRequestURI(endpoint) + if err != nil { + return nil, fmt.Errorf("Invalid endpoint %s: %s", endpoint, err) + } + ssl := strings.ToLower(uri.Scheme) == "https" + awsConfig := &aws.Config{ + Region: aws.String(awsDefaultRegion), + Endpoint: &uri.Host, + DisableSSL: aws.Bool(!ssl), + S3ForcePathStyle: aws.Bool(true), + HTTPClient: httpClient, + } + if accessKey == "" { + accessKey = os.Getenv("MINIO_ACCESS_KEY") + } + if secretKey == "" { + secretKey = os.Getenv("MINIO_SECRET_KEY") + } + if accessKey != "" { + awsConfig.Credentials = credentials.NewStaticCredentials(accessKey, secretKey, "") + } + + ses, err := session.NewSession(awsConfig) + if err != nil { + return nil, err + } + ses.Handlers.Build.PushFront(disableSha256Func) + + if len(uri.Path) < 2 { + return nil, fmt.Errorf("no bucket name provided in %s: %s", endpoint, err) + } + bucket := uri.Path[1:] + if strings.Contains(bucket, "/") && strings.HasPrefix(bucket, "minio/") { + bucket = bucket[len("minio/"):] + } + bucket = strings.Split(bucket, "/")[0] + return &minio{s3client{bucket, s3.New(ses), ses}}, nil +} + +func init() { + Register("minio", newMinio) +} diff --git a/pkg/object/mss.go b/pkg/object/mss.go new file mode 100644 index 0000000..f944cbe --- /dev/null +++ b/pkg/object/mss.go @@ -0,0 +1,168 @@ +/* + * JuiceFS, Copyright 2018 Juicedata, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package object + +import ( + "crypto/hmac" + "crypto/sha1" + "encoding/base64" + "encoding/xml" + "fmt" + "io" + "net/http" + "net/url" + "strconv" + "strings" + "time" +) + +type Contents struct { + Key string + Size int64 + LastModified time.Time +} + +// ListObjectsOutput presents output for ListObjects. 
+type ListBucketResult struct { + Contents []*Contents + IsTruncated bool + Prefix string + Marker string + MaxKeys string + NextMarker string + CommonPrefixes string +} + +type mss struct { + RestfulStorage +} + +func (u *mss) String() string { + uri, _ := url.ParseRequestURI(u.endpoint) + return fmt.Sprintf("mss://%s/", uri.Host) +} + +var awskeys []string = []string{"x-amz-copy-source"} + +// RequestURL is fully url of api request +func mssSigner(req *http.Request, accessKey, secretKey, signName string) { + toSign := req.Method + "\n" + for _, n := range HEADER_NAMES { + toSign += req.Header.Get(n) + "\n" + } + for _, k := range awskeys { + if req.Header.Get(k) != "" { + toSign += k + ":" + req.Header.Get(k) + "\n" + } + } + bucket := strings.Split(req.URL.Host, ".")[0] + if req.Method == "GET" { + toSign += "/" + bucket + } + toSign += req.URL.Path + h := hmac.New(sha1.New, []byte(secretKey)) + _, _ = h.Write([]byte(toSign)) + sig := base64.StdEncoding.EncodeToString(h.Sum(nil)) + token := signName + " " + accessKey + ":" + sig + req.Header.Add("Authorization", token) +} + +func (c *mss) Copy(dst, src string) error { + uri, _ := url.ParseRequestURI(c.endpoint) + bucket := strings.Split(uri.Host, ".")[0] + source := fmt.Sprintf("%s/%s", bucket, src) + resp, err := c.request("PUT", dst, nil, map[string]string{ + "x-amz-copy-source": source, + }) + if err != nil { + return err + } + defer cleanup(resp) + if resp.StatusCode != 201 && resp.StatusCode != 200 { + return parseError(resp) + } + return nil +} + +func (c *mss) List(prefix, marker string, limit int64) ([]Object, error) { + uri, _ := url.ParseRequestURI(c.endpoint) + + query := url.Values{} + query.Add("prefix", prefix) + query.Add("marker", marker) + if limit > 1000 { + limit = 1000 + } + query.Add("max-keys", strconv.Itoa(int(limit))) + uri.RawQuery = query.Encode() + uri.Path = "/" + req, err := http.NewRequest("GET", uri.String(), nil) + if err != nil { + return nil, err + } + now := time.Now().UTC().Format(http.TimeFormat) + req.Header.Add("Date", now) + mssSigner(req, c.accessKey, c.secretKey, c.signName) + resp, err := httpClient.Do(req) + if err != nil { + return nil, err + } + defer cleanup(resp) + if resp.StatusCode != 200 { + return nil, parseError(resp) + } + if resp.ContentLength <= 0 || resp.ContentLength > (1<<31) { + return nil, fmt.Errorf("invalid content length: %d", resp.ContentLength) + } + data := make([]byte, resp.ContentLength) + if _, err := io.ReadFull(resp.Body, data); err != nil { + return nil, err + } + var out ListBucketResult + err = xml.Unmarshal(data, &out) + if err != nil { + return nil, err + } + objs := make([]Object, len(out.Contents)) + for i, item := range out.Contents { + objs[i] = &obj{ + item.Key, + item.Size, + item.LastModified, + strings.HasSuffix(item.Key, "/"), + } + } + return objs, nil +} + +func newMSS(endpoint, accessKey, secretKey string) (ObjectStorage, error) { + if !strings.Contains(endpoint, "://") { + endpoint = fmt.Sprintf("https://%s", endpoint) + } + qs := &mss{RestfulStorage{ + endpoint: endpoint, + accessKey: accessKey, + secretKey: secretKey, + signName: "AWS", + signer: mssSigner, + }} + return qs, nil +} + +func init() { + Register("mss", newMSS) +} diff --git a/pkg/object/nos.go b/pkg/object/nos.go new file mode 100644 index 0000000..bc0ed76 --- /dev/null +++ b/pkg/object/nos.go @@ -0,0 +1,181 @@ +//go:build !nonos +// +build !nonos + +/* + * JuiceFS, Copyright 2018 Juicedata, Inc. 
+ * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package object + +import ( + "bytes" + "fmt" + "io" + "io/ioutil" + "net/url" + "strings" + "time" + + "github.com/NetEase-Object-Storage/nos-golang-sdk/config" + noslogger "github.com/NetEase-Object-Storage/nos-golang-sdk/logger" + "github.com/NetEase-Object-Storage/nos-golang-sdk/model" + "github.com/NetEase-Object-Storage/nos-golang-sdk/nosclient" +) + +type nos struct { + DefaultObjectStorage + bucket string + client *nosclient.NosClient +} + +func (s *nos) String() string { + return fmt.Sprintf("nos://%s/", s.bucket) +} + +func (s *nos) Head(key string) (Object, error) { + objectRequest := &model.ObjectRequest{ + Bucket: s.bucket, + Object: key, + } + r, err := s.client.GetObjectMetaData(objectRequest) + if err != nil { + return nil, err + } + lastModified := r.Metadata["Last-Modified"] + if lastModified == "" { + return nil, fmt.Errorf("cannot get last modified time") + } + mtime, _ := time.Parse(time.RFC1123, lastModified) + return &obj{ + key, + r.ContentLength, + mtime, + strings.HasSuffix(key, "/"), + }, nil +} + +func (s *nos) Get(key string, off, limit int64) (io.ReadCloser, error) { + params := &model.GetObjectRequest{Bucket: s.bucket, Object: key} + if off > 0 || limit > 0 { + var r string + if limit > 0 { + r = fmt.Sprintf("bytes=%d-%d", off, off+limit-1) + } else { + r = fmt.Sprintf("bytes=%d-", off) + } + params.ObjRange = r + } + resp, err := s.client.GetObject(params) + if err != nil { + logger.Error(err) + return nil, err + } + return resp.Body, nil +} + +func (s *nos) Put(key string, in io.Reader) error { + var body io.ReadSeeker + switch body.(type) { + case io.ReadSeeker: + body = in.(io.ReadSeeker) + default: + data, err := ioutil.ReadAll(in) + if err != nil { + return err + } + body = bytes.NewReader(data) + } + params := &model.PutObjectRequest{ + Bucket: s.bucket, + Object: key, + Body: body, + } + _, err := s.client.PutObjectByStream(params) + return err +} + +func (s *nos) Copy(dst, src string) error { + params := &model.CopyObjectRequest{ + SrcBucket: s.bucket, + SrcObject: src, + DestBucket: s.bucket, + DestObject: dst, + } + return s.client.CopyObject(params) +} + +func (s *nos) Delete(key string) error { + param := model.ObjectRequest{ + Bucket: s.bucket, + Object: key, + } + return s.client.DeleteObject(¶m) +} + +func (s *nos) List(prefix, marker string, limit int64) ([]Object, error) { + param := model.ListObjectsRequest{ + Bucket: s.bucket, + Prefix: prefix, + Marker: marker, + MaxKeys: int(limit), + } + resp, err := s.client.ListObjects(¶m) + if err != nil { + return nil, err + } + n := len(resp.Contents) + objs := make([]Object, n) + for i := 0; i < n; i++ { + o := resp.Contents[i] + mtime, err := time.Parse("2006-01-02T15:04:05 +0800", o.LastModified) + if err == nil { + mtime = mtime.Add(-8 * time.Hour) + } + objs[i] = &obj{o.Key, o.Size, mtime, strings.HasSuffix(o.Key, "/")} + } + return objs, nil +} + +func newNOS(endpoint, accessKey, secretKey string) (ObjectStorage, error) { + if 
!strings.Contains(endpoint, "://") { + endpoint = fmt.Sprintf("https://%s", endpoint) + } + uri, err := url.ParseRequestURI(endpoint) + if err != nil { + return nil, fmt.Errorf("Invalid endpoint: %v, error: %v", endpoint, err) + } + hostParts := strings.SplitN(uri.Host, ".", 2) + bucket := hostParts[0] + + conf := &config.Config{ + Endpoint: hostParts[1], + AccessKey: accessKey, + SecretKey: secretKey, + + NosServiceConnectTimeout: 3, + NosServiceReadWriteTimeout: 60, + NosServiceMaxIdleConnection: 100, + + LogLevel: noslogger.LogLevel(noslogger.ERROR), + } + + nosClient, _ := nosclient.New(conf) + + return &nos{bucket: bucket, client: nosClient}, nil +} + +func init() { + Register("nos", newNOS) +} diff --git a/pkg/object/object_storage.go b/pkg/object/object_storage.go new file mode 100644 index 0000000..1eee70e --- /dev/null +++ b/pkg/object/object_storage.go @@ -0,0 +1,146 @@ +/* + * JuiceFS, Copyright 2018 Juicedata, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package object + +import ( + "context" + "errors" + "fmt" + "os" + "sync" + "time" + + "github.com/juicedata/juicefs/pkg/utils" +) + +var ctx = context.Background() +var logger = utils.GetLogger("juicefs") + +var UserAgent = "JuiceFS" + +type MtimeChanger interface { + Chtimes(path string, mtime time.Time) error +} +type File interface { + Object + Owner() string + Group() string + Mode() os.FileMode +} + +type file struct { + obj + owner string + group string + mode os.FileMode +} + +func (f *file) Owner() string { return f.owner } +func (f *file) Group() string { return f.group } +func (f *file) Mode() os.FileMode { return f.mode } + +func MarshalObject(o Object) map[string]interface{} { + m := make(map[string]interface{}) + m["key"] = o.Key() + m["size"] = o.Size() + m["mtime"] = o.Mtime().UnixNano() + m["isdir"] = o.IsDir() + if f, ok := o.(File); ok { + m["mode"] = f.Mode() + m["owner"] = f.Owner() + m["group"] = f.Group() + } + return m +} + +func UnmarshalObject(m map[string]interface{}) Object { + mtime := time.Unix(0, int64(m["mtime"].(float64))) + o := obj{m["key"].(string), int64(m["size"].(float64)), mtime, m["isdir"].(bool)} + if _, ok := m["mode"]; ok { + f := file{o, m["owner"].(string), m["group"].(string), os.FileMode(m["mode"].(float64))} + return &f + } + return &o +} + +type FileSystem interface { + MtimeChanger + Chmod(path string, mode os.FileMode) error + Chown(path string, owner, group string) error +} + +var notSupported = errors.New("not supported") + +type DefaultObjectStorage struct{} + +func (s DefaultObjectStorage) Create() error { + return nil +} + +func (s DefaultObjectStorage) Head(key string) (Object, error) { + return nil, notSupported +} + +func (s DefaultObjectStorage) CreateMultipartUpload(key string) (*MultipartUpload, error) { + return nil, notSupported +} + +func (s DefaultObjectStorage) UploadPart(key string, uploadID string, num int, body []byte) (*Part, error) { + return nil, notSupported +} + +func (s DefaultObjectStorage) AbortUpload(key string, uploadID 
string) {} + +func (s DefaultObjectStorage) CompleteUpload(key string, uploadID string, parts []*Part) error { + return notSupported +} + +func (s DefaultObjectStorage) ListUploads(marker string) ([]*PendingPart, string, error) { + return nil, "", nil +} + +func (s DefaultObjectStorage) List(prefix, marker string, limit int64) ([]Object, error) { + return nil, notSupported +} + +func (s DefaultObjectStorage) ListAll(prefix, marker string) (<-chan Object, error) { + return nil, notSupported +} + +type Creator func(bucket, accessKey, secretKey string) (ObjectStorage, error) + +var storages = make(map[string]Creator) + +func Register(name string, register Creator) { + storages[name] = register +} + +func CreateStorage(name, endpoint, accessKey, secretKey string) (ObjectStorage, error) { + f, ok := storages[name] + if ok { + logger.Debugf("Creating %s storage at endpoint %s", name, endpoint) + return f(endpoint, accessKey, secretKey) + } + return nil, fmt.Errorf("invalid storage: %s", name) +} + +var bufPool = sync.Pool{ + New: func() interface{} { + buf := make([]byte, 32<<10) + return &buf + }, +} diff --git a/pkg/object/object_storage_test.go b/pkg/object/object_storage_test.go new file mode 100644 index 0000000..0b46466 --- /dev/null +++ b/pkg/object/object_storage_test.go @@ -0,0 +1,524 @@ +/* + * JuiceFS, Copyright 2018 Juicedata, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package object + +import ( + "bytes" + "crypto/rand" + "crypto/rsa" + "encoding/json" + "fmt" + "io/ioutil" + "math" + "os" + "reflect" + "strings" + "testing" + "time" +) + +func get(s ObjectStorage, k string, off, limit int64) (string, error) { + r, err := s.Get(k, off, limit) + if err != nil { + return "", err + } + data, err := ioutil.ReadAll(r) + if err != nil { + return "", err + } + return string(data), nil +} + +func listAll(s ObjectStorage, prefix, marker string, limit int64) ([]Object, error) { + r, err := s.List(prefix, marker, limit) + if err == nil { + return r, nil + } + ch, err := s.ListAll(prefix, marker) + if err == nil { + objs := make([]Object, 0) + for obj := range ch { + if len(objs) < int(limit) { + objs = append(objs, obj) + } + } + return objs, nil + } + return nil, err +} + +// nolint:errcheck +func testStorage(t *testing.T, s ObjectStorage) { + if err := s.Create(); err != nil { + t.Fatalf("Can't create bucket %s: %s", s, err) + } + + s = WithPrefix(s, "unit-test/") + defer s.Delete("test") + k := "large" + defer s.Delete(k) + + _, err := s.Get("not_exists", 0, -1) + if err == nil { + t.Fatalf("Get should failed: %s", err) + } + + br := []byte("hello") + if err := s.Put("test", bytes.NewReader(br)); err != nil { + t.Fatalf("PUT failed: %s", err.Error()) + } + + if d, e := get(s, "test", 0, -1); d != "hello" { + t.Fatalf("expect hello, but got %v, error:%s", d, e) + } + if d, e := get(s, "test", 2, 3); d != "llo" { + t.Fatalf("expect llo, but got %v, error:%s", d, e) + } + if d, e := get(s, "test", 2, 2); d != "ll" { + t.Fatalf("expect ll, but got %v, error:%s", d, e) + } + if d, e := get(s, "test", 4, 2); d != "o" { + t.Errorf("out-of-range get: 'o', but got %v, error:%s", len(d), e) + } + switch s.(*withPrefix).os.(type) { + case FileSystem: + objs, err2 := listAll(s, "", "", 2) + if err2 == nil { + if len(objs) != 2 { + t.Fatalf("List should return 2 keys, but got %d", len(objs)) + } + if objs[0].Key() != "" { + t.Fatalf("First key should be empty string, but got %s", objs[0].Key()) + } + if objs[0].Size() != 0 { + t.Fatalf("First object size should be 0, but got %d", objs[0].Size()) + } + if objs[1].Key() != "test" { + t.Fatalf("First key should be test, but got %s", objs[1].Key()) + } + if !strings.Contains(s.String(), "encrypted") && objs[1].Size() != 5 { + t.Fatalf("Size of first key shold be 5, but got %v", objs[1].Size()) + } + now := time.Now() + if objs[1].Mtime().Before(now.Add(-30*time.Second)) || objs[1].Mtime().After(now.Add(time.Second*30)) { + t.Fatalf("Mtime of key should be within 10 seconds, but got %s", objs[1].Mtime().Sub(now)) + } + } else { + t.Fatalf("list failed: %s", err2.Error()) + } + + objs, err2 = listAll(s, "", "test2", 1) + if err2 != nil { + t.Fatalf("list3 failed: %s", err2.Error()) + } else if len(objs) != 0 { + t.Fatalf("list3 should not return anything, but got %d", len(objs)) + } + default: + objs, err2 := listAll(s, "", "", 1) + if err2 == nil { + if len(objs) != 1 { + t.Fatalf("List should return 1 keys, but got %d", len(objs)) + } + if objs[0].Key() != "test" { + t.Fatalf("First key should be test, but got %s", objs[0].Key()) + } + if !strings.Contains(s.String(), "encrypted") && objs[0].Size() != 5 { + t.Fatalf("Size of first key shold be 5, but got %v", objs[0].Size()) + } + now := time.Now() + if objs[0].Mtime().Before(now.Add(-30*time.Second)) || objs[0].Mtime().After(now.Add(time.Second*30)) { + t.Fatalf("Mtime of key should be within 10 seconds, but got %s", objs[0].Mtime().Sub(now)) + } + } else { + 
t.Fatalf("list failed: %s", err2.Error()) + } + + objs, err2 = listAll(s, "", "test2", 1) + if err2 != nil { + t.Fatalf("list3 failed: %s", err2.Error()) + } else if len(objs) != 0 { + t.Fatalf("list3 should not return anything, but got %d", len(objs)) + } + } + + f, _ := ioutil.TempFile("", "test") + f.Write([]byte("this is a file")) + f.Seek(0, 0) + os.Remove(f.Name()) + defer f.Close() + if err := s.Put("file", f); err != nil { + t.Fatalf("failed to put from file") + } else if _, err := s.Head("file"); err != nil { + t.Fatalf("file should exists") + } else { + s.Delete("file") + } + + if _, err := s.Head("test"); err != nil { + t.Fatalf("check exists failed: %s", err.Error()) + } + + if err := s.Delete("test"); err != nil { + t.Fatalf("delete failed: %s", err) + } + + if err := s.Delete("test"); err != nil { + t.Fatalf("delete non exists: %v", err) + } + + if uploader, err := s.CreateMultipartUpload(k); err == nil { + partSize := uploader.MinPartSize + uploadID := uploader.UploadID + defer s.AbortUpload(k, uploadID) + + part1, err := s.UploadPart(k, uploadID, 1, make([]byte, partSize)) + if err != nil { + t.Fatalf("UploadPart 1 failed: %s", err) + } + if pending, marker, err := s.ListUploads(""); err != nil { + t.Logf("ListMultipart fail: %s", err.Error()) + } else { + println(len(pending), marker) + } + part2Size := 1 << 20 + _, err = s.UploadPart(k, uploadID, 2, make([]byte, part2Size)) + if err != nil { + t.Fatalf("UploadPart 2 failed: %s", err) + } + part2Size = 2 << 20 + part2, err := s.UploadPart(k, uploadID, 2, make([]byte, part2Size)) + if err != nil { + t.Fatalf("UploadPart 2 failed: %s", err) + } + + if err := s.CompleteUpload(k, uploadID, []*Part{part1, part2}); err != nil { + t.Fatalf("CompleteMultipart failed: %s", err.Error()) + } + if in, err := s.Get(k, 0, -1); err != nil { + t.Fatalf("large not exists") + } else if d, err := ioutil.ReadAll(in); err != nil { + t.Fatalf("fail to read large file") + } else if len(d) != partSize+part2Size { + t.Fatalf("size of large file: %d != %d", len(d), partSize+part2Size) + } + } else { + t.Logf("%s does not support multipart upload: %s", s, err.Error()) + } + + // Copy empty objects + defer s.Delete("empty") + if err := s.Put("empty", bytes.NewReader([]byte{})); err != nil { + t.Fatalf("PUT empty object failed: %s", err.Error()) + } + + // Copy `/` suffixed object + defer s.Delete("slash/") + if err := s.Put("slash/", bytes.NewReader([]byte{})); err != nil { + t.Fatalf("PUT `/` suffixed object failed: %s", err.Error()) + } +} + +func TestMem(t *testing.T) { + m, _ := newMem("", "", "") + testStorage(t, m) +} + +func TestDisk(t *testing.T) { + s, _ := newDisk("/tmp/abc/", "", "") + testStorage(t, s) +} + +func TestQingStor(t *testing.T) { + if os.Getenv("QY_ACCESS_KEY") == "" { + t.SkipNow() + } + s, _ := newQingStor("https://test.pek3a.qingstor.com", + os.Getenv("QY_ACCESS_KEY"), os.Getenv("QY_SECRET_KEY")) + testStorage(t, s) + + //private cloud + if os.Getenv("PRIVATE_QY_ACCESS_KEY") == "" { + t.SkipNow() + } + s2, _ := newQingStor("http://test.jn1.is.shanhe.com", + os.Getenv("PRIVATE_QY_ACCESS_KEY"), os.Getenv("PRIVATE_QY_SECRET_KEY")) + testStorage(t, s2) +} + +func TestS3(t *testing.T) { + if os.Getenv("AWS_ACCESS_KEY_ID") == "" { + t.SkipNow() + } + s, _ := newS3(fmt.Sprintf("https://%s", os.Getenv("S3_TEST_BUCKET")), + os.Getenv("AWS_ACCESS_KEY_ID"), os.Getenv("AWS_SECRET_ACCESS_KEY")) + testStorage(t, s) +} + +func TestOSS(t *testing.T) { + if os.Getenv("ALICLOUD_ACCESS_KEY_ID") == "" { + t.SkipNow() + } + bucketName := "test" 
+ if b := os.Getenv("OSS_TEST_BUCKET"); b != "" { + bucketName = b + } + s, _ := newOSS(fmt.Sprintf("https://%s", bucketName), + os.Getenv("ALICLOUD_ACCESS_KEY_ID"), os.Getenv("ALICLOUD_ACCESS_KEY_SECRET")) + testStorage(t, s) +} + +func TestUFile(t *testing.T) { + if os.Getenv("UCLOUD_PUBLIC_KEY") == "" { + t.SkipNow() + } + ufile, _ := newUFile("https://test.us-ca.ufileos.com", + os.Getenv("UCLOUD_PUBLIC_KEY"), os.Getenv("UCLOUD_PRIVATE_KEY")) + testStorage(t, ufile) +} + +func TestGS(t *testing.T) { + if os.Getenv("GOOGLE_APPLICATION_CREDENTIALS") == "" { + t.SkipNow() + } + gs, _ := newGS("gs://zhijian-test/", "", "") + testStorage(t, gs) +} + +func TestQiniu(t *testing.T) { + if os.Getenv("QINIU_ACCESS_KEY") == "" { + t.SkipNow() + } + qiniu, _ := newQiniu("https://test.cn-east-1-s3.qiniu.com", + os.Getenv("QINIU_ACCESS_KEY"), os.Getenv("QINIU_SECRET_KEY")) + testStorage(t, qiniu) + qiniu, _ = newQiniu("https://test.cn-north-1-s3.qiniu.com", + os.Getenv("QINIU_ACCESS_KEY"), os.Getenv("QINIU_SECRET_KEY")) + testStorage(t, qiniu) +} + +func TestKS3(t *testing.T) { + if os.Getenv("KS3_ACCESS_KEY") == "" { + t.SkipNow() + } + ks3, _ := newKS3("https://test.kss.ksyun.com", + os.Getenv("KS3_ACCESS_KEY"), os.Getenv("KS3_SECRET_KEY")) + testStorage(t, ks3) +} + +func TestCOS(t *testing.T) { + if os.Getenv("COS_SECRETID") == "" { + t.SkipNow() + } + cos, _ := newCOS( + fmt.Sprintf("https://%s", os.Getenv("COS_TEST_BUCKET")), + os.Getenv("COS_SECRETID"), os.Getenv("COS_SECRETKEY")) + testStorage(t, cos) +} + +func TestAzure(t *testing.T) { + if os.Getenv("AZURE_STORAGE_ACCOUNT") == "" { + t.SkipNow() + } + abs, _ := newWabs("https://test-chunk.core.chinacloudapi.cn", + os.Getenv("AZURE_STORAGE_ACCOUNT"), os.Getenv("AZURE_STORAGE_KEY")) + testStorage(t, abs) +} + +func TestNOS(t *testing.T) { + if os.Getenv("NOS_ACCESS_KEY") == "" { + t.SkipNow() + } + nos, _ := newNOS("https://test.nos-eastchina1.126.net", + os.Getenv("NOS_ACCESS_KEY"), os.Getenv("NOS_SECRET_KEY")) + testStorage(t, nos) +} + +func TestMSS(t *testing.T) { + if os.Getenv("MSS_ACCESS_KEY") == "" { + t.SkipNow() + } + mss, _ := newMSS("https://test.mtmss.com", + os.Getenv("MSS_ACCESS_KEY"), os.Getenv("MSS_SECRET_KEY")) + testStorage(t, mss) +} + +func TestJSS(t *testing.T) { + if os.Getenv("JSS_ACCESS_KEY") == "" { + t.SkipNow() + } + jss, _ := newJSS("https://test.s3.cn-north-1.jcloudcs.com", + os.Getenv("JSS_ACCESS_KEY"), os.Getenv("JSS_SECRET_KEY")) + testStorage(t, jss) +} + +func TestSpeedy(t *testing.T) { + if os.Getenv("SPEEDY_ACCESS_KEY") == "" { + t.SkipNow() + } + cos, _ := newSpeedy("https://test.oss-cn-beijing.speedycloud.org", + os.Getenv("SPEEDY_ACCESS_KEY"), os.Getenv("SPEEDY_SECRET_KEY")) + testStorage(t, cos) +} + +func TestB2(t *testing.T) { + if os.Getenv("B2_ACCOUNT_ID") == "" { + t.SkipNow() + } + b, err := newB2("https://jfs-test.backblaze.com", os.Getenv("B2_ACCOUNT_ID"), os.Getenv("B2_APP_KEY")) + if err != nil { + t.Fatalf("create B2: %s", err) + } + testStorage(t, b) +} + +func TestSpace(t *testing.T) { + if os.Getenv("SPACE_ACCESS_KEY") == "" { + t.SkipNow() + } + b, _ := newSpace("https://test.nyc3.digitaloceanspaces.com", os.Getenv("SPACE_ACCESS_KEY"), os.Getenv("SPACE_SECRET_KEY")) + testStorage(t, b) +} + +func TestBOS(t *testing.T) { + if os.Getenv("BDCLOUD_ACCESS_KEY") == "" { + t.SkipNow() + } + b, _ := newBOS(fmt.Sprintf("https://%s", os.Getenv("BOS_TEST_BUCKET")), + os.Getenv("BDCLOUD_ACCESS_KEY"), os.Getenv("BDCLOUD_SECRET_KEY")) + testStorage(t, b) +} + +func TestSftp(t *testing.T) { + if 
os.Getenv("SFTP_HOST") == "" { + t.SkipNow() + } + b, _ := newSftp(os.Getenv("SFTP_HOST"), os.Getenv("SFTP_USER"), os.Getenv("SFTP_PASS")) + testStorage(t, b) +} + +func TestOBS(t *testing.T) { + if os.Getenv("HWCLOUD_ACCESS_KEY") == "" { + t.SkipNow() + } + b, _ := newOBS(fmt.Sprintf("https://%s", os.Getenv("OBS_TEST_BUCKET")), + os.Getenv("HWCLOUD_ACCESS_KEY"), os.Getenv("HWCLOUD_SECRET_KEY")) + testStorage(t, b) +} + +func TestHDFS(t *testing.T) { + if os.Getenv("HDFS_ADDR") == "" { + t.Skip() + } + dfs, _ := newHDFS(os.Getenv("HDFS_ADDR"), "", "") + testStorage(t, dfs) +} + +func TestOOS(t *testing.T) { + if os.Getenv("OOS_ACCESS_KEY") == "" { + t.SkipNow() + } + b, _ := newOOS(fmt.Sprintf("https://%s", os.Getenv("OOS_TEST_BUCKET")), + os.Getenv("OOS_ACCESS_KEY"), os.Getenv("OOS_SECRET_KEY")) + testStorage(t, b) +} + +func TestScw(t *testing.T) { + if os.Getenv("SCW_ACCESS_KEY") == "" { + t.SkipNow() + } + b, _ := newScw(fmt.Sprintf("https://%s", os.Getenv("SCW_TEST_BUCKET")), os.Getenv("SCW_ACCESS_KEY"), os.Getenv("SCW_SECRET_KEY")) + testStorage(t, b) +} + +func TestMinIO(t *testing.T) { + if os.Getenv("MINIO_TEST_BUCKET") == "" { + t.SkipNow() + } + b, _ := newMinio(fmt.Sprintf("http://%s/some/path", os.Getenv("MINIO_TEST_BUCKET")), "", "") + testStorage(t, b) +} + +// func TestUpYun(t *testing.T) { +// s, _ := newUpyun("http://jfstest", "test", "") +// testStorage(t, s) +// } + +func TestYovole(t *testing.T) { + if os.Getenv("OS2_TEST_BUCKET") == "" { + t.SkipNow() + } + s, _ := newYovole(os.Getenv("OS2_TEST_BUCKET"), os.Getenv("OS2_ACCESS_KEY"), os.Getenv("OS2_SECRET_KEY")) + testStorage(t, s) +} + +func TestWebDAV(t *testing.T) { + if os.Getenv("WEBDAV_TEST_BUCKET") == "" { + t.SkipNow() + } + s, _ := newWebDAV(os.Getenv("WEBDAV_TEST_BUCKET"), "", "") + testStorage(t, s) +} + +func TestEncrypted(t *testing.T) { + s, _ := CreateStorage("mem", "", "", "") + privkey, _ := rsa.GenerateKey(rand.Reader, 2048) + kc := NewRSAEncryptor(privkey) + dc := NewAESEncryptor(kc) + es := NewEncrypted(s, dc) + testStorage(t, es) +} + +func TestMarsharl(t *testing.T) { + if os.Getenv("HDFS_ADDR") == "" { + t.Skip() + } + s, _ := newHDFS(os.Getenv("HDFS_ADDR"), "", "") + _ = s.Put("hello", bytes.NewReader([]byte("world"))) + fs := s.(FileSystem) + _ = fs.Chown("hello", "user", "group") + _ = fs.Chmod("hello", 0764) + o, _ := s.Head("hello") + + m := MarshalObject(o) + d, _ := json.Marshal(m) + var m2 map[string]interface{} + if err := json.Unmarshal(d, &m2); err != nil { + t.Fatalf("unmarshal: %s", err) + } + o2 := UnmarshalObject(m2) + if math.Abs(float64(o2.Mtime().UnixNano()-o.Mtime().UnixNano())) > 1000 { + t.Fatalf("mtime %s != %s", o2.Mtime(), o.Mtime()) + } + o2.(*file).mtime = o.Mtime() + if !reflect.DeepEqual(o, o2) { + t.Fatalf("%+v != %+v", o2, o) + } +} + +func TestSharding(t *testing.T) { + s, _ := NewSharded("mem", "%d", "", "", 10) + testStorage(t, s) +} + +func TestNameString(t *testing.T) { + s, _ := newMem("test", "", "") + s = WithPrefix(s, "a/") + s = WithPrefix(s, "b/") + if s.String() != "mem://test/a/b/" { + t.Fatalf("name with two prefix does not match: %s", s.String()) + } +} diff --git a/pkg/object/obs.go b/pkg/object/obs.go new file mode 100644 index 0000000..f3f180c --- /dev/null +++ b/pkg/object/obs.go @@ -0,0 +1,325 @@ +//go:build !noobs +// +build !noobs + +/* + * JuiceFS, Copyright 2019 Juicedata, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package object + +import ( + "bytes" + "crypto/md5" + "encoding/base64" + "fmt" + "io" + "io/ioutil" + "net/url" + "os" + "strings" + + "github.com/huaweicloud/huaweicloud-sdk-go-obs/obs" + "golang.org/x/net/http/httpproxy" +) + +const obsDefaultRegion = "cn-north-1" + +type obsClient struct { + bucket string + region string + c *obs.ObsClient +} + +func (s *obsClient) String() string { + return fmt.Sprintf("obs://%s/", s.bucket) +} + +func (s *obsClient) Create() error { + params := &obs.CreateBucketInput{} + params.Bucket = s.bucket + params.Location = s.region + _, err := s.c.CreateBucket(params) + if err != nil && isExists(err) { + err = nil + } + return err +} + +func (s *obsClient) Head(key string) (Object, error) { + params := &obs.GetObjectMetadataInput{ + Bucket: s.bucket, + Key: key, + } + r, err := s.c.GetObjectMetadata(params) + if err != nil { + return nil, err + } + return &obj{ + key, + r.ContentLength, + r.LastModified, + strings.HasSuffix(key, "/"), + }, nil +} + +func (s *obsClient) Get(key string, off, limit int64) (io.ReadCloser, error) { + params := &obs.GetObjectInput{} + params.Bucket = s.bucket + params.Key = key + params.RangeStart = off + if limit > 0 { + params.RangeEnd = off + limit - 1 + } + resp, err := s.c.GetObject(params) + if err != nil { + return nil, err + } + return resp.Body, nil +} + +func (s *obsClient) Put(key string, in io.Reader) error { + var body io.ReadSeeker + var vlen int64 + var sum []byte + if b, ok := in.(io.ReadSeeker); ok { + var err error + h := md5.New() + buf := bufPool.Get().(*[]byte) + defer bufPool.Put(buf) + vlen, err = io.CopyBuffer(h, in, *buf) + if err != nil { + return err + } + _, err = b.Seek(0, io.SeekStart) + if err != nil { + return err + } + sum = h.Sum(nil) + body = b + } else { + data, err := ioutil.ReadAll(in) + if err != nil { + return err + } + vlen = int64(len(data)) + s := md5.Sum(data) + sum = s[:] + body = bytes.NewReader(data) + } + + params := &obs.PutObjectInput{} + params.Bucket = s.bucket + params.Key = key + params.Body = body + params.ContentLength = vlen + params.ContentMD5 = base64.StdEncoding.EncodeToString(sum[:]) + + _, err := s.c.PutObject(params) + return err +} + +func (s *obsClient) Copy(dst, src string) error { + params := &obs.CopyObjectInput{} + params.Bucket = s.bucket + params.Key = dst + params.CopySourceBucket = s.bucket + params.CopySourceKey = src + _, err := s.c.CopyObject(params) + return err +} + +func (s *obsClient) Delete(key string) error { + params := obs.DeleteObjectInput{} + params.Bucket = s.bucket + params.Key = key + _, err := s.c.DeleteObject(¶ms) + return err +} + +func (s *obsClient) List(prefix, marker string, limit int64) ([]Object, error) { + input := &obs.ListObjectsInput{ + Bucket: s.bucket, + Marker: marker, + } + input.Prefix = prefix + input.MaxKeys = int(limit) + resp, err := s.c.ListObjects(input) + if err != nil { + return nil, err + } + n := len(resp.Contents) + objs := make([]Object, n) + for i := 0; i < n; i++ { + o := resp.Contents[i] + objs[i] = &obj{o.Key, o.Size, o.LastModified, strings.HasSuffix(o.Key, "/")} + } + return objs, 
nil +} + +func (s *obsClient) ListAll(prefix, marker string) (<-chan Object, error) { + return nil, notSupported +} + +func (s *obsClient) CreateMultipartUpload(key string) (*MultipartUpload, error) { + params := &obs.InitiateMultipartUploadInput{} + params.Bucket = s.bucket + params.Key = key + resp, err := s.c.InitiateMultipartUpload(params) + if err != nil { + return nil, err + } + return &MultipartUpload{UploadID: resp.UploadId, MinPartSize: 5 << 20, MaxCount: 10000}, nil +} + +func (s *obsClient) UploadPart(key string, uploadID string, num int, body []byte) (*Part, error) { + params := &obs.UploadPartInput{} + params.Bucket = s.bucket + params.Key = key + params.UploadId = uploadID + params.Body = bytes.NewReader(body) + params.PartNumber = num + params.PartSize = int64(len(body)) + sum := md5.Sum(body) + params.ContentMD5 = base64.StdEncoding.EncodeToString(sum[:]) + resp, err := s.c.UploadPart(params) + if err != nil { + return nil, err + } + return &Part{Num: num, ETag: resp.ETag}, nil +} + +func (s *obsClient) AbortUpload(key string, uploadID string) { + params := &obs.AbortMultipartUploadInput{} + params.Bucket = s.bucket + params.Key = key + params.UploadId = uploadID + _, _ = s.c.AbortMultipartUpload(params) +} + +func (s *obsClient) CompleteUpload(key string, uploadID string, parts []*Part) error { + params := &obs.CompleteMultipartUploadInput{} + params.Bucket = s.bucket + params.Key = key + params.UploadId = uploadID + for i := range parts { + params.Parts = append(params.Parts, obs.Part{ETag: parts[i].ETag, PartNumber: parts[i].Num}) + } + _, err := s.c.CompleteMultipartUpload(params) + return err +} + +func (s *obsClient) ListUploads(marker string) ([]*PendingPart, string, error) { + input := &obs.ListMultipartUploadsInput{ + Bucket: s.bucket, + KeyMarker: marker, + } + + result, err := s.c.ListMultipartUploads(input) + if err != nil { + return nil, "", err + } + parts := make([]*PendingPart, len(result.Uploads)) + for i, u := range result.Uploads { + parts[i] = &PendingPart{u.Key, u.UploadId, u.Initiated} + } + var nextMarker string + if result.NextKeyMarker != "" { + nextMarker = result.NextKeyMarker + } + return parts, nextMarker, nil +} + +func autoOBSEndpoint(bucketName, accessKey, secretKey string) (string, error) { + region := obsDefaultRegion + if r := os.Getenv("HWCLOUD_DEFAULT_REGION"); r != "" { + region = r + } + endpoint := fmt.Sprintf("https://obs.%s.myhuaweicloud.com", region) + + obsCli, err := obs.New(accessKey, secretKey, endpoint) + if err != nil { + return "", err + } + defer obsCli.Close() + + result, err := obsCli.ListBuckets(&obs.ListBucketsInput{QueryLocation: true}) + if err != nil { + return "", err + } + for _, bucket := range result.Buckets { + if bucket.Name == bucketName { + logger.Debugf("Get location of bucket %q: %s", bucketName, bucket.Location) + return fmt.Sprintf("obs.%s.myhuaweicloud.com", bucket.Location), nil + } + } + return "", fmt.Errorf("bucket %q does not exist", bucketName) +} + +func newOBS(endpoint, accessKey, secretKey string) (ObjectStorage, error) { + if !strings.Contains(endpoint, "://") { + endpoint = fmt.Sprintf("https://%s", endpoint) + } + uri, err := url.ParseRequestURI(endpoint) + if err != nil { + return nil, fmt.Errorf("invalid endpoint %s: %q", endpoint, err) + } + hostParts := strings.SplitN(uri.Host, ".", 2) + bucketName := hostParts[0] + if len(hostParts) > 1 { + endpoint = fmt.Sprintf("%s://%s", uri.Scheme, hostParts[1]) + } + + if accessKey == "" { + accessKey = os.Getenv("HWCLOUD_ACCESS_KEY") + secretKey = 
os.Getenv("HWCLOUD_SECRET_KEY") + } + + var region string + if len(hostParts) == 1 { + if endpoint, err = autoOBSEndpoint(bucketName, accessKey, secretKey); err != nil { + return nil, fmt.Errorf("cannot get location of bucket %s: %q", bucketName, err) + } + if !strings.HasPrefix(endpoint, "http") { + endpoint = fmt.Sprintf("%s://%s", uri.Scheme, endpoint) + } + } else { + region = strings.Split(hostParts[1], ".")[1] + } + + // Use proxy setting from environment variables: HTTP_PROXY, HTTPS_PROXY, NO_PROXY + if uri, err = url.ParseRequestURI(endpoint); err != nil { + return nil, fmt.Errorf("invalid endpoint %s: %q", endpoint, err) + } + proxyURL, err := httpproxy.FromEnvironment().ProxyFunc()(uri) + if err != nil { + return nil, fmt.Errorf("get proxy url for endpoint: %s error: %q", endpoint, err) + } + var urlString string + if proxyURL != nil { + urlString = proxyURL.String() + } + + // Empty proxy url string has no effect + // there is a bug in the retry of PUT (did not call Seek(0,0) before retry), so disable the retry here + c, err := obs.New(accessKey, secretKey, endpoint, obs.WithProxyUrl(urlString), obs.WithMaxRetryCount(0)) + if err != nil { + return nil, fmt.Errorf("fail to initialize OBS: %q", err) + } + return &obsClient{bucketName, region, c}, nil +} + +func init() { + Register("obs", newOBS) +} diff --git a/pkg/object/oos.go b/pkg/object/oos.go new file mode 100644 index 0000000..eb22fbb --- /dev/null +++ b/pkg/object/oos.go @@ -0,0 +1,94 @@ +//go:build !nos3 +// +build !nos3 + +/* + * JuiceFS, Copyright 2018 Juicedata, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package object + +import ( + "fmt" + "net/url" + "strings" + + "github.com/aws/aws-sdk-go/aws" + "github.com/aws/aws-sdk-go/aws/credentials" + "github.com/aws/aws-sdk-go/aws/session" + "github.com/aws/aws-sdk-go/service/s3" +) + +type oos struct { + s3client +} + +func (s *oos) String() string { + return fmt.Sprintf("oos://%s/", s.s3client.bucket) +} + +func (s *oos) Create() error { + _, err := s.List("", "", 1) + if err != nil { + return fmt.Errorf("please create bucket %s manually", s.s3client.bucket) + } + return err +} + +func (s *oos) List(prefix, marker string, limit int64) ([]Object, error) { + if limit > 1000 { + limit = 1000 + } + objs, err := s.s3client.List(prefix, marker, limit) + if marker != "" && len(objs) > 0 && objs[0].Key() == marker { + objs = objs[1:] + } + return objs, err +} + +func newOOS(endpoint, accessKey, secretKey string) (ObjectStorage, error) { + if !strings.Contains(endpoint, "://") { + endpoint = fmt.Sprintf("https://%s", endpoint) + } + uri, err := url.ParseRequestURI(endpoint) + if err != nil { + return nil, fmt.Errorf("Invalid endpoint %s: %s", endpoint, err) + } + ssl := strings.ToLower(uri.Scheme) == "https" + hostParts := strings.Split(uri.Host, ".") + bucket := hostParts[0] + region := hostParts[1][4:] + endpoint = uri.Host[len(bucket)+1:] + forcePathStyle := strings.Contains(strings.ToLower(endpoint), "xstore.ctyun.cn") + + awsConfig := &aws.Config{ + Region: ®ion, + Endpoint: &endpoint, + DisableSSL: aws.Bool(!ssl), + S3ForcePathStyle: aws.Bool(!forcePathStyle), + HTTPClient: httpClient, + Credentials: credentials.NewStaticCredentials(accessKey, secretKey, ""), + } + + ses, err := session.NewSession(awsConfig) + if err != nil { + return nil, fmt.Errorf("OOS session: %s", err) + } + ses.Handlers.Build.PushFront(disableSha256Func) + return &oos{s3client{bucket, s3.New(ses), ses}}, nil +} + +func init() { + Register("oos", newOOS) +} diff --git a/pkg/object/oss.go b/pkg/object/oss.go new file mode 100644 index 0000000..8f3fe11 --- /dev/null +++ b/pkg/object/oss.go @@ -0,0 +1,403 @@ +//go:build !nooss +// +build !nooss + +/* + * JuiceFS, Copyright 2018 Juicedata, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package object + +import ( + "bytes" + "encoding/json" + "fmt" + "io" + "io/ioutil" + "net" + "net/http" + "net/url" + "os" + "strconv" + "strings" + "time" + + "github.com/aliyun/aliyun-oss-go-sdk/oss" +) + +const ossDefaultRegionID = "cn-hangzhou" + +type ossClient struct { + client *oss.Client + bucket *oss.Bucket +} + +func (o *ossClient) String() string { + return fmt.Sprintf("oss://%s/", o.bucket.BucketName) +} + +func (o *ossClient) Create() error { + err := o.bucket.Client.CreateBucket(o.bucket.BucketName) + if err != nil && isExists(err) { + err = nil + } + return err +} + +func (o *ossClient) checkError(err error) error { + if err == nil { + return nil + } + msg := err.Error() + if strings.Contains(msg, "InvalidAccessKeyId") || strings.Contains(msg, "SecurityTokenExpired") { + logger.Warnf("refresh security token: %s", err) + go o.refreshToken() + } + return err +} + +func (o *ossClient) Head(key string) (Object, error) { + r, err := o.bucket.GetObjectMeta(key) + if o.checkError(err) != nil { + return nil, err + } + + lastModified := r.Get("Last-Modified") + if lastModified == "" { + return nil, fmt.Errorf("cannot get last modified time") + } + contentLength := r.Get("Content-Length") + mtime, _ := time.Parse(time.RFC1123, lastModified) + size, _ := strconv.ParseInt(contentLength, 10, 64) + return &obj{ + key, + size, + mtime, + strings.HasSuffix(key, "/"), + }, nil +} + +func (o *ossClient) Get(key string, off, limit int64) (resp io.ReadCloser, err error) { + if off > 0 || limit > 0 { + var r string + if limit > 0 { + r = fmt.Sprintf("%d-%d", off, off+limit-1) + } else { + r = fmt.Sprintf("%d-", off) + } + resp, err = o.bucket.GetObject(key, oss.NormalizedRange(r), oss.RangeBehavior("standard")) + } else { + resp, err = o.bucket.GetObject(key) + if err == nil { + resp = verifyChecksum(resp, + resp.(*oss.Response).Headers.Get(oss.HTTPHeaderOssMetaPrefix+checksumAlgr)) + } + } + err = o.checkError(err) + return +} + +func (o *ossClient) Put(key string, in io.Reader) error { + if ins, ok := in.(io.ReadSeeker); ok { + option := oss.Meta(checksumAlgr, generateChecksum(ins)) + return o.checkError(o.bucket.PutObject(key, in, option)) + } + return o.checkError(o.bucket.PutObject(key, in)) +} + +func (o *ossClient) Copy(dst, src string) error { + _, err := o.bucket.CopyObject(src, dst) + return o.checkError(err) +} + +func (o *ossClient) Delete(key string) error { + return o.checkError(o.bucket.DeleteObject(key)) +} + +func (o *ossClient) List(prefix, marker string, limit int64) ([]Object, error) { + if limit > 1000 { + limit = 1000 + } + result, err := o.bucket.ListObjects(oss.Prefix(prefix), + oss.Marker(marker), oss.MaxKeys(int(limit))) + if o.checkError(err) != nil { + return nil, err + } + n := len(result.Objects) + objs := make([]Object, n) + for i := 0; i < n; i++ { + o := result.Objects[i] + objs[i] = &obj{o.Key, o.Size, o.LastModified, strings.HasSuffix(o.Key, "/")} + } + return objs, nil +} + +func (o *ossClient) ListAll(prefix, marker string) (<-chan Object, error) { + return nil, notSupported +} + +func (o *ossClient) CreateMultipartUpload(key string) (*MultipartUpload, error) { + r, err := o.bucket.InitiateMultipartUpload(key) + if o.checkError(err) != nil { + return nil, err + } + return &MultipartUpload{UploadID: r.UploadID, MinPartSize: 4 << 20, MaxCount: 10000}, nil +} + +func (o *ossClient) UploadPart(key string, uploadID string, num int, data []byte) (*Part, error) { + initResult := oss.InitiateMultipartUploadResult{ + Key: key, + UploadID: uploadID, + } + r, 
err := o.bucket.UploadPart(initResult, bytes.NewReader(data), int64(len(data)), num) + if o.checkError(err) != nil { + return nil, err + } + return &Part{Num: num, ETag: r.ETag}, nil +} + +func (o *ossClient) AbortUpload(key string, uploadID string) { + initResult := oss.InitiateMultipartUploadResult{ + Key: key, + UploadID: uploadID, + } + _ = o.bucket.AbortMultipartUpload(initResult) +} + +func (o *ossClient) CompleteUpload(key string, uploadID string, parts []*Part) error { + initResult := oss.InitiateMultipartUploadResult{ + Key: key, + UploadID: uploadID, + } + oparts := make([]oss.UploadPart, len(parts)) + for i, p := range parts { + oparts[i].PartNumber = p.Num + oparts[i].ETag = p.ETag + } + _, err := o.bucket.CompleteMultipartUpload(initResult, oparts) + return o.checkError(err) +} + +func (o *ossClient) ListUploads(marker string) ([]*PendingPart, string, error) { + result, err := o.bucket.ListMultipartUploads(oss.KeyMarker(marker)) + if o.checkError(err) != nil { + return nil, "", err + } + parts := make([]*PendingPart, len(result.Uploads)) + for i, u := range result.Uploads { + parts[i] = &PendingPart{u.Key, u.UploadID, u.Initiated} + } + return parts, result.NextKeyMarker, nil +} + +type stsCred struct { + AccessKeyId string + AccessKeySecret string + Expiration string + SecurityToken string + LastUpdated string + Code string +} + +func fetch(url string) ([]byte, error) { + req, err := http.NewRequest("GET", url, nil) + if err != nil { + return nil, err + } + resp, err := httpClient.Do(req) + if err != nil { + return nil, err + } + return ioutil.ReadAll(resp.Body) +} + +func fetchStsToken() (*stsCred, error) { + if cred, err := fetchStsCred(); err == nil { + return cred, nil + } + + // EMR MetaService: https://help.aliyun.com/document_detail/43966.html + url := "http://127.0.0.1:10011/" + token, err := fetch(url + "role-security-token") + if err != nil { + return nil, err + } + accessKey, err := fetch(url + "role-access-key-id") + if err != nil { + return nil, err + } + secretKey, err := fetch(url + "role-access-key-secret") + if err != nil { + return nil, err + } + return &stsCred{ + SecurityToken: string(token), + AccessKeyId: string(accessKey), + AccessKeySecret: string(secretKey), + Expiration: time.Now().Add(time.Hour * 24 * 100).Format("2006-01-02T15:04:05Z"), + }, nil +} + +func fetchStsCred() (*stsCred, error) { + url := "http://100.100.100.200/latest/meta-data/Ram/security-credentials/" + role, err := fetch(url) + if err != nil { + return nil, err + } + d, err := fetch(url + string(role)) + if err != nil { + return nil, err + } + var cred stsCred + err = json.Unmarshal(d, &cred) + return &cred, err +} + +func (o *ossClient) refreshToken() time.Time { + cred, err := fetchStsToken() + if err != nil { + logger.Errorf("refresh token: %s", err) + return time.Now().Add(time.Second) + } + o.client.Config.AccessKeyID = cred.AccessKeyId + o.client.Config.AccessKeySecret = cred.AccessKeySecret + o.client.Config.SecurityToken = cred.SecurityToken + logger.Debugf("Refreshed STS, will be expired at %s", cred.Expiration) + expire, err := time.Parse("2006-01-02T15:04:05Z", cred.Expiration) + if err != nil { + logger.Errorf("invalid expiration: %s, %s", cred.Expiration, err) + return time.Now().Add(time.Minute) + } + return expire +} + +func autoOSSEndpoint(bucketName, accessKey, secretKey, securityToken string) (string, error) { + var client *oss.Client + var err error + + regionID := ossDefaultRegionID + if rid := os.Getenv("ALICLOUD_REGION_ID"); rid != "" { + regionID = rid + } + 
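+	// Descriptive note (editor's addition): the default public endpoint built below is only used
+	// to look up the bucket's real location via ListBuckets; the returned location then decides
+	// whether the internal or the public OSS endpoint is used for subsequent requests.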
defaultEndpoint := fmt.Sprintf("https://oss-%s.aliyuncs.com", regionID) + + if securityToken == "" { + if client, err = oss.New(defaultEndpoint, accessKey, secretKey); err != nil { + return "", err + } + } else { + if client, err = oss.New(defaultEndpoint, accessKey, secretKey, + oss.SecurityToken(securityToken)); err != nil { + return "", err + } + } + + result, err := client.ListBuckets(oss.Prefix(bucketName), oss.MaxKeys(1)) + if err != nil { + return "", err + } + if len(result.Buckets) == 0 { + return "", fmt.Errorf("cannot list bucket %q using endpoint %q", bucketName, defaultEndpoint) + } + + bucketLocation := result.Buckets[0].Location + // try oss internal endpoint + if conn, err := net.DialTimeout("tcp", fmt.Sprintf("%s-internal.aliyuncs.com:http", + bucketLocation), time.Second*3); err == nil { + _ = conn.Close() + return fmt.Sprintf("http://%s-internal.aliyuncs.com", bucketLocation), nil + } + + return fmt.Sprintf("https://%s.aliyuncs.com", bucketLocation), nil +} + +func newOSS(endpoint, accessKey, secretKey string) (ObjectStorage, error) { + if !strings.Contains(endpoint, "://") { + endpoint = fmt.Sprintf("https://%s", endpoint) + } + uri, err := url.ParseRequestURI(endpoint) + if err != nil { + return nil, fmt.Errorf("Invalid endpoint: %v, error: %v", endpoint, err) + } + hostParts := strings.SplitN(uri.Host, ".", 2) + bucketName := hostParts[0] + + var domain string + if len(hostParts) > 1 { + domain = uri.Scheme + "://" + hostParts[1] + } + + securityToken := "" + if accessKey == "" { + // try environment variable + accessKey = os.Getenv("ALICLOUD_ACCESS_KEY_ID") + secretKey = os.Getenv("ALICLOUD_ACCESS_KEY_SECRET") + securityToken = os.Getenv("SECURITY_TOKEN") + + if accessKey == "" { + if cred, err := fetchStsToken(); err != nil { + return nil, fmt.Errorf("No credential provided for OSS") + } else { + accessKey = cred.AccessKeyId + secretKey = cred.AccessKeySecret + securityToken = cred.SecurityToken + } + } + } + + if domain == "" { + if domain, err = autoOSSEndpoint(bucketName, accessKey, secretKey, securityToken); err != nil { + return nil, fmt.Errorf("Unable to get endpoint of bucket %s: %s", bucketName, err) + } + logger.Debugf("Use endpoint %q", domain) + } + + var client *oss.Client + if securityToken == "" { + client, err = oss.New(domain, accessKey, secretKey) + } else { + client, err = oss.New(domain, accessKey, secretKey, oss.SecurityToken(securityToken)) + } + if err != nil { + return nil, fmt.Errorf("Cannot create OSS client with endpoint %s: %s", endpoint, err) + } + + client.Config.Timeout = 10 + client.Config.RetryTimes = 1 + client.Config.HTTPTimeout.ConnectTimeout = time.Second * 2 // 30s + client.Config.HTTPTimeout.ReadWriteTimeout = time.Second * 5 // 60s + client.Config.HTTPTimeout.HeaderTimeout = time.Second * 5 // 60s + client.Config.HTTPTimeout.LongTimeout = time.Second * 30 // 300s + client.Config.IsEnableCRC = false // CRC64ECMA is much slower than CRC32C + + bucket, err := client.Bucket(bucketName) + if err != nil { + return nil, fmt.Errorf("Cannot create bucket %s: %s", bucketName, err) + } + + o := &ossClient{client: client, bucket: bucket} + if securityToken != "" { + go func() { + for { + next := o.refreshToken() + time.Sleep(time.Until(next) / 2) + } + }() + } + return o, nil +} + +func init() { + Register("oss", newOSS) +} diff --git a/pkg/object/prefix.go b/pkg/object/prefix.go new file mode 100644 index 0000000..e220779 --- /dev/null +++ b/pkg/object/prefix.go @@ -0,0 +1,159 @@ +/* + * JuiceFS, Copyright 2018 Juicedata, Inc. 
+ * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package object + +import ( + "fmt" + "io" + "os" + "time" +) + +type withPrefix struct { + os ObjectStorage + prefix string +} + +// WithPrefix retuns a object storage that add a prefix to keys. +func WithPrefix(os ObjectStorage, prefix string) ObjectStorage { + return &withPrefix{os, prefix} +} + +func (p *withPrefix) String() string { + return fmt.Sprintf("%s%s", p.os, p.prefix) +} + +func (p *withPrefix) Create() error { + return p.os.Create() +} + +func (p *withPrefix) Head(key string) (Object, error) { + o, err := p.os.Head(p.prefix + key) + if err != nil { + return nil, err + } + switch po := o.(type) { + case *obj: + po.key = po.key[len(p.prefix):] + case *file: + po.key = po.key[len(p.prefix):] + } + return o, nil +} + +func (p *withPrefix) Get(key string, off, limit int64) (io.ReadCloser, error) { + return p.os.Get(p.prefix+key, off, limit) +} + +func (p *withPrefix) Put(key string, in io.Reader) error { + return p.os.Put(p.prefix+key, in) +} + +func (p *withPrefix) Delete(key string) error { + return p.os.Delete(p.prefix + key) +} + +func (p *withPrefix) List(prefix, marker string, limit int64) ([]Object, error) { + if marker != "" { + marker = p.prefix + marker + } + objs, err := p.os.List(p.prefix+prefix, marker, limit) + ln := len(p.prefix) + for _, o := range objs { + switch p := o.(type) { + case *obj: + p.key = p.key[ln:] + case *file: + p.key = p.key[ln:] + } + } + return objs, err +} + +func (p *withPrefix) ListAll(prefix, marker string) (<-chan Object, error) { + if marker != "" { + marker = p.prefix + marker + } + r, err := p.os.ListAll(p.prefix+prefix, marker) + if err != nil { + return r, err + } + r2 := make(chan Object, 10240) + ln := len(p.prefix) + go func() { + for o := range r { + if o != nil && o.Key() != "" { + switch p := o.(type) { + case *obj: + p.key = p.key[ln:] + case *file: + p.key = p.key[ln:] + } + } + r2 <- o + } + close(r2) + }() + return r2, nil +} + +func (p *withPrefix) Chmod(path string, mode os.FileMode) error { + if fs, ok := p.os.(FileSystem); ok { + return fs.Chmod(p.prefix+path, mode) + } + return nil +} + +func (p *withPrefix) Chown(path string, owner, group string) error { + if fs, ok := p.os.(FileSystem); ok { + return fs.Chown(p.prefix+path, owner, group) + } + return nil +} + +func (p *withPrefix) Chtimes(key string, mtime time.Time) error { + if fs, ok := p.os.(FileSystem); ok { + return fs.Chtimes(p.prefix+key, mtime) + } + return nil +} + +func (p *withPrefix) CreateMultipartUpload(key string) (*MultipartUpload, error) { + return p.os.CreateMultipartUpload(p.prefix + key) +} + +func (p *withPrefix) UploadPart(key string, uploadID string, num int, body []byte) (*Part, error) { + return p.os.UploadPart(p.prefix+key, uploadID, num, body) +} + +func (p *withPrefix) AbortUpload(key string, uploadID string) { + p.os.AbortUpload(p.prefix+key, uploadID) +} + +func (p *withPrefix) CompleteUpload(key string, uploadID string, parts []*Part) error { + return 
p.os.CompleteUpload(p.prefix+key, uploadID, parts) +} + +func (p *withPrefix) ListUploads(marker string) ([]*PendingPart, string, error) { + parts, nextMarker, err := p.os.ListUploads(marker) + for _, part := range parts { + part.Key = part.Key[len(p.prefix):] + } + return parts, nextMarker, err +} + +var _ ObjectStorage = &withPrefix{} diff --git a/pkg/object/qingstor.go b/pkg/object/qingstor.go new file mode 100644 index 0000000..cf60c36 --- /dev/null +++ b/pkg/object/qingstor.go @@ -0,0 +1,286 @@ +//go:build !noqingstore +// +build !noqingstore + +/* + * JuiceFS, Copyright 2018 Juicedata, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package object + +import ( + "bytes" + "fmt" + "io" + "io/ioutil" + "net/url" + "os" + "strings" + "time" + + "github.com/qingstor/qingstor-sdk-go/v4/config" + qs "github.com/qingstor/qingstor-sdk-go/v4/service" +) + +type qingstor struct { + bucket *qs.Bucket +} + +func (q *qingstor) String() string { + return fmt.Sprintf("qingstor://%s/", *q.bucket.Properties.BucketName) +} + +func (q *qingstor) Create() error { + _, err := q.bucket.Put() + if err != nil && strings.Contains(err.Error(), "bucket_already_exists") { + err = nil + } + return err +} + +func (q *qingstor) Head(key string) (Object, error) { + r, err := q.bucket.HeadObject(key, nil) + if err != nil { + return nil, err + } + + return &obj{ + key, + *r.ContentLength, + *r.LastModified, + strings.HasSuffix(key, "/"), + }, nil +} + +func (q *qingstor) Get(key string, off, limit int64) (io.ReadCloser, error) { + input := &qs.GetObjectInput{} + if off > 0 || limit > 0 { + var r string + if limit > 0 { + r = fmt.Sprintf("bytes=%d-%d", off, off+limit-1) + } else { + r = fmt.Sprintf("bytes=%d-", off) + } + input.Range = &r + } + output, err := q.bucket.GetObject(key, input) + if err != nil { + return nil, err + } + return output.Body, nil +} + +func findLen(in io.Reader) (io.Reader, int64, error) { + var vlen int64 + switch v := in.(type) { + case *bytes.Buffer: + vlen = int64(v.Len()) + case *bytes.Reader: + vlen = int64(v.Len()) + case *strings.Reader: + vlen = int64(v.Len()) + case *os.File: + st, err := v.Stat() + if err != nil { + return nil, 0, err + } + vlen = st.Size() + case io.ReadSeeker: + var err error + vlen, err = v.Seek(0, 2) + if err != nil { + return nil, 0, err + } + if _, err = v.Seek(0, 0); err != nil { + return nil, 0, err + } + default: + d, err := ioutil.ReadAll(in) + if err != nil { + return nil, 0, err + } + vlen = int64(len(d)) + in = bytes.NewReader(d) + } + return in, vlen, nil +} + +func (q *qingstor) Put(key string, in io.Reader) error { + body, vlen, err := findLen(in) + if err != nil { + return err + } + input := &qs.PutObjectInput{Body: body, ContentLength: &vlen} + out, err := q.bucket.PutObject(key, input) + if err != nil { + return err + } + if *out.StatusCode != 201 { + return fmt.Errorf("unexpected code: %d", *out.StatusCode) + } + return nil +} + +func (q *qingstor) Copy(dst, src string) error { + source := fmt.Sprintf("/%s/%s", 
*q.bucket.Properties.BucketName, src) + input := &qs.PutObjectInput{ + XQSCopySource: &source, + } + out, err := q.bucket.PutObject(dst, input) + if err != nil { + return err + } + if *out.StatusCode != 201 { + return fmt.Errorf("unexpected code: %d", *out.StatusCode) + } + return nil +} + +func (q *qingstor) Delete(key string) error { + _, err := q.bucket.DeleteObject(key) + return err +} + +func (q *qingstor) List(prefix, marker string, limit int64) ([]Object, error) { + if limit > 1000 { + limit = 1000 + } + limit_ := int(limit) + input := &qs.ListObjectsInput{ + Prefix: &prefix, + Marker: &marker, + Limit: &limit_, + } + out, err := q.bucket.ListObjects(input) + if err != nil { + return nil, err + } + n := len(out.Keys) + objs := make([]Object, n) + for i := 0; i < n; i++ { + k := out.Keys[i] + objs[i] = &obj{ + *k.Key, + *k.Size, + time.Unix(int64(*k.Modified), 0), + strings.HasSuffix(*k.Key, "/"), + } + } + return objs, nil +} + +func (q *qingstor) ListAll(prefix, marker string) (<-chan Object, error) { + return nil, notSupported +} + +func (q *qingstor) CreateMultipartUpload(key string) (*MultipartUpload, error) { + r, err := q.bucket.InitiateMultipartUpload(key, nil) + if err != nil { + return nil, err + } + return &MultipartUpload{UploadID: *r.UploadID, MinPartSize: 4 << 20, MaxCount: 10000}, nil +} + +func (q *qingstor) UploadPart(key string, uploadID string, num int, data []byte) (*Part, error) { + input := &qs.UploadMultipartInput{ + UploadID: &uploadID, + PartNumber: &num, + Body: bytes.NewReader(data), + } + r, err := q.bucket.UploadMultipart(key, input) + if err != nil { + return nil, err + } + return &Part{Num: num, Size: len(data), ETag: strings.Trim(*r.ETag, "\"")}, nil +} + +func (q *qingstor) AbortUpload(key string, uploadID string) { + input := &qs.AbortMultipartUploadInput{ + UploadID: &uploadID, + } + _, _ = q.bucket.AbortMultipartUpload(key, input) +} + +func (q *qingstor) CompleteUpload(key string, uploadID string, parts []*Part) error { + oparts := make([]*qs.ObjectPartType, len(parts)) + for i := range parts { + oparts[i] = &qs.ObjectPartType{ + PartNumber: &parts[i].Num, + Etag: &parts[i].ETag, + } + } + input := &qs.CompleteMultipartUploadInput{ + UploadID: &uploadID, + ObjectParts: oparts, + } + _, err := q.bucket.CompleteMultipartUpload(key, input) + return err +} + +func (q *qingstor) ListUploads(marker string) ([]*PendingPart, string, error) { + input := &qs.ListMultipartUploadsInput{ + KeyMarker: &marker, + } + result, err := q.bucket.ListMultipartUploads(input) + if err != nil { + return nil, "", err + } + parts := make([]*PendingPart, len(result.Uploads)) + for i, u := range result.Uploads { + parts[i] = &PendingPart{*u.Key, *u.UploadID, *u.Created} + } + var nextMarker string + if result.NextKeyMarker != nil { + nextMarker = *result.NextKeyMarker + } + return parts, nextMarker, nil +} + +func newQingStor(endpoint, accessKey, secretKey string) (ObjectStorage, error) { + if !strings.Contains(endpoint, "://") { + endpoint = fmt.Sprintf("https://%s", endpoint) + } + uri, err := url.ParseRequestURI(endpoint) + if err != nil { + return nil, fmt.Errorf("Invalid endpoint: %v, error: %v", endpoint, err) + } + var bucketName, zone, host string + if !strings.HasSuffix(uri.Host, "qingstor.com") { + // support private cloud + hostParts := strings.SplitN(uri.Host, ".", 2) + bucketName, zone, host = hostParts[0], "", hostParts[1] + } else { + hostParts := strings.SplitN(uri.Host, ".", 3) + bucketName, zone, host = hostParts[0], hostParts[1], hostParts[2] + } + conf, 
err := config.New(accessKey, secretKey) + if err != nil { + return nil, fmt.Errorf("Can't load config: %s", err.Error()) + } + conf.Host = host + conf.Protocol = uri.Scheme + if uri.Scheme == "http" { + conf.Port = 80 + } else { + conf.Port = 443 + } + conf.Connection = httpClient + qsService, _ := qs.Init(conf) + bucket, _ := qsService.Bucket(bucketName, zone) + return &qingstor{bucket: bucket}, nil +} + +func init() { + Register("qingstor", newQingStor) +} diff --git a/pkg/object/qiniu.go b/pkg/object/qiniu.go new file mode 100644 index 0000000..8e884b5 --- /dev/null +++ b/pkg/object/qiniu.go @@ -0,0 +1,231 @@ +//go:build !noqiniu && !nos3 +// +build !noqiniu,!nos3 + +/* + * JuiceFS, Copyright 2018 Juicedata, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package object + +import ( + "fmt" + "io" + "net/http" + "net/url" + "os" + "strings" + "time" + + "github.com/aws/aws-sdk-go/aws" + "github.com/aws/aws-sdk-go/aws/credentials" + "github.com/aws/aws-sdk-go/aws/session" + "github.com/aws/aws-sdk-go/service/s3" + "github.com/qiniu/api.v7/v7/auth/qbox" + "github.com/qiniu/api.v7/v7/storage" +) + +type qiniu struct { + s3client + bm *storage.BucketManager + mac *qbox.Mac + cfg *storage.Config + marker string +} + +func (q *qiniu) String() string { + return fmt.Sprintf("qiniu://%s/", q.bucket) +} + +func (q *qiniu) download(key string, off, limit int64) (io.ReadCloser, error) { + deadline := time.Now().Add(time.Second * 3600).Unix() + url := storage.MakePrivateURL(q.mac, os.Getenv("QINIU_DOMAIN"), key, deadline) + req, err := http.NewRequest("GET", url, nil) + if err != nil { + return nil, err + } + now := time.Now().UTC().Format(http.TimeFormat) + req.Header.Add("Date", now) + if off > 0 || limit > 0 { + if limit > 0 { + req.Header.Add("Range", fmt.Sprintf("bytes=%d-%d", off, off+limit-1)) + } else { + req.Header.Add("Range", fmt.Sprintf("bytes=%d-", off)) + } + } + resp, err := httpClient.Do(req) + if err != nil { + return nil, err + } + if resp.StatusCode != 200 && resp.StatusCode != 206 { + return nil, fmt.Errorf("Status code: %d", resp.StatusCode) + } + return resp.Body, nil +} + +func (q *qiniu) Head(key string) (Object, error) { + r, err := q.bm.Stat(q.bucket, key) + if err != nil { + return nil, err + } + + mtime := time.Unix(0, r.PutTime*100) + return &obj{ + key, + r.Fsize, + mtime, + strings.HasSuffix(key, "/"), + }, nil +} + +func (q *qiniu) Get(key string, off, limit int64) (io.ReadCloser, error) { + // S3 SDK cannot get objects with prefix "/" in the key + if strings.HasPrefix(key, "/") && os.Getenv("QINIU_DOMAIN") != "" { + return q.download(key, off, limit) + } + for strings.HasPrefix(key, "/") { + key = key[1:] + } + // S3ForcePathStyle = true + return q.s3client.Get("/"+key, off, limit) +} + +func (q *qiniu) Put(key string, in io.Reader) error { + body, vlen, err := findLen(in) + if err != nil { + return err + } + putPolicy := storage.PutPolicy{Scope: q.bucket + ":" + key} + upToken := putPolicy.UploadToken(q.mac) + formUploader := 
storage.NewFormUploader(q.cfg) + var ret storage.PutRet + return formUploader.Put(ctx, &ret, upToken, key, body, vlen, nil) +} + +func (q *qiniu) Copy(dst, src string) error { + return q.bm.Copy(q.bucket, src, q.bucket, dst, true) +} + +func (q *qiniu) CreateMultipartUpload(key string) (*MultipartUpload, error) { + return nil, notSupported +} + +func (q *qiniu) Delete(key string) error { + return q.bm.Delete(q.bucket, key) +} + +func (q *qiniu) List(prefix, marker string, limit int64) ([]Object, error) { + if limit > 1000 { + limit = 1000 + } + if marker == "" { + q.marker = "" + } else if q.marker == "" { + // last page + return nil, nil + } + entries, _, markerOut, hasNext, err := q.bm.ListFiles(q.bucket, prefix, "", q.marker, int(limit)) + for err == nil && len(entries) == 0 && hasNext { + entries, _, markerOut, hasNext, err = q.bm.ListFiles(q.bucket, prefix, "", markerOut, int(limit)) + } + q.marker = markerOut + if len(entries) > 0 || err == io.EOF { + // ignore error if returned something + err = nil + } + if err != nil { + return nil, err + } + n := len(entries) + objs := make([]Object, n) + for i := 0; i < n; i++ { + entry := entries[i] + mtime := entry.PutTime / 10000000 + objs[i] = &obj{entry.Key, entry.Fsize, time.Unix(mtime, 0), strings.HasSuffix(entry.Key, "/")} + } + return objs, nil +} + +var publicRegions = map[string]*storage.Zone{ + "cn-east-1": &storage.ZoneHuadong, + "cn-north-1": &storage.ZoneHuabei, + "cn-south-1": &storage.ZoneHuanan, + "us-west-1": &storage.ZoneBeimei, + "ap-southeast-1": &storage.ZoneXinjiapo, +} + +func newQiniu(endpoint, accessKey, secretKey string) (ObjectStorage, error) { + if !strings.Contains(endpoint, "://") { + endpoint = fmt.Sprintf("https://%s", endpoint) + } + uri, err := url.ParseRequestURI(endpoint) + if err != nil { + return nil, fmt.Errorf("Invalid endpoint: %v, error: %v", endpoint, err) + } + hostParts := strings.SplitN(uri.Host, ".", 2) + bucket := hostParts[0] + endpoint = hostParts[1] + var region string + if strings.HasPrefix(endpoint, "s3") { + // private region + region = endpoint[strings.Index(endpoint, "-")+1 : strings.Index(endpoint, ".")] + } else if strings.HasPrefix(endpoint, "qvm-") { + region = "cn-east-1" // internal + } else if strings.HasPrefix(endpoint, "qvm-z1") { + region = "cn-north-1" + } else { + region = endpoint[:strings.LastIndex(endpoint, "-")] + } + awsConfig := &aws.Config{ + Credentials: credentials.NewStaticCredentials(accessKey, secretKey, ""), + Endpoint: &endpoint, + Region: ®ion, + DisableSSL: aws.Bool(uri.Scheme == "http"), + S3ForcePathStyle: aws.Bool(true), + HTTPClient: httpClient, + } + ses, err := session.NewSession(awsConfig) + if err != nil { + return nil, fmt.Errorf("aws session: %s", err) + } + ses.Handlers.Build.PushFront(disableSha256Func) + s3client := s3client{bucket, s3.New(ses), ses} + + cfg := storage.Config{ + UseHTTPS: uri.Scheme == "https", + } + zone, ok := publicRegions[region] + if !ok { + domain := strings.SplitN(endpoint, "-", 2)[1] + zone = &storage.Zone{ + RsHost: "rs-" + domain, + RsfHost: "rsf-" + domain, + ApiHost: "api-" + domain, + IovipHost: "io-" + domain, + SrcUpHosts: []string{"up-" + domain}, + } + } else if strings.HasPrefix(endpoint, "qvm-z1") { + zone.SrcUpHosts = []string{"free-qvm-z1-zz.qiniup.com"} + } else if strings.HasPrefix(endpoint, "qvm-") { + zone.SrcUpHosts = []string{"free-qvm-z0-xs.qiniup.com"} + } + cfg.Zone = zone + mac := qbox.NewMac(accessKey, secretKey) + bucketManager := storage.NewBucketManager(mac, &cfg) + return &qiniu{s3client, 
bucketManager, mac, &cfg, ""}, nil +} + +func init() { + Register("qiniu", newQiniu) +} diff --git a/pkg/object/redis.go b/pkg/object/redis.go new file mode 100644 index 0000000..76a7312 --- /dev/null +++ b/pkg/object/redis.go @@ -0,0 +1,89 @@ +//go:build !noredis +// +build !noredis + +/* + * JuiceFS, Copyright 2020 Juicedata, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package object + +import ( + "bytes" + "context" + "fmt" + "io" + "io/ioutil" + + "github.com/go-redis/redis/v8" +) + +// redisStore stores data chunks into Redis. +type redisStore struct { + DefaultObjectStorage + rdb *redis.Client +} + +var c = context.TODO() + +func (r *redisStore) String() string { + return fmt.Sprintf("redis://%s/", r.rdb.Options().Addr) +} + +func (r *redisStore) Create() error { + return nil +} + +func (r *redisStore) Get(key string, off, limit int64) (io.ReadCloser, error) { + data, err := r.rdb.Get(c, key).Bytes() + if err != nil { + return nil, err + } + data = data[off:] + if limit > 0 && limit < int64(len(data)) { + data = data[:limit] + } + return ioutil.NopCloser(bytes.NewBuffer(data)), nil +} + +func (r *redisStore) Put(key string, in io.Reader) error { + data, err := ioutil.ReadAll(in) + if err != nil { + return err + } + return r.rdb.Set(c, key, data, 0).Err() +} + +func (r *redisStore) Delete(key string) error { + return r.rdb.Del(c, key).Err() +} + +func newRedis(url, user, passwd string) (ObjectStorage, error) { + opt, err := redis.ParseURL(url) + if err != nil { + return nil, fmt.Errorf("parse %s: %s", url, err) + } + if user != "" { + opt.Username = user + } + if passwd != "" { + opt.Password = passwd + } + rdb := redis.NewClient(opt) + return &redisStore{DefaultObjectStorage{}, rdb}, nil +} + +func init() { + Register("redis", newRedis) +} diff --git a/pkg/object/redis_test.go b/pkg/object/redis_test.go new file mode 100644 index 0000000..c5d1d8c --- /dev/null +++ b/pkg/object/redis_test.go @@ -0,0 +1,41 @@ +/* + * JuiceFS, Copyright 2020 Juicedata, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package object + +import ( + "bytes" + "io/ioutil" + "testing" +) + +func TestRedisStore(t *testing.T) { + s, err := newRedis("redis://127.0.0.1:6379/10", "", "") + if err != nil { + t.Fatalf("create: %s", err) + } + if err := s.Put("chunks/1", bytes.NewBuffer([]byte("data"))); err != nil { + t.Fatalf("put: %s", err) + } + if rb, err := s.Get("chunks/1", 0, -1); err != nil { + t.Fatalf("get : %s", err) + } else if d, err := ioutil.ReadAll(rb); err != nil || !bytes.Equal(d, []byte("data")) { + t.Fatalf("get: %s %s", err, string(d)) + } + if err := s.Delete("chunks/1"); err != nil { + t.Fatalf("delete: %s", err) + } +} diff --git a/pkg/object/restful.go b/pkg/object/restful.go new file mode 100644 index 0000000..f1725d9 --- /dev/null +++ b/pkg/object/restful.go @@ -0,0 +1,236 @@ +/* + * JuiceFS, Copyright 2018 Juicedata, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package object + +import ( + "bytes" + "crypto/hmac" + "crypto/sha1" + "encoding/base64" + "errors" + "fmt" + "io" + "io/ioutil" + "math/rand" + "net" + "net/http" + "os" + "strings" + "time" + + "github.com/viki-org/dnscache" +) + +var resolver = dnscache.New(time.Minute) +var httpClient *http.Client + +func init() { + rand.Seed(time.Now().Unix()) + httpClient = &http.Client{ + Transport: &http.Transport{ + Proxy: http.ProxyFromEnvironment, + TLSHandshakeTimeout: time.Second * 20, + ResponseHeaderTimeout: time.Second * 30, + IdleConnTimeout: time.Second * 300, + MaxIdleConnsPerHost: 500, + Dial: func(network string, address string) (net.Conn, error) { + separator := strings.LastIndex(address, ":") + host := address[:separator] + port := address[separator:] + ips, err := resolver.Fetch(host) + if err != nil { + return nil, err + } + if len(ips) == 0 { + return nil, fmt.Errorf("No such host: %s", host) + } + var conn net.Conn + n := len(ips) + first := rand.Intn(n) + dialer := &net.Dialer{Timeout: time.Second * 10} + for i := 0; i < n; i++ { + ip := ips[(first+i)%n] + address = ip.String() + if port != "" { + address = net.JoinHostPort(address, port[1:]) + } + conn, err = dialer.Dial(network, address) + if err == nil { + return conn, nil + } + } + return nil, err + }, + DisableCompression: true, + }, + Timeout: time.Hour, + } +} + +func cleanup(response *http.Response) { + if response != nil && response.Body != nil { + _, _ = ioutil.ReadAll(response.Body) + _ = response.Body.Close() + } +} + +type RestfulStorage struct { + DefaultObjectStorage + endpoint string + accessKey string + secretKey string + signName string + signer func(*http.Request, string, string, string) +} + +func (s *RestfulStorage) String() string { + return s.endpoint +} + +var HEADER_NAMES = []string{"Content-MD5", "Content-Type", "Date"} + +// RequestURL is fully url of api request +func sign(req *http.Request, accessKey, secretKey, signName string) { + if accessKey == "" { + return + } + toSign := req.Method + "\n" + for _, n := range HEADER_NAMES { + toSign += req.Header.Get(n) + "\n" + } + bucket := strings.Split(req.URL.Host, 
".")[0] + toSign += "/" + bucket + req.URL.Path + h := hmac.New(sha1.New, []byte(secretKey)) + _, _ = h.Write([]byte(toSign)) + sig := base64.StdEncoding.EncodeToString(h.Sum(nil)) + token := signName + " " + accessKey + ":" + sig + req.Header.Add("Authorization", token) +} + +func (s *RestfulStorage) request(method, key string, body io.Reader, headers map[string]string) (*http.Response, error) { + uri := s.endpoint + "/" + key + req, err := http.NewRequest(method, uri, body) + if err != nil { + return nil, err + } + if f, ok := body.(*os.File); ok { + st, err := f.Stat() + if err == nil { + req.ContentLength = st.Size() + } + } + now := time.Now().UTC().Format(http.TimeFormat) + req.Header.Add("Date", now) + for key := range headers { + req.Header.Add(key, headers[key]) + } + s.signer(req, s.accessKey, s.secretKey, s.signName) + return httpClient.Do(req) +} + +func parseError(resp *http.Response) error { + data, err := ioutil.ReadAll(resp.Body) + if err != nil { + return fmt.Errorf("request failed: %s", err) + } + return fmt.Errorf("status: %v, message: %s", resp.StatusCode, string(data)) +} + +func (s *RestfulStorage) Head(key string) (Object, error) { + resp, err := s.request("HEAD", key, nil, nil) + if err != nil { + return nil, err + } + defer cleanup(resp) + if resp.StatusCode != 200 { + return nil, parseError(resp) + } + + lastModified := resp.Header.Get("Last-Modified") + if lastModified == "" { + return nil, fmt.Errorf("cannot get last modified time") + } + mtime, _ := time.Parse(time.RFC1123, lastModified) + return &obj{ + key, + resp.ContentLength, + mtime, + strings.HasSuffix(key, "/"), + }, nil +} + +func (s *RestfulStorage) Get(key string, off, limit int64) (io.ReadCloser, error) { + headers := make(map[string]string) + if off > 0 || limit > 0 { + if limit > 0 { + headers["Range"] = fmt.Sprintf("bytes=%d-%d", off, off+limit-1) + } else { + headers["Range"] = fmt.Sprintf("bytes=%d-", off) + } + } + resp, err := s.request("GET", key, nil, headers) + if err != nil { + return nil, err + } + if resp.StatusCode != 200 && resp.StatusCode != 206 { + return nil, parseError(resp) + } + return resp.Body, nil +} + +func (u *RestfulStorage) Put(key string, body io.Reader) error { + resp, err := u.request("PUT", key, body, nil) + if err != nil { + return err + } + defer cleanup(resp) + if resp.StatusCode != 201 && resp.StatusCode != 200 { + return parseError(resp) + } + return nil +} + +func (s *RestfulStorage) Copy(dst, src string) error { + in, err := s.Get(src, 0, -1) + if err != nil { + return err + } + defer in.Close() + d, err := ioutil.ReadAll(in) + if err != nil { + return err + } + return s.Put(dst, bytes.NewReader(d)) +} + +func (s *RestfulStorage) Delete(key string) error { + resp, err := s.request("DELETE", key, nil, nil) + if err != nil { + return err + } + defer cleanup(resp) + if resp.StatusCode != 204 && resp.StatusCode != 404 { + return parseError(resp) + } + return nil +} + +func (s *RestfulStorage) List(prefix, marker string, limit int64) ([]Object, error) { + return nil, errors.New("Not implemented") +} + +var _ ObjectStorage = &RestfulStorage{} diff --git a/pkg/object/s3.go b/pkg/object/s3.go new file mode 100644 index 0000000..17bf928 --- /dev/null +++ b/pkg/object/s3.go @@ -0,0 +1,426 @@ +//go:build !nos3 +// +build !nos3 + +/* + * JuiceFS, Copyright 2018 Juicedata, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package object + +import ( + "bytes" + "fmt" + "io" + "io/ioutil" + "net/url" + "os" + "strings" + + "github.com/aws/aws-sdk-go/aws" + "github.com/aws/aws-sdk-go/aws/awserr" + "github.com/aws/aws-sdk-go/aws/credentials" + "github.com/aws/aws-sdk-go/aws/request" + "github.com/aws/aws-sdk-go/aws/session" + "github.com/aws/aws-sdk-go/service/s3" +) + +const awsDefaultRegion = "us-east-1" + +var disableSha256Func = func(r *request.Request) { + if op := r.Operation.Name; r.ClientInfo.ServiceID != "S3" || !(op == "PutObject" || op == "UploadPart") { + return + } + if len(r.HTTPRequest.Header.Get("X-Amz-Content-Sha256")) != 0 { + return + } + r.HTTPRequest.Header.Set("X-Amz-Content-Sha256", "UNSIGNED-PAYLOAD") +} + +type s3client struct { + bucket string + s3 *s3.S3 + ses *session.Session +} + +func (s *s3client) String() string { + return fmt.Sprintf("s3://%s/", s.bucket) +} + +func isExists(err error) bool { + msg := err.Error() + return strings.Contains(msg, s3.ErrCodeBucketAlreadyExists) || strings.Contains(msg, s3.ErrCodeBucketAlreadyOwnedByYou) +} + +func (s *s3client) Create() error { + if _, err := s.List("", "", 1); err == nil { + return nil + } + _, err := s.s3.CreateBucket(&s3.CreateBucketInput{Bucket: &s.bucket}) + if err != nil && isExists(err) { + err = nil + } + return err +} + +func (s *s3client) Head(key string) (Object, error) { + param := s3.HeadObjectInput{ + Bucket: &s.bucket, + Key: &key, + } + r, err := s.s3.HeadObject(¶m) + if err != nil { + return nil, err + } + return &obj{ + key, + *r.ContentLength, + *r.LastModified, + strings.HasSuffix(key, "/"), + }, nil +} + +func (s *s3client) Get(key string, off, limit int64) (io.ReadCloser, error) { + params := &s3.GetObjectInput{Bucket: &s.bucket, Key: &key} + if off > 0 || limit > 0 { + var r string + if limit > 0 { + r = fmt.Sprintf("bytes=%d-%d", off, off+limit-1) + } else { + r = fmt.Sprintf("bytes=%d-", off) + } + params.Range = &r + } + resp, err := s.s3.GetObject(params) + if err != nil { + return nil, err + } + if off == 0 && limit == -1 { + cs := resp.Metadata[checksumAlgr] + if cs != nil { + resp.Body = verifyChecksum(resp.Body, *cs) + } + } + return resp.Body, nil +} + +func (s *s3client) Put(key string, in io.Reader) error { + var body io.ReadSeeker + if b, ok := in.(io.ReadSeeker); ok { + body = b + } else { + data, err := ioutil.ReadAll(in) + if err != nil { + return err + } + body = bytes.NewReader(data) + } + checksum := generateChecksum(body) + params := &s3.PutObjectInput{ + Bucket: &s.bucket, + Key: &key, + Body: body, + Metadata: map[string]*string{checksumAlgr: &checksum}, + } + _, err := s.s3.PutObject(params) + return err +} + +func (s *s3client) Copy(dst, src string) error { + src = s.bucket + "/" + src + params := &s3.CopyObjectInput{ + Bucket: &s.bucket, + Key: &dst, + CopySource: &src, + } + _, err := s.s3.CopyObject(params) + return err +} + +func (s *s3client) Delete(key string) error { + param := s3.DeleteObjectInput{ + Bucket: &s.bucket, + Key: &key, + } + _, err := s.s3.DeleteObject(¶m) + return err +} + +func (s *s3client) List(prefix, marker string, limit int64) 
([]Object, error) { + param := s3.ListObjectsInput{ + Bucket: &s.bucket, + Prefix: &prefix, + Marker: &marker, + MaxKeys: &limit, + } + resp, err := s.s3.ListObjects(¶m) + if err != nil { + return nil, err + } + n := len(resp.Contents) + objs := make([]Object, n) + for i := 0; i < n; i++ { + o := resp.Contents[i] + objs[i] = &obj{ + *o.Key, + *o.Size, + *o.LastModified, + strings.HasSuffix(*o.Key, "/"), + } + } + return objs, nil +} + +func (s *s3client) ListAll(prefix, marker string) (<-chan Object, error) { + return nil, notSupported +} + +func (s *s3client) CreateMultipartUpload(key string) (*MultipartUpload, error) { + params := &s3.CreateMultipartUploadInput{ + Bucket: &s.bucket, + Key: &key, + } + resp, err := s.s3.CreateMultipartUpload(params) + if err != nil { + return nil, err + } + return &MultipartUpload{UploadID: *resp.UploadId, MinPartSize: 5 << 20, MaxCount: 10000}, nil +} + +func (s *s3client) UploadPart(key string, uploadID string, num int, body []byte) (*Part, error) { + n := int64(num) + params := &s3.UploadPartInput{ + Bucket: &s.bucket, + Key: &key, + UploadId: &uploadID, + Body: bytes.NewReader(body), + PartNumber: &n, + } + resp, err := s.s3.UploadPart(params) + if err != nil { + return nil, err + } + return &Part{Num: num, ETag: *resp.ETag}, nil +} + +func (s *s3client) AbortUpload(key string, uploadID string) { + params := &s3.AbortMultipartUploadInput{ + Bucket: &s.bucket, + Key: &key, + UploadId: &uploadID, + } + _, _ = s.s3.AbortMultipartUpload(params) +} + +func (s *s3client) CompleteUpload(key string, uploadID string, parts []*Part) error { + var s3Parts []*s3.CompletedPart + for i := range parts { + n := new(int64) + *n = int64(parts[i].Num) + s3Parts = append(s3Parts, &s3.CompletedPart{ETag: &parts[i].ETag, PartNumber: n}) + } + params := &s3.CompleteMultipartUploadInput{ + Bucket: &s.bucket, + Key: &key, + UploadId: &uploadID, + MultipartUpload: &s3.CompletedMultipartUpload{Parts: s3Parts}, + } + _, err := s.s3.CompleteMultipartUpload(params) + return err +} + +func (s *s3client) ListUploads(marker string) ([]*PendingPart, string, error) { + input := &s3.ListMultipartUploadsInput{ + Bucket: aws.String(s.bucket), + KeyMarker: aws.String(marker), + } + + result, err := s.s3.ListMultipartUploads(input) + if err != nil { + return nil, "", err + } + parts := make([]*PendingPart, len(result.Uploads)) + for i, u := range result.Uploads { + parts[i] = &PendingPart{*u.Key, *u.UploadId, *u.Initiated} + } + var nextMarker string + if result.NextKeyMarker != nil { + nextMarker = *result.NextKeyMarker + } + return parts, nextMarker, nil +} + +func autoS3Region(bucketName, accessKey, secretKey string) (string, error) { + awsConfig := &aws.Config{ + HTTPClient: httpClient, + } + if accessKey != "" { + awsConfig.Credentials = credentials.NewStaticCredentials(accessKey, secretKey, "") + } + + var regions []string + if r := os.Getenv("AWS_DEFAULT_REGION"); r != "" { + regions = []string{r} + } else { + regions = []string{awsDefaultRegion, "cn-north-1"} + } + + var ( + err error + ses *session.Session + service *s3.S3 + result *s3.GetBucketLocationOutput + ) + for _, r := range regions { + // try to get bucket location + awsConfig.Region = aws.String(r) + ses, err = session.NewSession(awsConfig) + if err != nil { + return "", fmt.Errorf("fail to create aws session: %s", err) + } + ses.Handlers.Build.PushFront(disableSha256Func) + service = s3.New(ses) + result, err = service.GetBucketLocation(&s3.GetBucketLocationInput{ + Bucket: aws.String(bucketName), + }) + if err == nil { 
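+			// Descriptive note (editor's addition): a successful GetBucketLocation reply carries the
+			// bucket's home region in LocationConstraint, which is returned instead of the probed region.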
+ logger.Debugf("Get location of bucket %q from region %q endpoint success: %s", + bucketName, r, *result.LocationConstraint) + return *result.LocationConstraint, nil + } + if err1, ok := err.(awserr.Error); ok { + // continue to try other regions if the credentials are invalid, otherwise stop trying. + if errCode := err1.Code(); errCode != "InvalidAccessKeyId" && errCode != "InvalidToken" { + return "", err + } + } + logger.Debugf("Fail to get location of bucket %q from region %q endpoint: %s", bucketName, r, err) + } + return "", err +} + +func parseRegion(endpoint string) string { + if strings.HasPrefix(endpoint, "s3-") || strings.HasPrefix(endpoint, "s3.") { + endpoint = endpoint[3:] + } + if strings.HasPrefix(endpoint, "dualstack") { + endpoint = endpoint[len("dualstack."):] + } + if endpoint == "amazonaws.com" { + endpoint = awsDefaultRegion + "." + endpoint + } + region := strings.Split(endpoint, ".")[0] + if region == "external-1" { + region = awsDefaultRegion + } + return region +} + +func newS3(endpoint, accessKey, secretKey string) (ObjectStorage, error) { + if !strings.Contains(endpoint, "://") { + if len(strings.Split(endpoint, ".")) > 1 && !strings.HasSuffix(endpoint, ".amazonaws.com") { + endpoint = fmt.Sprintf("http://%s", endpoint) + } else { + endpoint = fmt.Sprintf("https://%s", endpoint) + } + } + endpoint = strings.Trim(endpoint, "/") + uri, err := url.ParseRequestURI(endpoint) + if err != nil { + return nil, fmt.Errorf("Invalid endpoint %s: %s", endpoint, err.Error()) + } + + var ( + bucketName string + region string + ep string + ) + + if uri.Path != "" { + // [ENDPOINT]/[BUCKET] + pathParts := strings.Split(uri.Path, "/") + bucketName = pathParts[1] + if strings.Contains(uri.Host, ".amazonaws.com") { + // standard s3 + // s3-[REGION].[REST_OF_ENDPOINT]/[BUCKET] + // s3.[REGION].amazonaws.com[.cn]/[BUCKET] + endpoint = uri.Host + region = parseRegion(endpoint) + } else { + // compatible s3 + ep = uri.Host + } + } else { + // [BUCKET].[ENDPOINT] + hostParts := strings.SplitN(uri.Host, ".", 2) + if len(hostParts) == 1 { + // take endpoint as bucketname + bucketName = hostParts[0] + if region, err = autoS3Region(bucketName, accessKey, secretKey); err != nil { + return nil, fmt.Errorf("Can't guess your region for bucket %s: %s", bucketName, err) + } + } else { + // get region or endpoint + if strings.Contains(uri.Host, ".amazonaws.com") { + // standard s3 + // [BUCKET].s3-[REGION].[REST_OF_ENDPOINT] + // [BUCKET].s3.[REGION].amazonaws.com[.cn] + hostParts = strings.SplitN(uri.Host, ".s3", 2) + bucketName = hostParts[0] + endpoint = "s3" + hostParts[1] + region = parseRegion(endpoint) + } else { + // compatible s3 + bucketName = hostParts[0] + ep = hostParts[1] + } + } + } + if region == "" { + region = os.Getenv("AWS_REGION") + } + if region == "" { + region = os.Getenv("AWS_DEFAULT_REGION") + } + if region == "" { + region = awsDefaultRegion + } + + ssl := strings.ToLower(uri.Scheme) == "https" + awsConfig := &aws.Config{ + Region: aws.String(region), + DisableSSL: aws.Bool(!ssl), + HTTPClient: httpClient, + } + if accessKey == "anonymous" { + awsConfig.Credentials = credentials.AnonymousCredentials + } else if accessKey != "" { + awsConfig.Credentials = credentials.NewStaticCredentials(accessKey, secretKey, "") + } + if ep != "" { + awsConfig.Endpoint = aws.String(ep) + awsConfig.S3ForcePathStyle = aws.Bool(true) + } + + ses, err := session.NewSession(awsConfig) + if err != nil { + return nil, fmt.Errorf("Fail to create aws session: %s", err) + } + 
ses.Handlers.Build.PushFront(disableSha256Func) + return &s3client{bucketName, s3.New(ses), ses}, nil +} + +func init() { + Register("s3", newS3) +} diff --git a/pkg/object/scs.go b/pkg/object/scs.go new file mode 100644 index 0000000..40f771a --- /dev/null +++ b/pkg/object/scs.go @@ -0,0 +1,191 @@ +//go:build !noscs +// +build !noscs + +/* + * JuiceFS, Copyright 2021 Juicedata, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package object + +import ( + "bytes" + "fmt" + "io" + "net/url" + "sort" + "strings" + "time" + + "github.com/Arvintian/scs-go-sdk/scs" +) + +type scsClient struct { + bucket string + c *scs.SCS + b scs.Bucket + marker string +} + +func (s *scsClient) String() string { + return fmt.Sprintf("scs://%s/", s.bucket) +} + +func (s *scsClient) Create() error { + err := s.c.PutBucket(s.bucket, scs.ACLPrivate) + if err != nil && isExists(err) { + err = nil + } + return err +} + +func (s *scsClient) Head(key string) (Object, error) { + om, err := s.b.Head(key) + if err != nil { + return nil, err + } + mtime, err := time.Parse(time.RFC1123, om.LastModified) + if err != nil { + return nil, err + } + return &obj{key: key, size: om.ContentLength, mtime: mtime, isDir: strings.HasSuffix(key, "/")}, nil +} + +func (s *scsClient) Get(key string, off, limit int64) (io.ReadCloser, error) { + if off > 0 || limit > 0 { + var r string + if limit > 0 { + r = fmt.Sprintf("%d-%d", off, off+limit-1) + } else { + r = fmt.Sprintf("%d-", off) + } + return s.b.Get(key, r) + } + return s.b.Get(key, "") +} + +func (s *scsClient) Put(key string, in io.Reader) error { + return s.b.Put(key, map[string]string{}, in) +} + +func (s *scsClient) Delete(key string) error { + return s.b.Delete(key) +} + +func (s *scsClient) List(prefix, marker string, limit int64) ([]Object, error) { + if marker != "" { + if s.marker == "" { + // last page + return nil, nil + } + marker = s.marker + } + list, err := s.b.List("", prefix, marker, limit) + if err != nil { + s.marker = "" + return nil, err + } + s.marker = list.NextMarker + n := len(list.Contents) + // Message from scs technical support, the api not guarantee contents is ordered, but marker is work. + // So we sort contents at here, can work both contents is ordered or not ordered. 
+ // https://scs.sinacloud.com/doc/scs/api#get_bucket + sort.Slice(list.Contents, func(i, j int) bool { return list.Contents[i].Name < list.Contents[j].Name }) + objs := make([]Object, n) + for i := 0; i < n; i++ { + ob := list.Contents[i] + mtime, _ := time.Parse(time.RFC1123, ob.LastModified) + objs[i] = &obj{ + key: ob.Name, + size: ob.Size, + mtime: mtime, + isDir: strings.HasSuffix(ob.Name, "/"), + } + } + return objs, nil +} + +func (s *scsClient) ListAll(prefix, marker string) (<-chan Object, error) { + return nil, notSupported +} + +func (s *scsClient) CreateMultipartUpload(key string) (*MultipartUpload, error) { + mu, err := s.b.InitiateMultipartUpload(key, map[string]string{}) + if err != nil { + return nil, err + } + return &MultipartUpload{ + MinPartSize: 5 << 20, + MaxCount: 2048, + UploadID: mu.UploadID, + }, nil +} + +func (s *scsClient) UploadPart(key string, uploadID string, num int, body []byte) (*Part, error) { + p, err := s.b.UploadPart(key, uploadID, num, bytes.NewReader(body)) + if err != nil { + return nil, err + } + return &Part{ + Num: p.PartNumber, + Size: p.Size, + ETag: p.ETag, + }, nil +} + +func (s *scsClient) AbortUpload(key string, uploadID string) {} + +func (s *scsClient) CompleteUpload(key string, uploadID string, parts []*Part) error { + ps := make([]scs.Part, len(parts)) + for i := 0; i < len(parts); i++ { + ps[i] = scs.Part{ + PartNumber: parts[i].Num, + ETag: parts[i].ETag, + } + } + return s.b.CompleteMultipartUpload(key, uploadID, ps) +} + +func (s *scsClient) ListUploads(marker string) ([]*PendingPart, string, error) { + return nil, "", notSupported +} + +func newSCS(endpoint, accessKey, secretKey string) (ObjectStorage, error) { + if !strings.Contains(endpoint, "://") { + endpoint = fmt.Sprintf("https://%s", endpoint) + } + uri, err := url.ParseRequestURI(endpoint) + if err != nil { + return nil, fmt.Errorf("Invalid endpoint: %v, error: %v", endpoint, err) + } + hostParts := strings.SplitN(uri.Host, ".", 2) + bucketName := hostParts[0] + var domain string + if len(hostParts) > 1 { + domain = uri.Scheme + "://" + hostParts[1] + } + c, err := scs.NewSCS(accessKey, secretKey, domain) + if err != nil { + return nil, err + } + b, err := c.GetBucket(bucketName) + if err != nil { + return nil, err + } + return &scsClient{bucket: bucketName, c: c, b: b, marker: ""}, nil +} + +func init() { + Register("scs", newSCS) +} diff --git a/pkg/object/scw.go b/pkg/object/scw.go new file mode 100644 index 0000000..3349324 --- /dev/null +++ b/pkg/object/scw.go @@ -0,0 +1,82 @@ +//go:build !nos3 +// +build !nos3 + +/* + * JuiceFS, Copyright 2021 Juicedata, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package object + +import ( + "fmt" + "net/url" + "os" + "strings" + + "github.com/aws/aws-sdk-go/aws" + "github.com/aws/aws-sdk-go/aws/credentials" + "github.com/aws/aws-sdk-go/aws/session" + "github.com/aws/aws-sdk-go/service/s3" +) + +type scw struct { + s3client +} + +func (s *scw) String() string { + return fmt.Sprintf("scw://%s/", s.s3client.bucket) +} + +func newScw(endpoint, accessKey, secretKey string) (ObjectStorage, error) { + if !strings.Contains(endpoint, "://") { + endpoint = fmt.Sprintf("https://%s", endpoint) + } + uri, err := url.ParseRequestURI(endpoint) + if err != nil { + return nil, fmt.Errorf("Invalid endpoint %s: %s", endpoint, err) + } + ssl := strings.ToLower(uri.Scheme) == "https" + hostParts := strings.Split(uri.Host, ".") + bucket := hostParts[0] + region := hostParts[2] + endpoint = uri.Host[len(bucket)+1:] + + if accessKey == "" { + accessKey = os.Getenv("SCW_ACCESS_KEY") + } + if secretKey == "" { + secretKey = os.Getenv("SCW_SECRET_KEY") + } + + awsConfig := &aws.Config{ + Region: ®ion, + Endpoint: &endpoint, + DisableSSL: aws.Bool(!ssl), + S3ForcePathStyle: aws.Bool(false), + HTTPClient: httpClient, + Credentials: credentials.NewStaticCredentials(accessKey, secretKey, ""), + } + + ses, err := session.NewSession(awsConfig) + if err != nil { + return nil, fmt.Errorf("aws session: %s", err) + } + ses.Handlers.Build.PushFront(disableSha256Func) + return &scw{s3client{bucket, s3.New(ses), ses}}, nil +} + +func init() { + Register("scw", newScw) +} diff --git a/pkg/object/sftp.go b/pkg/object/sftp.go new file mode 100644 index 0000000..7db7317 --- /dev/null +++ b/pkg/object/sftp.go @@ -0,0 +1,491 @@ +//go:build !nosftp +// +build !nosftp + +// Part of this file is borrowed from Rclone under MIT license: +// https://github.com/ncw/rclone/blob/master/backend/sftp/sftp.go + +package object + +import ( + "bytes" + "fmt" + "io" + "io/ioutil" + "net" + "os" + "path/filepath" + "runtime" + "sort" + "strings" + "sync" + "time" + + "github.com/pkg/errors" + "github.com/pkg/sftp" + "golang.org/x/crypto/ssh" +) + +// conn encapsulates an ssh client and corresponding sftp client +type conn struct { + sshClient *ssh.Client + sftpClient *sftp.Client + err chan error +} + +// Wait for connection to close +func (c *conn) wait() { + c.err <- c.sshClient.Conn.Wait() +} + +// Closes the connection +func (c *conn) close() error { + sftpErr := c.sftpClient.Close() + sshErr := c.sshClient.Close() + if sftpErr != nil { + return sftpErr + } + return sshErr +} + +// Returns an error if closed +func (c *conn) closed() error { + select { + case err := <-c.err: + return err + default: + } + return nil +} + +type sftpStore struct { + DefaultObjectStorage + host string + port string + root string + config *ssh.ClientConfig + poolMu sync.Mutex + pool []*conn +} + +// Open a new connection to the SFTP server. 
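+// Callers normally go through getSftpConnection below, which reuses pooled
+// connections and only dials a new one when the pool is empty.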
+func (f *sftpStore) sftpConnection() (c *conn, err error) { + c = &conn{ + err: make(chan error, 1), + } + conn, err := net.Dial("tcp", net.JoinHostPort(f.host, f.port)) + if err != nil { + return nil, err + } + sshc, chans, reqs, err := ssh.NewClientConn(conn, net.JoinHostPort(f.host, f.port), f.config) + if err != nil { + return nil, err + } + c.sshClient = ssh.NewClient(sshc, chans, reqs) + c.sftpClient, err = sftp.NewClient(c.sshClient) + if err != nil { + _ = c.sshClient.Close() + return nil, errors.Wrap(err, "couldn't initialise SFTP") + } + go c.wait() + return c, nil +} + +// Get an SFTP connection from the pool, or open a new one +func (f *sftpStore) getSftpConnection() (c *conn, err error) { + f.poolMu.Lock() + for len(f.pool) > 0 { + c = f.pool[0] + f.pool = f.pool[1:] + err := c.closed() + if err == nil { + break + } + c = nil + } + f.poolMu.Unlock() + if c != nil { + return c, nil + } + return f.sftpConnection() +} + +// Return an SFTP connection to the pool +// +// It nils the pointed to connection out so it can't be reused +// +// if err is not nil then it checks the connection is alive using a +// Getwd request +func (f *sftpStore) putSftpConnection(pc **conn, err error) { + c := *pc + *pc = nil + if err != nil { + // work out if this is an expected error + underlyingErr := errors.Cause(err) + isRegularError := false + switch underlyingErr { + case os.ErrNotExist: + isRegularError = true + default: + switch underlyingErr.(type) { + case *sftp.StatusError, *os.PathError: + isRegularError = true + } + } + // If not a regular SFTP error code then check the connection + if !isRegularError { + _, nopErr := c.sftpClient.Getwd() + if nopErr != nil { + _ = c.close() + return + } + } + } + f.poolMu.Lock() + f.pool = append(f.pool, c) + f.poolMu.Unlock() +} + +func (f *sftpStore) String() string { + return fmt.Sprintf("%s@%s:%s/", f.config.User, f.host, f.root) +} + +// always preserve suffix `/` for directory key +func (f *sftpStore) path(key string) string { + if key == "" { + return f.root + } + var absPath string + if strings.HasSuffix(key, dirSuffix) { + absPath = filepath.Join(f.root, key) + dirSuffix + } else { + absPath = filepath.Join(f.root, key) + } + if runtime.GOOS == "windows" { + absPath = strings.Replace(absPath, "\\", "/", -1) + } + return absPath +} + +func (f *sftpStore) Head(key string) (Object, error) { + c, err := f.getSftpConnection() + if err != nil { + return nil, err + } + defer f.putSftpConnection(&c, err) + + info, err := c.sftpClient.Stat(f.path(key)) + if err != nil { + return nil, err + } + return fileInfo(key, info), nil +} + +func (f *sftpStore) Get(key string, off, limit int64) (io.ReadCloser, error) { + c, err := f.getSftpConnection() + if err != nil { + return nil, err + } + defer f.putSftpConnection(&c, err) + + p := f.path(key) + ff, err := c.sftpClient.Open(p) + if err != nil { + return nil, err + } + finfo, err := ff.Stat() + if err != nil { + return nil, err + } + if finfo.IsDir() { + return ioutil.NopCloser(bytes.NewBuffer([]byte{})), nil + } + + if off > 0 { + if _, err := ff.Seek(off, 0); err != nil { + _ = ff.Close() + return nil, err + } + } + if limit > 0 { + buf := make([]byte, limit) + if n, err := ff.Read(buf); n == 0 && err != nil { + return nil, err + } else { + return ioutil.NopCloser(bytes.NewBuffer(buf[:n])), nil + } + } + return ff, err +} + +func (f *sftpStore) Put(key string, in io.Reader) error { + c, err := f.getSftpConnection() + if err != nil { + return err + } + defer f.putSftpConnection(&c, err) + + p := f.path(key) + 
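+ // Directory keys are created with MkdirAll; file keys are written to a hidden temporary file and then renamed into place, so a failed upload never leaves a partial object behind.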
if strings.HasSuffix(p, dirSuffix) { + return c.sftpClient.MkdirAll(p) + } + if err := c.sftpClient.MkdirAll(filepath.Dir(p)); err != nil { + return err + } + tmp := filepath.Join(filepath.Dir(p), "."+filepath.Base(p)+".tmp") + if runtime.GOOS == "windows" { + tmp = strings.Replace(tmp, "\\", "/", -1) + } + + ff, err := c.sftpClient.OpenFile(tmp, os.O_CREATE|os.O_WRONLY|os.O_TRUNC) + if err != nil { + return err + } + defer func() { _ = c.sftpClient.Remove(tmp) }() + buf := bufPool.Get().(*[]byte) + defer bufPool.Put(buf) + _, err = io.CopyBuffer(ff, in, *buf) + if err != nil { + _ = ff.Close() + return err + } + err = ff.Close() + if err != nil { + return err + } + _ = c.sftpClient.Remove(p) + return c.sftpClient.Rename(tmp, p) +} + +func (f *sftpStore) Chtimes(key string, mtime time.Time) error { + c, err := f.getSftpConnection() + if err != nil { + return err + } + defer f.putSftpConnection(&c, err) + return c.sftpClient.Chtimes(f.path(key), mtime, mtime) +} + +func (f *sftpStore) Chmod(key string, mode os.FileMode) error { + c, err := f.getSftpConnection() + if err != nil { + return err + } + defer f.putSftpConnection(&c, err) + return c.sftpClient.Chmod(f.path(key), mode) +} + +func (f *sftpStore) Chown(key string, owner, group string) error { + c, err := f.getSftpConnection() + if err != nil { + return err + } + defer f.putSftpConnection(&c, err) + uid := lookupUser(owner) + gid := lookupGroup(group) + return c.sftpClient.Chown(f.path(key), uid, gid) +} + +func (f *sftpStore) Delete(key string) error { + c, err := f.getSftpConnection() + if err != nil { + return err + } + defer f.putSftpConnection(&c, err) + err = c.sftpClient.Remove(f.path(key)) + if err != nil && os.IsNotExist(err) { + err = nil + } + return err +} + +func sortFIsByName(fis []os.FileInfo) { + sort.Slice(fis, func(i, j int) bool { + name1 := fis[i].Name() + if fis[i].IsDir() { + name1 += "/" + } + name2 := fis[j].Name() + if fis[j].IsDir() { + name2 += "/" + } + return name1 < name2 + }) +} + +func fileInfo(key string, fi os.FileInfo) Object { + owner, group := getOwnerGroup(fi) + f := &file{ + obj{key, fi.Size(), fi.ModTime(), fi.IsDir()}, + owner, + group, + fi.Mode(), + } + if fi.IsDir() { + if key != "" && !strings.HasSuffix(key, "/") { + f.key += "/" + } + f.size = 0 + } + return f +} + +func (f *sftpStore) doFind(c *sftp.Client, path, marker string, out chan Object) { + infos, err := c.ReadDir(path) + if err != nil { + logger.Errorf("readdir %s: %s", path, err) + return + } + + sortFIsByName(infos) + for _, fi := range infos { + p := path + fi.Name() + key := p[len(f.root):] + if key > marker { + out <- fileInfo(key, fi) + } + if fi.IsDir() && (key > marker || strings.HasPrefix(marker, key)) { + f.doFind(c, p+dirSuffix, marker, out) + } + } +} + +func (f *sftpStore) find(c *sftp.Client, path, marker string, out chan Object) { + if strings.HasSuffix(path, dirSuffix) { + fi, err := c.Stat(path) + if err != nil { + logger.Errorf("Stat %s error: %q", path, err) + return + } + if marker == "" { + out <- fileInfo(path[len(f.root):], fi) + } + f.doFind(c, path, marker, out) + } else { + // As files or dirs in the same directory of file `path` resides + // may have prefix `path`, we should list the directory. 
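+ // Only entries whose keys share the requested prefix are emitted; a subdirectory is walked recursively when it may still contain keys after the marker.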
+ dir := filepath.Dir(path) + dirSuffix + infos, err := c.ReadDir(dir) + if err != nil { + logger.Errorf("readdir %s: %s", dir, err) + return + } + + sortFIsByName(infos) + for _, fi := range infos { + p := dir + fi.Name() + if !strings.HasPrefix(p, f.root) { + if p > f.root { + break + } + continue + } + + key := p[len(f.root):] + prefix := path[len(f.root):] + if strings.HasPrefix(key, prefix) { + if key > marker || marker == "" { + out <- fileInfo(key, fi) + } + if fi.IsDir() && (key > marker || strings.HasPrefix(marker, key)) { + f.doFind(c, p+dirSuffix, marker, out) + } + } + } + } +} + +func (f *sftpStore) List(prefix, marker string, limit int64) ([]Object, error) { + return nil, notSupported +} + +func (f *sftpStore) ListAll(prefix, marker string) (<-chan Object, error) { + c, err := f.getSftpConnection() + if err != nil { + return nil, err + } + listed := make(chan Object, 10240) + go func() { + defer f.putSftpConnection(&c, nil) + + f.find(c.sftpClient, f.path(prefix), marker, listed) + close(listed) + }() + return listed, nil +} + +func newSftp(endpoint, user, pass string) (ObjectStorage, error) { + idx := strings.LastIndex(endpoint, ":") + host, port, err := net.SplitHostPort(endpoint[:idx]) + if err != nil && strings.Contains(err.Error(), "missing port") { + host, port, err = net.SplitHostPort(endpoint[:idx] + ":22") + } + if err != nil { + return nil, fmt.Errorf("unable to parse host from endpoint (%s): %q", endpoint, err) + } + root := filepath.Clean(endpoint[idx+1:]) + if runtime.GOOS == "windows" { + root = strings.Replace(root, "\\", "/", -1) + } + // append suffix `/` removed by filepath.Clean() + // `.` is a directory, add `/` + if strings.HasSuffix(endpoint[idx+1:], dirSuffix) || root == "." { + root = root + dirSuffix + } + + config := &ssh.ClientConfig{ + User: user, + HostKeyCallback: ssh.InsecureIgnoreHostKey(), + Timeout: time.Second * 3, + } + + if pass != "" { + config.Auth = append(config.Auth, ssh.Password(pass)) + } + + if privateKeyPath := os.Getenv("SSH_PRIVATE_KEY_PATH"); privateKeyPath != "" { + key, err := ioutil.ReadFile(privateKeyPath) + if err != nil { + return nil, fmt.Errorf("unable to read private key, error: %v", err) + } + + signer, err := ssh.ParsePrivateKey(key) + if err != nil { + return nil, fmt.Errorf("unable to parse private key, error: %v", err) + } + + config.Auth = append(config.Auth, ssh.PublicKeys(signer)) + } + + f := &sftpStore{ + host: host, + port: port, + root: root, + config: config, + } + + c, err := f.getSftpConnection() + if err != nil { + logger.Errorf("getSftpConnection failed: %s", err) + return nil, err + } + defer f.putSftpConnection(&c, err) + + if strings.HasSuffix(root, dirSuffix) { + logger.Debugf("Ensure directory %s", root) + if err := c.sftpClient.MkdirAll(root); err != nil { + return nil, fmt.Errorf("Creating directory %s failed: %q", root, err) + } + } else { + dir := filepath.Dir(root) + logger.Debugf("Ensure directory %s", dir) + if err := c.sftpClient.MkdirAll(dir); err != nil { + return nil, fmt.Errorf("Creating directory %s failed: %q", dir, err) + } + } + + return f, nil +} + +func init() { + Register("sftp", newSftp) +} diff --git a/pkg/object/sharding.go b/pkg/object/sharding.go new file mode 100644 index 0000000..e9ae6da --- /dev/null +++ b/pkg/object/sharding.go @@ -0,0 +1,202 @@ +/* + * JuiceFS, Copyright 2018 Juicedata, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package object + +import ( + "container/heap" + "fmt" + "hash/fnv" + "io" + "strings" + "time" +) + +type sharded struct { + DefaultObjectStorage + stores []ObjectStorage +} + +func (s *sharded) String() string { + return fmt.Sprintf("shard%d://%s", len(s.stores), s.stores[0]) +} + +func (s *sharded) Create() error { + for _, o := range s.stores { + if err := o.Create(); err != nil { + return err + } + } + return nil +} + +func (s *sharded) pick(key string) ObjectStorage { + h := fnv.New32a() + _, _ = h.Write([]byte(key)) + i := h.Sum32() % uint32(len(s.stores)) + return s.stores[i] +} + +func (s *sharded) Head(key string) (Object, error) { + return s.pick(key).Head(key) +} + +func (s *sharded) Get(key string, off, limit int64) (io.ReadCloser, error) { + return s.pick(key).Get(key, off, limit) +} + +func (s *sharded) Put(key string, body io.Reader) error { + return s.pick(key).Put(key, body) +} + +func (s *sharded) Delete(key string) error { + return s.pick(key).Delete(key) +} + +const maxResults = 10000 + +// ListAll on all the keys that starts at marker from object storage. +func ListAll(store ObjectStorage, prefix, marker string) (<-chan Object, error) { + if ch, err := store.ListAll(prefix, marker); err == nil { + return ch, nil + } + + startTime := time.Now() + out := make(chan Object, maxResults) + logger.Debugf("Listing objects from %s marker %q", store, marker) + objs, err := store.List("", marker, maxResults) + if err != nil { + logger.Errorf("Can't list %s: %s", store, err.Error()) + return nil, err + } + logger.Debugf("Found %d object from %s in %s", len(objs), store, time.Since(startTime)) + go func() { + lastkey := "" + first := true + END: + for len(objs) > 0 { + for _, obj := range objs { + key := obj.Key() + if !first && key <= lastkey { + logger.Fatalf("The keys are out of order: marker %q, last %q current %q", marker, lastkey, key) + } + lastkey = key + // logger.Debugf("found key: %s", key) + out <- obj + first = false + } + // Corner case: the func parameter `marker` is an empty string("") and exactly + // one object which key is an empty string("") returned by the List() method. 
+ if lastkey == "" { + break END + } + + marker = lastkey + startTime = time.Now() + logger.Debugf("Continue listing objects from %s marker %q", store, marker) + objs, err = store.List(prefix, marker, maxResults) + for err != nil { + logger.Warnf("Fail to list: %s, retry again", err.Error()) + // slow down + time.Sleep(time.Millisecond * 100) + objs, err = store.List(prefix, marker, maxResults) + } + logger.Debugf("Found %d object from %s in %s", len(objs), store, time.Since(startTime)) + } + close(out) + }() + return out, nil +} + +type nextKey struct { + o Object + ch <-chan Object +} + +type nextObjects struct { + os []nextKey +} + +func (s *nextObjects) Len() int { return len(s.os) } +func (s *nextObjects) Less(i, j int) bool { return s.os[i].o.Key() < s.os[j].o.Key() } +func (s *nextObjects) Swap(i, j int) { s.os[i], s.os[j] = s.os[j], s.os[i] } +func (s *nextObjects) Push(o interface{}) { s.os = append(s.os, o.(nextKey)) } +func (s *nextObjects) Pop() interface{} { + o := s.os[len(s.os)-1] + s.os = s.os[:len(s.os)-1] + return o +} + +func (s *sharded) ListAll(prefix, marker string) (<-chan Object, error) { + heads := &nextObjects{make([]nextKey, 0)} + for i := range s.stores { + ch, err := ListAll(s.stores[i], prefix, marker) + if err != nil { + return nil, fmt.Errorf("list %s: %s", s.stores[i], err) + } + first := <-ch + if first != nil { + heads.Push(nextKey{first, ch}) + } + } + heap.Init(heads) + + out := make(chan Object, 1000) + go func() { + for heads.Len() > 0 { + n := heap.Pop(heads).(nextKey) + out <- n.o + o := <-n.ch + if o != nil { + heap.Push(heads, nextKey{o, n.ch}) + } + } + close(out) + }() + return out, nil +} + +func (s *sharded) CreateMultipartUpload(key string) (*MultipartUpload, error) { + return s.pick(key).CreateMultipartUpload(key) +} + +func (s *sharded) UploadPart(key string, uploadID string, num int, body []byte) (*Part, error) { + return s.pick(key).UploadPart(key, uploadID, num, body) +} + +func (s *sharded) AbortUpload(key string, uploadID string) { + s.pick(key).AbortUpload(key, uploadID) +} + +func (s *sharded) CompleteUpload(key string, uploadID string, parts []*Part) error { + return s.pick(key).CompleteUpload(key, uploadID, parts) +} + +func NewSharded(name, endpoint, ak, sk string, shards int) (ObjectStorage, error) { + stores := make([]ObjectStorage, shards) + var err error + for i := range stores { + ep := fmt.Sprintf(endpoint, i) + if strings.HasSuffix(ep, "%!(EXTRA int=0)") { + return nil, fmt.Errorf("can not generate different endpoint using %s", endpoint) + } + stores[i], err = CreateStorage(name, ep, ak, sk) + if err != nil { + return nil, err + } + } + return &sharded{stores: stores}, nil +} diff --git a/pkg/object/space.go b/pkg/object/space.go new file mode 100644 index 0000000..21f1420 --- /dev/null +++ b/pkg/object/space.go @@ -0,0 +1,71 @@ +//go:build !nos3 +// +build !nos3 + +/* + * JuiceFS, Copyright 2018 Juicedata, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package object + +import ( + "fmt" + "net/url" + "strings" + + "github.com/aws/aws-sdk-go/aws" + "github.com/aws/aws-sdk-go/aws/credentials" + "github.com/aws/aws-sdk-go/aws/session" + "github.com/aws/aws-sdk-go/service/s3" +) + +type space struct { + s3client +} + +func (s *space) String() string { + return fmt.Sprintf("space://%s/", s.s3client.bucket) +} + +func newSpace(endpoint, accessKey, secretKey string) (ObjectStorage, error) { + if !strings.Contains(endpoint, "://") { + endpoint = fmt.Sprintf("https://%s", endpoint) + } + uri, _ := url.ParseRequestURI(endpoint) + ssl := strings.ToLower(uri.Scheme) == "https" + hostParts := strings.Split(uri.Host, ".") + bucket := hostParts[0] + region := hostParts[1] + endpoint = uri.Host[len(bucket)+1:] + + awsConfig := &aws.Config{ + Region: ®ion, + Endpoint: &endpoint, + DisableSSL: aws.Bool(!ssl), + S3ForcePathStyle: aws.Bool(false), + HTTPClient: httpClient, + Credentials: credentials.NewStaticCredentials(accessKey, secretKey, ""), + } + + ses, err := session.NewSession(awsConfig) + if err != nil { + return nil, fmt.Errorf("aws session: %s", err) + } + ses.Handlers.Build.PushFront(disableSha256Func) + return &space{s3client{bucket, s3.New(ses), ses}}, nil +} + +func init() { + Register("space", newSpace) +} diff --git a/pkg/object/speedy.go b/pkg/object/speedy.go new file mode 100644 index 0000000..bdbfba2 --- /dev/null +++ b/pkg/object/speedy.go @@ -0,0 +1,103 @@ +/* + * JuiceFS, Copyright 2018 Juicedata, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package object + +import ( + "encoding/xml" + "fmt" + "io" + "net/http" + "net/url" + "strconv" + "strings" + "time" +) + +type speedy struct { + RestfulStorage +} + +func (s *speedy) String() string { + uri, _ := url.ParseRequestURI(s.endpoint) + return fmt.Sprintf("speedy://%s/", uri.Host) +} + +func (s *speedy) List(prefix, marker string, limit int64) ([]Object, error) { + uri, _ := url.ParseRequestURI(s.endpoint) + + query := url.Values{} + query.Add("prefix", prefix) + query.Add("marker", marker) + if limit > 100000 { + limit = 100000 + } + query.Add("max-keys", strconv.Itoa(int(limit)+1)) + uri.RawQuery = query.Encode() + uri.Path = "/" + req, err := http.NewRequest("GET", uri.String(), nil) + if err != nil { + return nil, err + } + now := time.Now().UTC().Format(http.TimeFormat) + req.Header.Add("Date", now) + s.signer(req, s.accessKey, s.secretKey, s.signName) + resp, err := httpClient.Do(req) + if err != nil { + return nil, err + } + defer cleanup(resp) + if resp.StatusCode != 200 { + return nil, parseError(resp) + } + if resp.ContentLength <= 0 || resp.ContentLength > (1<<31) { + return nil, fmt.Errorf("invalid content length: %d", resp.ContentLength) + } + data := make([]byte, resp.ContentLength) + if _, err := io.ReadFull(resp.Body, data); err != nil { + return nil, err + } + var out ListBucketResult + err = xml.Unmarshal(data, &out) + if err != nil { + return nil, err + } + objs := make([]Object, 0) + for _, item := range out.Contents { + if strings.HasSuffix(item.Key, "/.speedycloud_dir_flag") { + continue + } + objs = append(objs, &obj{item.Key, item.Size, item.LastModified, strings.HasSuffix(item.Key, "/")}) + } + return objs, nil +} + +func newSpeedy(endpoint, accessKey, secretKey string) (ObjectStorage, error) { + if !strings.Contains(endpoint, "://") { + endpoint = fmt.Sprintf("https://%s", endpoint) + } + return &speedy{RestfulStorage{ + endpoint: endpoint, + accessKey: accessKey, + secretKey: secretKey, + signName: "AWS", + signer: sign, + }}, nil +} + +func init() { + Register("speedy", newSpeedy) +} diff --git a/pkg/object/swift.go b/pkg/object/swift.go new file mode 100644 index 0000000..8227101 --- /dev/null +++ b/pkg/object/swift.go @@ -0,0 +1,106 @@ +//go:build !noswift +// +build !noswift + +/* + * JuiceFS, Copyright 2021 Juicedata, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package object + +import ( + "fmt" + "io" + "net/url" + "strings" + + "github.com/ncw/swift" +) + +type swiftOSS struct { + DefaultObjectStorage + conn *swift.Connection + region string + storageUrl string + container string +} + +func (s *swiftOSS) String() string { + return fmt.Sprintf("swift://%s/", s.container) +} + +func (s *swiftOSS) Create() error { + // No error is returned if it already exists but the metadata if any will be updated. 
+ return s.conn.ContainerCreate(s.container, nil) +} + +func (s *swiftOSS) Get(key string, off, limit int64) (io.ReadCloser, error) { + headers := make(map[string]string) + if off > 0 || limit > 0 { + if limit > 0 { + headers["Range"] = fmt.Sprintf("bytes=%d-%d", off, off+limit-1) + } else { + headers["Range"] = fmt.Sprintf("bytes=%d-", off) + } + } + f, _, err := s.conn.ObjectOpen(s.container, key, true, headers) + return f, err +} + +func (s *swiftOSS) Put(key string, in io.Reader) error { + _, err := s.conn.ObjectPut(s.container, key, in, true, "", "", nil) + return err +} + +func (s *swiftOSS) Delete(key string) error { + return s.conn.ObjectDelete(s.container, key) +} + +func newSwiftOSS(endpoint, accessKey, secretKey string) (ObjectStorage, error) { + if !strings.Contains(endpoint, "://") { + endpoint = fmt.Sprintf("http://%s", endpoint) + } + uri, err := url.ParseRequestURI(endpoint) + if err != nil { + return nil, fmt.Errorf("Invalid endpoint %s: %s", endpoint, err) + } + if uri.Scheme != "http" && uri.Scheme != "https" { + return nil, fmt.Errorf("Invalid uri.Scheme: %s", uri.Scheme) + } + + hostSlice := strings.SplitN(uri.Host, ".", 2) + if len(hostSlice) != 2 { + return nil, fmt.Errorf("Invalid host: %s", uri.Host) + } + container := hostSlice[0] + host := hostSlice[1] + + // current only support V1 authentication + authURL := uri.Scheme + "://" + host + "/auth/v1.0" + + conn := swift.Connection{ + UserName: accessKey, + ApiKey: secretKey, + AuthUrl: authURL, + } + err = conn.Authenticate() + if err != nil { + return nil, fmt.Errorf("Auth: %s", err) + } + return &swiftOSS{DefaultObjectStorage{}, &conn, conn.Region, conn.StorageUrl, container}, nil +} + +func init() { + Register("swift", newSwiftOSS) +} diff --git a/pkg/object/tikv.go b/pkg/object/tikv.go new file mode 100644 index 0000000..834fc05 --- /dev/null +++ b/pkg/object/tikv.go @@ -0,0 +1,123 @@ +//go:build !notikv +// +build !notikv + +/* + * JuiceFS, Copyright 2021 Juicedata, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package object + +import ( + "bytes" + "context" + "errors" + "fmt" + "io" + "io/ioutil" + "strings" + "time" + + plog "github.com/pingcap/log" + "github.com/sirupsen/logrus" + "github.com/tikv/client-go/v2/config" + "github.com/tikv/client-go/v2/rawkv" +) + +type tikv struct { + DefaultObjectStorage + c *rawkv.Client + addr string +} + +func (t *tikv) String() string { + return fmt.Sprintf("tikv://%s/", t.addr) +} + +func (t *tikv) Get(key string, off, limit int64) (io.ReadCloser, error) { + d, err := t.c.Get(context.TODO(), []byte(key)) + if len(d) == 0 { + err = errors.New("not found") + } + if err != nil { + return nil, err + } + data := d[off:] + if limit > 0 && limit < int64(len(data)) { + data = data[:limit] + } + return ioutil.NopCloser(bytes.NewBuffer(data)), nil +} + +func (t *tikv) Put(key string, in io.Reader) error { + d, err := ioutil.ReadAll(in) + if err != nil { + return err + } + return t.c.Put(context.TODO(), []byte(key), d) +} + +func (t *tikv) Head(key string) (Object, error) { + data, err := t.c.Get(context.TODO(), []byte(key)) + return &obj{ + key, + int64(len(data)), + time.Now(), + strings.HasSuffix(key, "/"), + }, err +} + +func (t *tikv) Delete(key string) error { + return t.c.Delete(context.TODO(), []byte(key)) +} + +func (t *tikv) List(prefix, marker string, limit int64) ([]Object, error) { + return nil, errors.New("not supported") +} + +func newTiKV(endpoint, accesskey, secretkey string) (ObjectStorage, error) { + var plvl string // TiKV (PingCap) uses uber-zap logging, make it less verbose + switch logger.Level { + case logrus.TraceLevel: + plvl = "debug" + case logrus.DebugLevel: + plvl = "info" + case logrus.InfoLevel, logrus.WarnLevel: + plvl = "warn" + case logrus.ErrorLevel: + plvl = "error" + default: + plvl = "dpanic" + } + l, prop, _ := plog.InitLogger(&plog.Config{Level: plvl}) + plog.ReplaceGlobals(l, prop) + + pds := strings.Split(endpoint, ",") + for i, pd := range pds { + pd = strings.TrimSpace(pd) + if !strings.Contains(pd, ":") { + pd += ":2379" + } + pds[i] = pd + } + c, err := rawkv.NewClient(context.TODO(), pds, config.DefaultConfig().Security) + if err != nil { + return nil, err + } + return &tikv{c: c, addr: endpoint}, nil +} + +func init() { + Register("tikv", newTiKV) +} diff --git a/pkg/object/ufile.go b/pkg/object/ufile.go new file mode 100644 index 0000000..48897fa --- /dev/null +++ b/pkg/object/ufile.go @@ -0,0 +1,333 @@ +//go:build !noufile +// +build !noufile + +/* + * JuiceFS, Copyright 2021 Juicedata, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package object + +import ( + "bytes" + "crypto/hmac" + "crypto/sha1" + "encoding/base64" + "encoding/hex" + "encoding/json" + "errors" + "fmt" + "io" + "io/ioutil" + "net/http" + "net/url" + "strconv" + "strings" + "time" +) + +type ufile struct { + RestfulStorage +} + +func (u *ufile) String() string { + uri, _ := url.ParseRequestURI(u.endpoint) + return fmt.Sprintf("ufile://%s/", uri.Host) +} + +func ufileSigner(req *http.Request, accessKey, secretKey, signName string) { + if accessKey == "" { + return + } + toSign := req.Method + "\n" + for _, n := range HEADER_NAMES { + toSign += req.Header.Get(n) + "\n" + } + bucket := strings.Split(req.URL.Host, ".")[0] + key := req.URL.Path + // Hack for UploadHit + if len(req.URL.RawQuery) > 0 { + vs, _ := url.ParseQuery(req.URL.RawQuery) + if _, ok := vs["FileName"]; ok { + key = "/" + vs.Get("FileName") + } + } + toSign += "/" + bucket + key + h := hmac.New(sha1.New, []byte(secretKey)) + _, _ = h.Write([]byte(toSign)) + sig := base64.StdEncoding.EncodeToString(h.Sum(nil)) + token := signName + " " + accessKey + ":" + sig + req.Header.Add("Authorization", token) +} + +func (u *ufile) Create() error { + uri, _ := url.ParseRequestURI(u.endpoint) + parts := strings.Split(uri.Host, ".") + name := parts[0] + region := parts[1] // www.cn-bj.ufileos.com + if region == "ufile" { + region = parts[2] // www.ufile.cn-north-02.ucloud.cn + } + if strings.HasPrefix(region, "internal") { + // www.internal-hk-01.ufileos.cn + // www.internal-cn-gd-02.ufileos.cn + ps := strings.Split(region, "-") + region = strings.Join(ps[1:len(ps)-1], "-") + } + + query := url.Values{} + query.Add("Action", "CreateBucket") + query.Add("BucketName", name) + query.Add("PublicKey", u.accessKey) + query.Add("Region", region) + + // generate signature + toSign := fmt.Sprintf("ActionCreateBucketBucketName%sPublicKey%sRegion%s", + name, u.accessKey, region) + + sum := sha1.Sum([]byte(toSign + u.secretKey)) + sig := hex.EncodeToString(sum[:]) + query.Add("Signature", sig) + + req, err := http.NewRequest("GET", "https://api.ucloud.cn/?"+query.Encode(), nil) + if err != nil { + return err + } + resp, err := httpClient.Do(req) + if err != nil { + return err + } + defer resp.Body.Close() + err = parseError(resp) + if strings.Contains(err.Error(), "duplicate bucket name") || + strings.Contains(err.Error(), "CreateBucketResponse") { + err = nil + } + return err +} + +func (u *ufile) parseResp(resp *http.Response, out interface{}) error { + defer resp.Body.Close() + var data []byte + if resp.ContentLength <= 0 || resp.ContentLength > (1<<31) { + d, err := ioutil.ReadAll(resp.Body) + if err != nil { + return err + } + data = d + } else { + data = make([]byte, resp.ContentLength) + if _, err := io.ReadFull(resp.Body, data); err != nil { + return err + } + } + + if resp.StatusCode != 200 { + return fmt.Errorf("status: %v, message: %s", resp.StatusCode, string(data)) + } + err := json.Unmarshal(data, out) + if err != nil { + return err + } + return nil +} + +func copyObj(store ObjectStorage, dst, src string) error { + in, err := store.Get(src, 0, -1) + if err != nil { + return err + } + defer in.Close() + d, err := ioutil.ReadAll(in) + if err != nil { + return err + } + return store.Put(dst, bytes.NewReader(d)) +} + +func (u *ufile) Copy(dst, src string) error { + resp, err := u.request("HEAD", src, nil, nil) + if err != nil { + return copyObj(u, dst, src) + } + if resp.StatusCode != 200 { + return copyObj(u, dst, src) + } + + etag := resp.Header["Etag"] + if len(etag) < 1 { + return 
copyObj(u, dst, src) + } + hash := etag[0][1 : len(etag[0])-1] + lens := resp.Header["Content-Length"] + if len(lens) < 1 { + return copyObj(u, dst, src) + } + uri := fmt.Sprintf("uploadhit?Hash=%s&FileName=%s&FileSize=%s", hash, dst, lens[0]) + resp, err = u.request("POST", uri, nil, nil) + if err != nil { + goto fallback + } + defer cleanup(resp) + if resp.StatusCode != 200 { + goto fallback + } + return nil +fallback: + return copyObj(u, dst, src) +} + +type DataItem struct { + FileName string + Size int64 + ModifyTime int +} + +// ListObjectsOutput presents output for ListObjects. +type uFileListObjectsOutput struct { + // Object keys + DataSet []*DataItem `json:"DataSet,omitempty"` +} + +func (u *ufile) List(prefix, marker string, limit int64) ([]Object, error) { + query := url.Values{} + query.Add("list", "") + query.Add("prefix", prefix) + query.Add("marker", marker) + if limit > 1000 { + limit = 1000 + } + query.Add("limit", strconv.Itoa(int(limit))) + resp, err := u.request("GET", "?"+query.Encode(), nil, nil) + if err != nil { + return nil, err + } + + var out uFileListObjectsOutput + if err := u.parseResp(resp, &out); err != nil { + return nil, err + } + objs := make([]Object, len(out.DataSet)) + for i, item := range out.DataSet { + objs[i] = &obj{item.FileName, item.Size, time.Unix(int64(item.ModifyTime), 0), strings.HasSuffix(item.FileName, "/")} + } + return objs, nil +} + +type ufileCreateMultipartUploadResult struct { + UploadId string + BlkSize int + Bucket string + Key string +} + +func (u *ufile) CreateMultipartUpload(key string) (*MultipartUpload, error) { + resp, err := u.request("POST", key+"?uploads", nil, nil) + if err != nil { + return nil, err + } + var out ufileCreateMultipartUploadResult + if err := u.parseResp(resp, &out); err != nil { + return nil, err + } + return &MultipartUpload{UploadID: out.UploadId, MinPartSize: out.BlkSize, MaxCount: 1000000}, nil +} + +func (u *ufile) UploadPart(key string, uploadID string, num int, data []byte) (*Part, error) { + // UFile require the PartNumber to start from 0 (continious) + num-- + path := fmt.Sprintf("%s?uploadId=%s&partNumber=%d", key, uploadID, num) + resp, err := u.request("PUT", path, bytes.NewReader(data), nil) + if err != nil { + return nil, err + } + defer cleanup(resp) + if resp.StatusCode != 200 { + return nil, fmt.Errorf("UploadPart: %s", parseError(resp).Error()) + } + etags := resp.Header["Etag"] + if len(etags) < 1 { + return nil, errors.New("No ETag") + } + return &Part{Num: num, Size: len(data), ETag: strings.Trim(etags[0], "\"")}, nil +} + +func (u *ufile) AbortUpload(key string, uploadID string) { + _, _ = u.request("DELETE", key+"?uploads="+uploadID, nil, nil) +} + +func (u *ufile) CompleteUpload(key string, uploadID string, parts []*Part) error { + etags := make([]string, len(parts)) + for i, p := range parts { + etags[i] = p.ETag + } + resp, err := u.request("POST", key+"?uploadId="+uploadID, bytes.NewReader([]byte(strings.Join(etags, ","))), nil) + if err != nil { + return err + } + defer cleanup(resp) + if resp.StatusCode != 200 { + return fmt.Errorf("CompleteMultipart: %s", parseError(resp).Error()) + } + return nil +} + +type ufileUpload struct { + FileName string + UploadId string + StartTime int +} + +type ufileListMultipartUploadsResult struct { + RetCode int + ErrMsg string + NextMarker string + DataSet []*ufileUpload +} + +func (u *ufile) ListUploads(marker string) ([]*PendingPart, string, error) { + query := url.Values{} + query.Add("muploadid", "") + query.Add("prefix", "") + 
query.Add("marker", marker) + query.Add("limit", strconv.Itoa(1000)) + resp, err := u.request("GET", "?"+query.Encode(), nil, nil) + if err != nil { + return nil, "", err + } + var out ufileListMultipartUploadsResult + // FIXME: invalid auth + if err := u.parseResp(resp, &out); err != nil { + return nil, "", err + } + if out.RetCode != 0 { + return nil, "", errors.New(out.ErrMsg) + } + parts := make([]*PendingPart, len(out.DataSet)) + for i, u := range out.DataSet { + parts[i] = &PendingPart{u.FileName, u.UploadId, time.Unix(int64(u.StartTime), 0)} + } + return parts, out.NextMarker, nil +} + +func newUFile(endpoint, accessKey, secretKey string) (ObjectStorage, error) { + if !strings.Contains(endpoint, "://") { + endpoint = fmt.Sprintf("https://%s", endpoint) + } + return &ufile{RestfulStorage{DefaultObjectStorage{}, endpoint, accessKey, secretKey, "UCloud", ufileSigner}}, nil +} + +func init() { + Register("ufile", newUFile) +} diff --git a/pkg/object/upyun.go b/pkg/object/upyun.go new file mode 100644 index 0000000..b4d53b3 --- /dev/null +++ b/pkg/object/upyun.go @@ -0,0 +1,152 @@ +//go:build !noupyun +// +build !noupyun + +/* + * JuiceFS, Copyright 2021 Juicedata, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package object + +import ( + "bytes" + "fmt" + "io" + "io/ioutil" + "net/url" + "strings" + + "github.com/upyun/go-sdk/v3/upyun" +) + +type up struct { + DefaultObjectStorage + c *upyun.UpYun + listing chan *upyun.FileInfo + err error +} + +func (u *up) String() string { + return fmt.Sprintf("upyun://%s/", u.c.Bucket) +} + +func (u *up) Create() error { + return nil +} + +func (u *up) Head(key string) (Object, error) { + info, err := u.c.GetInfo("/" + key) + if err != nil { + return nil, err + } + return &obj{ + key, + info.Size, + info.Time, + strings.HasSuffix(key, "/"), + }, nil +} + +func (u *up) Get(key string, off, limit int64) (io.ReadCloser, error) { + w := bytes.NewBuffer(nil) + _, err := u.c.Get(&upyun.GetObjectConfig{ + Path: "/" + key, + Writer: w, + }) + if err != nil { + return nil, err + } + data := w.Bytes()[off:] + if limit > 0 && limit < int64(len(data)) { + data = data[:limit] + } + return ioutil.NopCloser(bytes.NewBuffer(data)), nil +} + +func (u *up) Put(key string, in io.Reader) error { + return u.c.Put(&upyun.PutObjectConfig{ + Path: "/" + key, + Reader: in, + }) +} + +func (u *up) Delete(key string) error { + return u.c.Delete(&upyun.DeleteObjectConfig{ + Path: "/" + key, + }) +} + +func (u *up) Copy(dst, src string) error { + return u.c.Copy(&upyun.CopyObjectConfig{ + SrcPath: "/" + src, + DestPath: "/" + dst, + }) +} + +func (u *up) List(prefix, marker string, limit int64) ([]Object, error) { + if u.listing == nil { + listing := make(chan *upyun.FileInfo, limit) + go func() { + u.err = u.c.List(&upyun.GetObjectsConfig{ + Path: "/" + prefix, + ObjectsChan: listing, + MaxListTries: 10, + MaxListLevel: -1, + }) + }() + u.listing = listing + } + objs := make([]Object, 0, limit) + for len(objs) < int(limit) { + fi, ok := <-u.listing + if 
!ok { + break + } + key := prefix + "/" + fi.Name + if !fi.IsDir && key > marker { + objs = append(objs, &obj{key, fi.Size, fi.Time, strings.HasSuffix(key, "/")}) + } + } + if len(objs) > 0 { + return objs, nil + } + u.listing = nil + return nil, u.err +} + +func newUpyun(endpoint, user, passwd string) (ObjectStorage, error) { + if !strings.Contains(endpoint, "://") { + endpoint = fmt.Sprintf("https://%s", endpoint) + } + uri, err := url.ParseRequestURI(endpoint) + if err != nil { + return nil, fmt.Errorf("Invalid endpoint %s: %s", endpoint, err) + } + bucket := strings.Split(uri.Host, ".")[0] + cfg := &upyun.UpYunConfig{ + Bucket: bucket, + Operator: user, + Password: passwd, + UserAgent: UserAgent, + Hosts: make(map[string]string), + } + if strings.Contains(uri.Host, ".") { + cfg.Hosts["v0.api.upyun.com"] = strings.SplitN(uri.Host, ".", 2)[1] + } + return &up{c: upyun.NewUpYun(cfg)}, nil +} + +func init() { + Register("upyun", newUpyun) +} diff --git a/pkg/object/wasabi.go b/pkg/object/wasabi.go new file mode 100644 index 0000000..352fb51 --- /dev/null +++ b/pkg/object/wasabi.go @@ -0,0 +1,74 @@ +//go:build !nos3 +// +build !nos3 + +/* + * JuiceFS, Copyright 2018 Juicedata, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package object + +import ( + "fmt" + "net/url" + "strings" + + "github.com/aws/aws-sdk-go/aws" + "github.com/aws/aws-sdk-go/aws/credentials" + "github.com/aws/aws-sdk-go/aws/session" + "github.com/aws/aws-sdk-go/service/s3" +) + +type wasabi struct { + s3client +} + +func (s *wasabi) String() string { + return fmt.Sprintf("wasabi://%s/", s.s3client.bucket) +} + +func newWasabi(endpoint, accessKey, secretKey string) (ObjectStorage, error) { + if !strings.Contains(endpoint, "://") { + endpoint = fmt.Sprintf("https://%s", endpoint) + } + uri, err := url.ParseRequestURI(endpoint) + if err != nil { + return nil, fmt.Errorf("Invalid endpoint %s: %s", endpoint, err) + } + ssl := strings.ToLower(uri.Scheme) == "https" + hostParts := strings.Split(uri.Host, ".") + bucket := hostParts[0] + region := hostParts[2] + endpoint = uri.Host[len(bucket)+1:] + + awsConfig := &aws.Config{ + Region: ®ion, + Endpoint: &endpoint, + DisableSSL: aws.Bool(!ssl), + S3ForcePathStyle: aws.Bool(false), + HTTPClient: httpClient, + Credentials: credentials.NewStaticCredentials(accessKey, secretKey, ""), + } + + ses, err := session.NewSession(awsConfig) + if err != nil { + return nil, fmt.Errorf("aws session: %s", err) + } + ses.Handlers.Build.PushFront(disableSha256Func) + return &wasabi{s3client{bucket, s3.New(ses), ses}}, nil +} + +func init() { + Register("wasabi", newWasabi) +} diff --git a/pkg/object/webdav.go b/pkg/object/webdav.go new file mode 100644 index 0000000..cdbb8e9 --- /dev/null +++ b/pkg/object/webdav.go @@ -0,0 +1,194 @@ +//go:build !nowebdav +// +build !nowebdav + +/* + * JuiceFS, Copyright 2018 Juicedata, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package object + +import ( + "bytes" + "fmt" + "io" + "net/http" + "net/url" + "path" + "strings" + + gowebdav "github.com/emersion/go-webdav" +) + +type webdav struct { + DefaultObjectStorage + endpoint *url.URL + c *gowebdav.Client +} + +func (w *webdav) String() string { + return fmt.Sprintf("webdav://%s/", w.endpoint.Host) +} + +func (w *webdav) Create() error { + return nil +} + +func (w *webdav) Head(key string) (Object, error) { + info, err := w.c.Stat(key) + if err != nil { + return nil, err + } + return &obj{ + key, + info.Size, + info.ModTime, + strings.HasSuffix(key, "/"), + }, nil +} + +func (w *webdav) Get(key string, off, limit int64) (io.ReadCloser, error) { + if off == 0 && limit <= 0 { + return w.c.Open(key) + } + url := &url.URL{ + Scheme: w.endpoint.Scheme, + User: w.endpoint.User, + Host: w.endpoint.Host, + Path: path.Join(w.endpoint.Path, key), + } + req, err := http.NewRequest("GET", url.String(), nil) + if err != nil { + return nil, err + } + if limit > 0 { + req.Header.Add("Range", fmt.Sprintf("bytes=%d-%d", off, off+limit-1)) + } else { + req.Header.Add("Range", fmt.Sprintf("bytes=%d-", off)) + } + resp, err := httpClient.Do(req) + if err != nil { + return nil, err + } + if resp.StatusCode != 200 && resp.StatusCode != 206 { + return nil, parseError(resp) + } + return resp.Body, nil +} + +func (w *webdav) mkdirs(p string) error { + err := w.c.Mkdir(p) + if err != nil && w.isNotExist(path.Dir(p)) { + if w.mkdirs(path.Dir(p)) == nil { + err = w.c.Mkdir(p) + } + } + return err +} + +func (w *webdav) isNotExist(key string) bool { + if _, err := w.c.Stat(key); err != nil { + return strings.Contains(strings.ToLower(err.Error()), "not found") + } + return false +} + +func (w *webdav) Put(key string, in io.Reader) error { + var buf = bytes.NewBuffer(nil) + in = io.TeeReader(in, buf) + out, err := w.c.Create(key) + if err != nil { + return err + } + wbuf := bufPool.Get().(*[]byte) + defer bufPool.Put(wbuf) + _, err = io.CopyBuffer(out, in, *wbuf) + if err != nil { + return err + } + err = out.Close() + if err != nil && w.isNotExist(path.Dir(key)) { + if w.mkdirs(path.Dir(key)) == nil { + return w.Put(key, bytes.NewReader(buf.Bytes())) + } + } + return err +} + +func (w *webdav) Delete(key string) error { + err := w.c.RemoveAll(key) + if err != nil && w.isNotExist(key) { + err = nil + } + return err +} + +func (w *webdav) Copy(dst, src string) error { + return w.c.CopyAll(src, dst, true) +} + +func (w *webdav) ListAll(prefix, marker string) (<-chan Object, error) { + listed := make(chan Object, 10240) + var walkRoot string + if strings.HasSuffix(prefix, dirSuffix) { + walkRoot = prefix + } else { + // If the root is not ends with `/`, we'll list the directory root resides. 
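+ // Entries outside the requested prefix (and any at or before the marker) are filtered out in the loop below.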
+ walkRoot = path.Dir(prefix) + } + infos, err := w.c.Readdir(walkRoot, true) + if err != nil { + return nil, err + } + go func() { + for _, info := range infos { + key := info.Path[len(w.endpoint.Path):] + if info.IsDir || !strings.HasPrefix(key, prefix) || (marker != "" && key <= marker) { + continue + } + o := &obj{ + key, + info.Size, + info.ModTime, + info.IsDir, + } + listed <- o + } + close(listed) + }() + return listed, nil +} + +func newWebDAV(endpoint, user, passwd string) (ObjectStorage, error) { + if !strings.Contains(endpoint, "://") { + endpoint = fmt.Sprintf("http://%s", endpoint) + } + uri, err := url.ParseRequestURI(endpoint) + if err != nil { + return nil, fmt.Errorf("Invalid endpoint %s: %s", endpoint, err) + } + if uri.Path == "" { + uri.Path = "/" + } + uri.User = url.UserPassword(user, passwd) + c, err := gowebdav.NewClient(httpClient, uri.String()) + if err != nil { + return nil, fmt.Errorf("create client for %s: %s", uri, err) + } + return &webdav{endpoint: uri, c: c}, nil +} + +func init() { + Register("webdav", newWebDAV) +} diff --git a/pkg/object/yovole.go b/pkg/object/yovole.go new file mode 100644 index 0000000..dff51d6 --- /dev/null +++ b/pkg/object/yovole.go @@ -0,0 +1,135 @@ +/* + * JuiceFS, Copyright 2020 Juicedata, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package object + +import ( + "crypto/hmac" + "crypto/sha1" + "encoding/base64" + "encoding/json" + "fmt" + "io" + "net/http" + "net/url" + "strconv" + "strings" + "time" + + uuid "github.com/satori/go.uuid" +) + +type yovole struct { + RestfulStorage +} + +func (u *yovole) String() string { + uri, _ := url.ParseRequestURI(u.endpoint) + return fmt.Sprintf("yovole://%s/", uri.Host) +} + +func yovoleSigner(req *http.Request, accessKey, secretKey, signName string) { + var headers = []string{"date", "nonce", "version"} + nonce := uuid.NewV4() + req.Header.Add("Nonce", nonce.String()) + req.Header.Add("Version", "2018-10-30") + toSign := fmt.Sprintf("date:%s\nnonce:%s\nversion:2018-10-30\n", req.Header["Date"][0], nonce) + h := hmac.New(sha1.New, []byte(secretKey)) + _, _ = h.Write([]byte(toSign)) + sig := base64.StdEncoding.EncodeToString(h.Sum(nil)) + auth := fmt.Sprintf("YCS1-HMAC-SHA1 Credential=%s, SignedHeaders=%s, Signature=%s", + accessKey, strings.Join(headers, ";"), sig) + req.Header.Add("Authorization", auth) +} + +func (u *yovole) Create() error { + _, err := u.List("", "", 1) + if err != nil { + return fmt.Errorf("projectId needed") + } + return nil +} + +// ListOutput presents output for ListObjects. 
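+// The field names are expected to match the JSON keys of the listing response; json.Unmarshal matches field names case-insensitively.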
+type ListResult struct { + ObjectSummaries []ObjectSummaries + BucketName string + Prefix string + MaxKeys int +} + +type ObjectSummaries struct { + Key string + Size int64 + LastModified int64 +} + +func (u *yovole) List(prefix, marker string, limit int64) ([]Object, error) { + uri, _ := url.ParseRequestURI(u.endpoint) + + query := url.Values{} + query.Add("prefix", prefix) + query.Add("marker", marker) + if limit > 100000 { + limit = 100000 + } + query.Add("maxKeys", strconv.Itoa(int(limit))) + uri.RawQuery = query.Encode() + uri.Path = "/" + req, err := http.NewRequest("GET", uri.String(), nil) + if err != nil { + return nil, err + } + now := time.Now().UTC().Format(http.TimeFormat) + req.Header.Add("Date", now) + u.signer(req, u.accessKey, u.secretKey, u.signName) + resp, err := httpClient.Do(req) + if err != nil { + return nil, err + } + defer cleanup(resp) + if resp.StatusCode != 200 { + return nil, parseError(resp) + } + if resp.ContentLength <= 0 || resp.ContentLength > (1<<31) { + return nil, fmt.Errorf("invalid content length: %d", resp.ContentLength) + } + data := make([]byte, resp.ContentLength) + if _, err := io.ReadFull(resp.Body, data); err != nil { + return nil, err + } + var out ListResult + err = json.Unmarshal(data, &out) + if err != nil { + return nil, err + } + objs := make([]Object, 0) + for _, item := range out.ObjectSummaries { + objs = append(objs, &obj{item.Key, item.Size, time.Unix(item.LastModified, 0), strings.HasSuffix(item.Key, "/")}) + } + return objs, nil +} + +func newYovole(endpoint, accessKey, secretKey string) (ObjectStorage, error) { + if !strings.Contains(endpoint, "://") { + endpoint = fmt.Sprintf("https://%s", endpoint) + } + return &yovole{RestfulStorage{DefaultObjectStorage{}, endpoint, accessKey, secretKey, "YCS1", yovoleSigner}}, nil +} + +func init() { + Register("yovole", newYovole) +} diff --git a/pkg/sync/cluster.go b/pkg/sync/cluster.go new file mode 100644 index 0000000..b3f4dd9 --- /dev/null +++ b/pkg/sync/cluster.go @@ -0,0 +1,335 @@ +/* + * JuiceFS, Copyright 2020 Juicedata, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package sync + +import ( + "bufio" + "bytes" + "encoding/json" + "errors" + "fmt" + "io/ioutil" + "net" + "net/http" + "os" + "os/exec" + "path/filepath" + "strings" + "sync" + "time" + + "github.com/juicedata/juicefs/pkg/object" +) + +// Stat has the counters to represent the progress. 
+type Stat struct { + Copied int64 // the number of copied files + CopiedBytes int64 // total amount of copied data in bytes + CheckedBytes int64 // total amount of checked data in bytes + Deleted int64 // the number of deleted files + Skipped int64 // the number of files skipped + Failed int64 // the number of files that fail to copy +} + +func updateStats(r *Stat) { + copied.IncrInt64(r.Copied) + copiedBytes.IncrInt64(r.CopiedBytes) + checkedBytes.IncrInt64(r.CheckedBytes) + deleted.IncrInt64(r.Deleted) + skipped.IncrInt64(r.Skipped) + failed.IncrInt64(r.Failed) + handled.IncrInt64(r.Copied + r.Deleted + r.Skipped + r.Failed) +} + +func httpRequest(url string, body []byte) (ans []byte, err error) { + method := "GET" + if body != nil { + method = "POST" + } + req, err := http.NewRequest(method, url, bytes.NewReader(body)) + if err != nil { + return nil, err + } + var resp *http.Response + resp, err = http.DefaultClient.Do(req) + if err != nil { + return nil, err + } + defer resp.Body.Close() + return ioutil.ReadAll(resp.Body) +} + +func sendStats(addr string) { + var r Stat + r.Copied = copied.Current() + r.CopiedBytes = copiedBytes.Current() + r.CheckedBytes = checkedBytes.Current() + r.Deleted = deleted.Current() + r.Skipped = skipped.Current() + r.Failed = failed.Current() + d, _ := json.Marshal(r) + ans, err := httpRequest(fmt.Sprintf("http://%s/stats", addr), d) + if err != nil || string(ans) != "OK" { + logger.Errorf("update stats: %s %s", string(ans), err) + } else { + copied.IncrInt64(-r.Copied) + copiedBytes.IncrInt64(-r.CopiedBytes) + checkedBytes.IncrInt64(-r.CheckedBytes) + deleted.IncrInt64(-r.Deleted) + skipped.IncrInt64(-r.Skipped) + failed.IncrInt64(-r.Failed) + } +} + +func findLocalIP() (string, error) { + ifaces, err := net.Interfaces() + if err != nil { + return "", err + } + for _, iface := range ifaces { + if iface.Flags&net.FlagUp == 0 { + continue // interface down + } + if iface.Flags&net.FlagLoopback != 0 { + continue // loopback interface + } + addrs, err := iface.Addrs() + if err != nil { + return "", err + } + for _, addr := range addrs { + var ip net.IP + switch v := addr.(type) { + case *net.IPNet: + ip = v.IP + case *net.IPAddr: + ip = v.IP + } + if ip == nil || ip.IsLoopback() { + continue + } + ip = ip.To4() + if ip == nil { + continue // not an ipv4 address + } + return ip.String(), nil + } + } + return "", errors.New("are you connected to the network?") +} + +func startManager(tasks <-chan object.Object) (string, error) { + http.HandleFunc("/fetch", func(w http.ResponseWriter, req *http.Request) { + var objs []object.Object + obj, ok := <-tasks + if !ok { + _, _ = w.Write([]byte("[]")) + return + } + objs = append(objs, obj) + LOOP: + for { + select { + case obj = <-tasks: + if obj == nil { + break LOOP + } + objs = append(objs, obj) + if len(objs) > 100 { + break LOOP + } + default: + break LOOP + } + } + d, err := marshalObjects(objs) + if err != nil { + http.Error(w, err.Error(), http.StatusInternalServerError) + return + } + logger.Debugf("send %d objects to %s", len(objs), req.RemoteAddr) + _, _ = w.Write(d) + }) + http.HandleFunc("/stats", func(w http.ResponseWriter, req *http.Request) { + if req.Method != "POST" { + http.Error(w, "POST required", http.StatusBadRequest) + return + } + d, err := ioutil.ReadAll(req.Body) + if err != nil { + logger.Errorf("read: %s", err) + return + } + var r Stat + err = json.Unmarshal(d, &r) + if err != nil { + http.Error(w, err.Error(), http.StatusBadRequest) + return + } + updateStats(&r) + logger.Debugf("receive 
stats %+v from %s", r, req.RemoteAddr) + _, _ = w.Write([]byte("OK")) + }) + ip, err := findLocalIP() + if err != nil { + return "", fmt.Errorf("find local ip: %s", err) + } + l, err := net.Listen("tcp", ip+":") + if err != nil { + return "", fmt.Errorf("listen: %s", err) + } + logger.Infof("Listen at %s", l.Addr()) + go func() { _ = http.Serve(l, nil) }() + ps := strings.Split(l.Addr().String(), ":") + port := ps[len(ps)-1] + return fmt.Sprintf("%s:%s", ip, port), nil +} + +func findSelfPath() (string, error) { + program := os.Args[0] + if strings.Contains(program, "/") { + path, err := filepath.Abs(program) + if err != nil { + return "", fmt.Errorf("resolve path %s: %s", program, err) + } + return path, nil + } + for _, searchPath := range strings.Split(os.Getenv("PATH"), ":") { + if searchPath != "" { + p := filepath.Join(searchPath, program) + if _, err := os.Stat(p); err == nil { + return p, nil + } + } + } + return "", fmt.Errorf("can't find path for %s", program) +} + +func launchWorker(address string, config *Config, wg *sync.WaitGroup) { + workers := strings.Split(strings.Join(config.Workers, ","), ",") + for _, host := range workers { + wg.Add(1) + go func(host string) { + defer wg.Done() + // copy + path, err := findSelfPath() + if err != nil { + logger.Errorf("find self path: %s", err) + return + } + rpath := filepath.Join("/tmp", filepath.Base(path)) + cmd := exec.Command("rsync", "-au", path, host+":"+rpath) + err = cmd.Run() + if err != nil { + // fallback to scp + cmd = exec.Command("scp", path, host+":"+rpath) + err = cmd.Run() + } + if err != nil { + logger.Errorf("copy itself to %s: %s", host, err) + return + } + // launch itself + var args = []string{host, rpath} + if strings.HasSuffix(path, "juicefs") { + args = append(args, os.Args[1:]...) + args = append(args, "--manager", address) + } else { + args = append(args, "--manager", address) + args = append(args, os.Args[1:]...) + } + if !config.Verbose && !config.Quiet { + args = append(args, "-q") + } + + logger.Debugf("launch worker command args: [ssh, %s]", strings.Join(args, ", ")) + cmd = exec.Command("ssh", args...) 
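+ // Capture the remote command's stderr so the worker's log lines can be echoed locally, prefixed with the host name.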
+ stderr, err := cmd.StderrPipe() + if err != nil { + logger.Errorf("redirect stderr: %s", err) + } + err = cmd.Start() + if err != nil { + logger.Errorf("start itself at %s: %s", host, err) + return + } + logger.Infof("launch a worker on %s", host) + go func() { + r := bufio.NewReader(stderr) + for { + line, err := r.ReadString('\n') + if err != nil || len(line) == 0 { + return + } + println(host, line[:len(line)-1]) + } + }() + err = cmd.Wait() + if err != nil { + logger.Errorf("%s: %s", host, err) + } + }(host) + } +} + +func marshalObjects(objs []object.Object) ([]byte, error) { + var arr []map[string]interface{} + for _, o := range objs { + arr = append(arr, object.MarshalObject(o)) + } + return json.MarshalIndent(arr, "", " ") +} + +func unmarshalObjects(d []byte) ([]object.Object, error) { + var arr []map[string]interface{} + err := json.Unmarshal(d, &arr) + if err != nil { + return nil, err + } + var objs []object.Object + for _, m := range arr { + objs = append(objs, object.UnmarshalObject(m)) + } + return objs, nil +} + +func fetchJobs(tasks chan<- object.Object, config *Config) { + for { + url := fmt.Sprintf("http://%s/fetch", config.Manager) + ans, err := httpRequest(url, nil) + if err != nil { + logger.Errorf("fetch jobs: %s", err) + time.Sleep(time.Second) + continue + } + var jobs []object.Object + jobs, err = unmarshalObjects(ans) + if err != nil { + logger.Errorf("Unmarshal %s: %s", string(ans), err) + time.Sleep(time.Second) + continue + } + logger.Debugf("got %d jobs", len(jobs)) + if len(jobs) == 0 { + break + } + for _, obj := range jobs { + tasks <- obj + } + } + close(tasks) +} diff --git a/pkg/sync/cluster_test.go b/pkg/sync/cluster_test.go new file mode 100644 index 0000000..1e3ced5 --- /dev/null +++ b/pkg/sync/cluster_test.go @@ -0,0 +1,61 @@ +/* + * JuiceFS, Copyright 2020 Juicedata, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package sync + +import ( + "testing" + "time" + + "github.com/juicedata/juicefs/pkg/object" +) + +type obj struct { + key string + size int64 + mtime time.Time + isDir bool +} + +func (o *obj) Key() string { return o.key } +func (o *obj) Size() int64 { return o.size } +func (o *obj) Mtime() time.Time { return o.mtime } +func (o *obj) IsDir() bool { return o.isDir } + +func TestCluster(t *testing.T) { + // manager + todo := make(chan object.Object, 100) + addr, err := startManager(todo) + if err != nil { + t.Fatal(err) + } + // sendStats(addr) + // worker + var conf Config + conf.Manager = addr + mytodo := make(chan object.Object, 100) + go fetchJobs(mytodo, &conf) + + todo <- &obj{key: "test"} + close(todo) + + obj := <-mytodo + if obj.Key() != "test" { + t.Fatalf("expect test but got %s", obj.Key()) + } + if _, ok := <-mytodo; ok { + t.Fatalf("should end") + } +} diff --git a/pkg/sync/config.go b/pkg/sync/config.go new file mode 100644 index 0000000..2a07d9c --- /dev/null +++ b/pkg/sync/config.go @@ -0,0 +1,70 @@ +/* + * JuiceFS, Copyright 2018 Juicedata, Inc. 
+ * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package sync + +import ( + "github.com/urfave/cli/v2" +) + +type Config struct { + Start string + End string + Threads int + HTTPPort int + Update bool + ForceUpdate bool + Perms bool + Dry bool + DeleteSrc bool + DeleteDst bool + Dirs bool + Exclude []string + Include []string + Manager string + Workers []string + BWLimit int + NoHTTPS bool + Verbose bool + Quiet bool + CheckAll bool + CheckNew bool +} + +func NewConfigFromCli(c *cli.Context) *Config { + return &Config{ + Start: c.String("start"), + End: c.String("end"), + Threads: c.Int("threads"), + Update: c.Bool("update"), + ForceUpdate: c.Bool("force-update"), + Perms: c.Bool("perms"), + Dirs: c.Bool("dirs"), + Dry: c.Bool("dry"), + DeleteSrc: c.Bool("delete-src"), + DeleteDst: c.Bool("delete-dst"), + Exclude: c.StringSlice("exclude"), + Include: c.StringSlice("include"), + Workers: c.StringSlice("worker"), + Manager: c.String("manager"), + BWLimit: c.Int("bwlimit"), + NoHTTPS: c.Bool("no-https"), + Verbose: c.Bool("verbose"), + Quiet: c.Bool("quiet"), + CheckAll: c.Bool("check-all"), + CheckNew: c.Bool("check-new"), + } +} diff --git a/pkg/sync/sync.go b/pkg/sync/sync.go new file mode 100644 index 0000000..dfbcce3 --- /dev/null +++ b/pkg/sync/sync.go @@ -0,0 +1,772 @@ +/* + * JuiceFS, Copyright 2018 Juicedata, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package sync + +import ( + "bytes" + "fmt" + "io" + "io/ioutil" + "os" + "regexp" + "strings" + "sync" + "time" + + "github.com/juicedata/juicefs/pkg/object" + "github.com/juicedata/juicefs/pkg/utils" + "github.com/juju/ratelimit" +) + +// The max number of key per listing request +const ( + maxResults = 1000 + defaultPartSize = 5 << 20 + bufferSize = 32 << 10 + maxBlock = defaultPartSize * 2 + markDeleteSrc = -1 + markDeleteDst = -2 + markCopyPerms = -3 + markChecksum = -4 +) + +var ( + handled *utils.Bar + copied, copiedBytes *utils.Bar + checkedBytes *utils.Bar + deleted, skipped, failed *utils.Bar + concurrent chan int + limiter *ratelimit.Bucket +) + +var logger = utils.GetLogger("juicefs") + +// human readable bytes size +func formatSize(bytes int64) string { + units := [7]string{" ", "K", "M", "G", "T", "P", "E"} + if bytes < 1024 { + return fmt.Sprintf("%v B", bytes) + } + z := 0 + v := float64(bytes) + for v > 1024.0 { + z++ + v /= 1024.0 + } + return fmt.Sprintf("%.2f %siB", v, units[z]) +} + +// ListAll on all the keys that starts at marker from object storage. 
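+// Keys are sent to the returned channel in lexicographical order; a nil object
+// on the channel signals that the listing failed and the consumer should stop.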
+func ListAll(store object.ObjectStorage, start, end string) (<-chan object.Object, error) { + startTime := time.Now() + logger.Debugf("Iterating objects from %s start %q", store, start) + + out := make(chan object.Object, maxResults*10) + + // As the result of object storage's List method doesn't include the marker key, + // we try List the marker key separately. + if start != "" { + if obj, err := store.Head(start); err == nil { + logger.Debugf("Found start key: %s from %s in %s", start, store, time.Since(startTime)) + out <- obj + } + } + + if ch, err := store.ListAll("", start); err == nil { + if end == "" { + go func() { + for obj := range ch { + out <- obj + } + close(out) + }() + return out, nil + } + + go func() { + for obj := range ch { + if obj != nil && obj.Key() > end { + break + } + out <- obj + } + close(out) + }() + return out, nil + } + + marker := start + logger.Debugf("Listing objects from %s marker %q", store, marker) + objs, err := store.List("", marker, maxResults) + if err != nil { + logger.Errorf("Can't list %s: %s", store, err.Error()) + return nil, err + } + logger.Debugf("Found %d object from %s in %s", len(objs), store, time.Since(startTime)) + go func() { + lastkey := "" + first := true + END: + for len(objs) > 0 { + for _, obj := range objs { + key := obj.Key() + if !first && key <= lastkey { + logger.Fatalf("The keys are out of order: marker %q, last %q current %q", marker, lastkey, key) + } + if end != "" && key > end { + break END + } + lastkey = key + // logger.Debugf("key: %s", key) + out <- obj + first = false + } + // Corner case: the func parameter `marker` is an empty string("") and exactly + // one object which key is an empty string("") returned by the List() method. + if lastkey == "" { + break END + } + + marker = lastkey + startTime = time.Now() + logger.Debugf("Continue listing objects from %s marker %q", store, marker) + objs, err = store.List("", marker, maxResults) + count := 0 + for err != nil && count < 3 { + logger.Warnf("Fail to list: %s, retry again", err.Error()) + // slow down + time.Sleep(time.Millisecond * 100) + objs, err = store.List("", marker, maxResults) + count++ + } + logger.Debugf("Found %d object from %s in %s", len(objs), store, time.Since(startTime)) + if err != nil { + // Telling that the listing has failed + out <- nil + logger.Errorf("Fail to list after %s: %s", marker, err.Error()) + break + } + if len(objs) > 0 && objs[0].Key() == marker { + // workaround from a object store that is not compatible to S3. 
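+ // (an S3-compatible listing never returns the marker itself, so a first
+ // result equal to the marker would be a duplicate; drop it here)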
+ objs = objs[1:] + } + } + close(out) + }() + return out, nil +} + +var bufPool = sync.Pool{ + New: func() interface{} { + buf := make([]byte, bufferSize) + return &buf + }, +} + +func try(n int, f func() error) (err error) { + for i := 0; i < n; i++ { + err = f() + if err == nil { + return + } + time.Sleep(time.Second * time.Duration(i*i)) + } + return +} + +func deleteObj(storage object.ObjectStorage, key string, dry bool) { + if dry { + logger.Infof("Will delete %s from %s", key, storage) + return + } + start := time.Now() + if err := try(3, func() error { return storage.Delete(key) }); err == nil { + deleted.Increment() + logger.Debugf("Deleted %s from %s in %s", key, storage, time.Since(start)) + } else { + failed.Increment() + logger.Errorf("Failed to delete %s from %s in %s: %s", key, storage, time.Since(start), err) + } +} + +func needCopyPerms(o1, o2 object.Object) bool { + f1 := o1.(object.File) + f2 := o2.(object.File) + return f2.Mode() != f1.Mode() || f2.Owner() != f1.Owner() || f2.Group() != f1.Group() +} + +func copyPerms(dst object.ObjectStorage, obj object.Object) { + start := time.Now() + key := obj.Key() + fi := obj.(object.File) + if err := dst.(object.FileSystem).Chmod(key, fi.Mode()); err != nil { + logger.Warnf("Chmod %s to %d: %s", key, fi.Mode(), err) + } + if err := dst.(object.FileSystem).Chown(key, fi.Owner(), fi.Group()); err != nil { + logger.Warnf("Chown %s to (%s,%s): %s", key, fi.Owner(), fi.Group(), err) + } + logger.Debugf("Copied permissions (%s:%s:%s) for %s in %s", fi.Owner(), fi.Group(), fi.Mode(), key, time.Since(start)) +} + +func doCheckSum(src, dst object.ObjectStorage, key string, size int64, equal *bool) error { + abort := make(chan struct{}) + checkPart := func(offset, length int64) error { + if limiter != nil { + limiter.Wait(length) + } + select { + case <-abort: + return fmt.Errorf("aborted") + case concurrent <- 1: + defer func() { + <-concurrent + }() + } + in, err := src.Get(key, offset, length) + if err != nil { + return fmt.Errorf("src get: %s", err) + } + defer in.Close() + in2, err := dst.Get(key, offset, length) + if err != nil { + return fmt.Errorf("dest get: %s", err) + } + defer in2.Close() + + buf := bufPool.Get().(*[]byte) + defer bufPool.Put(buf) + buf2 := bufPool.Get().(*[]byte) + defer bufPool.Put(buf2) + for left := int(length); left > 0; left -= bufferSize { + bs := bufferSize + if left < bufferSize { + bs = left + } + *buf = (*buf)[:bs] + *buf2 = (*buf2)[:bs] + if _, err = io.ReadFull(in, *buf); err != nil { + return fmt.Errorf("src read: %s", err) + } + if _, err = io.ReadFull(in2, *buf2); err != nil { + return fmt.Errorf("dest read: %s", err) + } + if !bytes.Equal(*buf, *buf2) { + return fmt.Errorf("bytes not equal") + } + } + return nil + } + + var err error + if size < maxBlock { + err = checkPart(0, size) + } else { + n := int((size-1)/defaultPartSize) + 1 + errs := make(chan error, n) + for i := 0; i < n; i++ { + go func(num int) { + sz := int64(defaultPartSize) + if num == n-1 { + sz = size - int64(num)*defaultPartSize + } + errs <- checkPart(int64(num)*defaultPartSize, sz) + }(i) + } + for i := 0; i < n; i++ { + if err = <-errs; err != nil { + close(abort) + break + } + } + } + + if err != nil && err.Error() == "bytes not equal" { + *equal = false + err = nil + } else { + *equal = err == nil + } + return err +} + +func checkSum(src, dst object.ObjectStorage, key string, size int64) (bool, error) { + start := time.Now() + var equal bool + err := try(3, func() error { return doCheckSum(src, dst, key, size, &equal) 
}) + if err == nil { + checkedBytes.IncrInt64(size) + if equal { + logger.Debugf("Checked %s OK (and equal) in %s,", key, time.Since(start)) + } else { + logger.Warnf("Checked %s OK (but NOT equal) in %s,", key, time.Since(start)) + } + } else { + logger.Errorf("Failed to check %s in %s: %s", key, time.Since(start), err) + } + return equal, err +} + +func doCopySingle(src, dst object.ObjectStorage, key string, size int64) error { + if limiter != nil { + limiter.Wait(size) + } + concurrent <- 1 + defer func() { + <-concurrent + }() + in, err := src.Get(key, 0, -1) + if err != nil { + if _, e := src.Head(key); e != nil { + logger.Debugf("Head src %s: %s", key, err) + return nil + } + return err + } + defer in.Close() + + if size <= maxBlock || + strings.HasPrefix(src.String(), "file://") || + strings.HasPrefix(dst.String(), "file://") { + return dst.Put(key, in) + } else { // obj.Size > maxBlock, download the object into disk first + f, err := ioutil.TempFile("", "rep") + if err != nil { + return err + } + _ = os.Remove(f.Name()) // will be deleted after Close() + defer f.Close() + + if _, err = io.Copy(f, in); err != nil { + return err + } + // upload + if _, err = f.Seek(0, 0); err != nil { + return err + } + return dst.Put(key, f) + } +} + +func doCopyMultiple(src, dst object.ObjectStorage, key string, size int64, upload *object.MultipartUpload) error { + partSize := int64(upload.MinPartSize) + if partSize == 0 { + partSize = defaultPartSize + } + if size > partSize*int64(upload.MaxCount) { + partSize = size / int64(upload.MaxCount) + partSize = ((partSize-1)>>20 + 1) << 20 // align to MB + } + n := int((size-1)/partSize) + 1 + logger.Debugf("Copying data of %s as %d parts (size: %d): %s", key, n, partSize, upload.UploadID) + abort := make(chan struct{}) + parts := make([]*object.Part, n) + errs := make(chan error, n) + for i := 0; i < n; i++ { + go func(num int) { + sz := partSize + if num == n-1 { + sz = size - int64(num)*partSize + } + if limiter != nil { + limiter.Wait(sz) + } + select { + case <-abort: + errs <- fmt.Errorf("aborted") + return + case concurrent <- 1: + defer func() { + <-concurrent + }() + } + + data := make([]byte, sz) + if err := try(3, func() error { + in, err := src.Get(key, int64(num)*partSize, sz) + if err != nil { + return err + } + defer in.Close() + if _, err = io.ReadFull(in, data); err != nil { + return err + } + // PartNumber starts from 1 + parts[num], err = dst.UploadPart(key, upload.UploadID, num+1, data) + return err + }); err == nil { + errs <- nil + copiedBytes.IncrInt64(sz) + logger.Debugf("Copied data of %s part %d", key, num) + } else { + errs <- fmt.Errorf("part %d: %s", num, err) + logger.Warnf("Failed to copy data of %s part %d: %s", key, num, err) + } + }(i) + } + + var err error + for i := 0; i < n; i++ { + if err = <-errs; err != nil { + close(abort) + break + } + } + if err == nil { + err = try(3, func() error { return dst.CompleteUpload(key, upload.UploadID, parts) }) + } + if err != nil { + dst.AbortUpload(key, upload.UploadID) + return fmt.Errorf("multipart: %s", err) + } + return nil +} + +func copyData(src, dst object.ObjectStorage, key string, size int64) error { + start := time.Now() + var multiple bool + var err error + if size < maxBlock { + err = try(3, func() error { return doCopySingle(src, dst, key, size) }) + } else { + var upload *object.MultipartUpload + if upload, err = dst.CreateMultipartUpload(key); err == nil { + multiple = true + err = doCopyMultiple(src, dst, key, size, upload) + } else { // fallback + err = try(3, 
func() error { return doCopySingle(src, dst, key, size) }) + } + } + if err == nil { + if !multiple { + copiedBytes.IncrInt64(size) + } + logger.Debugf("Copied data of %s (%d bytes) in %s", key, size, time.Since(start)) + } else { + logger.Errorf("Failed to copy data of %s in %s: %s", key, time.Since(start), err) + } + return err +} + +func worker(tasks <-chan object.Object, src, dst object.ObjectStorage, config *Config) { + for obj := range tasks { + key := obj.Key() + switch obj.Size() { + case markDeleteSrc: + deleteObj(src, key, config.Dry) + case markDeleteDst: + deleteObj(dst, key, config.Dry) + case markCopyPerms: + if config.Dry { + logger.Infof("Will copy permissions for %s", key) + break + } + copyPerms(dst, obj) + copied.Increment() + case markChecksum: + if config.Dry { + logger.Infof("Will compare checksum for %s", key) + break + } + obj = obj.(*withSize).Object + if equal, err := checkSum(src, dst, key, obj.Size()); err != nil { + failed.Increment() + break + } else if equal { + if config.DeleteSrc { + deleteObj(src, key, false) + } else if config.Perms { + if o, e := dst.Head(key); e == nil { + if needCopyPerms(obj, o) { + copyPerms(dst, obj) + copied.Increment() + } else { + skipped.Increment() + } + } else { + logger.Warnf("Failed to head object %s: %s", key, e) + failed.Increment() + } + } else { + skipped.Increment() + } + break + } + // checkSum not equal, copy the object + fallthrough + default: + if config.Dry { + logger.Infof("Will copy %s (%d bytes)", obj.Key(), obj.Size()) + break + } + err := copyData(src, dst, key, obj.Size()) + if err == nil && (config.CheckAll || config.CheckNew) { + var equal bool + if equal, err = checkSum(src, dst, key, obj.Size()); err == nil && !equal { + err = fmt.Errorf("checksums of copied object %s don't match", key) + } + } + if err == nil { + if mc, ok := dst.(object.MtimeChanger); ok { + if err = mc.Chtimes(obj.Key(), obj.Mtime()); err != nil { + logger.Warnf("Update mtime of %s: %s", key, err) + } + } + if config.Perms { + copyPerms(dst, obj) + } + copied.Increment() + } else { + failed.Increment() + logger.Errorf("Failed to copy object %s: %s", key, err) + } + } + handled.Increment() + } +} + +type withSize struct { + object.Object + nsize int64 +} + +func (o *withSize) Size() int64 { + return o.nsize +} + +type withFSize struct { + object.File + nsize int64 +} + +func (o *withFSize) Size() int64 { + return o.nsize +} + +func deleteFromDst(tasks chan<- object.Object, dstobj object.Object, dirs bool) { + if !dirs && dstobj.IsDir() { + logger.Debug("Ignore deleting dst directory ", dstobj.Key()) + return + } + tasks <- &withSize{dstobj, markDeleteDst} + handled.IncrTotal(1) +} + +func producer(tasks chan<- object.Object, src, dst object.ObjectStorage, config *Config) { + start, end := config.Start, config.End + logger.Infof("Syncing from %s to %s", src, dst) + if start != "" { + logger.Infof("first key: %q", start) + } + if end != "" { + logger.Infof("last key: %q", end) + } + logger.Debugf("maxResults: %d, defaultPartSize: %d, maxBlock: %d", maxResults, defaultPartSize, maxBlock) + + srckeys, err := ListAll(src, start, end) + if err != nil { + logger.Fatal(err) + } + + dstkeys, err := ListAll(dst, start, end) + if err != nil { + logger.Fatal(err) + } + if config.Exclude != nil { + srckeys = filter(srckeys, config.Include, config.Exclude) + dstkeys = filter(dstkeys, config.Include, config.Exclude) + } + + defer close(tasks) + var dstobj object.Object + for obj := range srckeys { + if obj == nil { + logger.Errorf("Listing failed, 
stop syncing, waiting for pending ones") + return + } + if !config.Dirs && obj.IsDir() { + logger.Debug("Ignore directory ", obj.Key()) + continue + } + handled.IncrTotal(1) + + if dstobj != nil && obj.Key() > dstobj.Key() { + if config.DeleteDst { + deleteFromDst(tasks, dstobj, config.Dirs) + } + dstobj = nil + } + if dstobj == nil { + for dstobj = range dstkeys { + if dstobj == nil { + logger.Errorf("Listing failed, stop syncing, waiting for pending ones") + return + } + if obj.Key() <= dstobj.Key() { + break + } + if config.DeleteDst { + deleteFromDst(tasks, dstobj, config.Dirs) + } + dstobj = nil + } + } + + // FIXME: there is a race when source is modified during coping + if dstobj == nil || obj.Key() < dstobj.Key() { + tasks <- obj + } else { // obj.key == dstobj.key + if config.ForceUpdate || + (config.Update && obj.Mtime().Unix() > dstobj.Mtime().Unix()) || + (!config.Update && obj.Size() != dstobj.Size()) { + tasks <- obj + } else if config.Update && obj.Mtime().Unix() < dstobj.Mtime().Unix() { + skipped.Increment() + handled.Increment() + } else if config.CheckAll { // two objects are likely the same + tasks <- &withSize{obj, markChecksum} + } else if config.DeleteSrc { + tasks <- &withSize{obj, markDeleteSrc} + } else if config.Perms && needCopyPerms(obj, dstobj) { + tasks <- &withFSize{obj.(object.File), markCopyPerms} + } else { + skipped.Increment() + handled.Increment() + } + dstobj = nil + } + } + if config.DeleteDst { + if dstobj != nil { + deleteFromDst(tasks, dstobj, config.Dirs) + } + for dstobj = range dstkeys { + if dstobj != nil { + deleteFromDst(tasks, dstobj, config.Dirs) + } + } + } +} + +func compileExp(patterns []string) []*regexp.Regexp { + var rs []*regexp.Regexp + for _, p := range patterns { + r, err := regexp.CompilePOSIX(p) + if err != nil { + logger.Fatalf("invalid regular expression `%s`: %s", p, err) + } + rs = append(rs, r) + } + return rs +} + +func findAny(s string, ps []*regexp.Regexp) bool { + for _, p := range ps { + if p.FindString(s) != "" { + return true + } + } + return false +} + +func filter(keys <-chan object.Object, include, exclude []string) <-chan object.Object { + inc := compileExp(include) + exc := compileExp(exclude) + r := make(chan object.Object) + go func() { + for o := range keys { + if o == nil { + break + } + if findAny(o.Key(), exc) { + logger.Debugf("exclude %s", o.Key()) + continue + } + if len(inc) > 0 && !findAny(o.Key(), inc) { + logger.Debugf("%s is not included", o.Key()) + continue + } + r <- o + } + close(r) + }() + return r +} + +// Sync syncs all the keys between to object storage +func Sync(src, dst object.ObjectStorage, config *Config) error { + var bufferSize = 10240 + if config.Manager != "" { + bufferSize = 100 + } + tasks := make(chan object.Object, bufferSize) + wg := sync.WaitGroup{} + concurrent = make(chan int, config.Threads) + if config.BWLimit > 0 { + bps := float64(config.BWLimit*(1<<20)/8) * 0.85 // 15% overhead + limiter = ratelimit.NewBucketWithRate(bps, int64(bps)*3) + } + + progress := utils.NewProgress(config.Verbose || config.Quiet || config.Manager != "", true) + handled = progress.AddCountBar("Scanned objects", 0) + copied = progress.AddCountSpinner("Copied objects") + copiedBytes = progress.AddByteSpinner("Copied objects") + checkedBytes = progress.AddByteSpinner("Checked objects") + deleted = progress.AddCountSpinner("Deleted objects") + skipped = progress.AddCountSpinner("Skipped objects") + failed = progress.AddCountSpinner("Failed objects") + for i := 0; i < config.Threads; i++ { + 
wg.Add(1) + go func() { + defer wg.Done() + worker(tasks, src, dst, config) + }() + } + + if config.Manager == "" { + go producer(tasks, src, dst, config) + if config.Workers != nil { + addr, err := startManager(tasks) + if err != nil { + return err + } + launchWorker(addr, config, &wg) + } + } else { + go fetchJobs(tasks, config) + go func() { + for { + sendStats(config.Manager) + time.Sleep(time.Second) + } + }() + } + + wg.Wait() + progress.Done() + + if config.Manager == "" { + logger.Infof("Found: %d, copied: %d (%s), checked: %s, deleted: %d, skipped: %d, failed: %d", + handled.Current(), copied.Current(), formatSize(copiedBytes.Current()), formatSize(checkedBytes.Current()), + deleted.Current(), skipped.Current(), failed.Current()) + } else { + sendStats(config.Manager) + } + if n := failed.Current(); n > 0 { + return fmt.Errorf("Failed to handle %d objects", n) + } + return nil +} diff --git a/pkg/sync/sync_test.go b/pkg/sync/sync_test.go new file mode 100644 index 0000000..2f4e92c --- /dev/null +++ b/pkg/sync/sync_test.go @@ -0,0 +1,161 @@ +/* + * JuiceFS, Copyright 2018 Juicedata, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package sync + +import ( + "bytes" + "math" + "reflect" + "testing" + + "github.com/juicedata/juicefs/pkg/object" +) + +func collectAll(c <-chan object.Object) []string { + r := make([]string, 0) + for s := range c { + r = append(r, s.Key()) + } + return r +} + +// nolint:errcheck +func TestIterator(t *testing.T) { + m, _ := object.CreateStorage("mem", "", "", "") + m.Put("a", bytes.NewReader([]byte("a"))) + m.Put("b", bytes.NewReader([]byte("a"))) + m.Put("aa", bytes.NewReader([]byte("a"))) + m.Put("c", bytes.NewReader([]byte("a"))) + + ch, _ := ListAll(m, "a", "b") + keys := collectAll(ch) + if len(keys) != 3 { + t.Fatalf("length should be 3, but got %d", len(keys)) + } + if !reflect.DeepEqual(keys, []string{"a", "aa", "b"}) { + t.Fatalf("result wrong: %s", keys) + } + + // Single object + s, _ := object.CreateStorage("mem", "", "", "") + s.Put("a", bytes.NewReader([]byte("a"))) + ch, _ = ListAll(s, "", "") + keys = collectAll(ch) + if !reflect.DeepEqual(keys, []string{"a"}) { + t.Fatalf("result wrong: %s", keys) + } +} + +func TestIeratorSingleEmptyKey(t *testing.T) { + // utils.SetLogLevel(logrus.DebugLevel) + + // Construct mem storage + s, _ := object.CreateStorage("mem", "", "", "") + err := s.Put("abc", bytes.NewReader([]byte("abc"))) + if err != nil { + t.Fatalf("Put error: %q", err) + } + + // Simulate command line prefix in SRC or DST + s = object.WithPrefix(s, "abc") + ch, _ := ListAll(s, "", "") + keys := collectAll(ch) + if !reflect.DeepEqual(keys, []string{""}) { + t.Fatalf("result wrong: %s", keys) + } +} +func deepEqualWithOutMtime(a, b object.Object) bool { + return a.IsDir() == b.IsDir() && a.Key() == b.Key() && a.Size() == b.Size() && + math.Abs(a.Mtime().Sub(b.Mtime()).Seconds()) < 1 +} + +// nolint:errcheck +func TestSync(t *testing.T) { + config := &Config{ + Start: "", + End: "", + Threads: 50, + Update: true, + 
Perms: true, + Dry: false, + DeleteSrc: false, + DeleteDst: false, + Exclude: []string{"ab.*"}, + Include: []string{"[a|b].*"}, + Verbose: false, + Quiet: true, + } + + a, _ := object.CreateStorage("file", "/tmp/a/", "", "") + a.Put("a", bytes.NewReader([]byte("a"))) + a.Put("ab", bytes.NewReader([]byte("ab"))) + a.Put("abc", bytes.NewReader([]byte("abc"))) + + b, _ := object.CreateStorage("file", "/tmp/b/", "", "") + b.Put("ba", bytes.NewReader([]byte("ba"))) + + // Copy "a" from a to b + if err := Sync(a, b, config); err != nil { + t.Fatalf("sync: %s", err) + } + if c := copied.Current(); c != 1 { + t.Fatalf("should copy 1 keys, but got %d", c) + } + + // Now a: {"a", "ab", "abc"}, b: {"a", "ba"} + // Copy "ba" from b to a + if err := Sync(b, a, config); err != nil { + t.Fatalf("sync: %s", err) + } + if c := copied.Current(); c != 1 { + t.Fatalf("should copy 1 keys, but got %d", c) + } + + // Now aRes: {"","a", "ab", "abc", "ba"}, bRes: {"","a", "ba"} + aRes, _ := a.ListAll("", "") + bRes, _ := b.ListAll("", "") + + var aObjs, bObjs []object.Object + for obj := range aRes { + aObjs = append(aObjs, obj) + } + for obj := range bRes { + bObjs = append(bObjs, obj) + } + + if !deepEqualWithOutMtime(aObjs[1], bObjs[1]) { + t.FailNow() + } + + if !deepEqualWithOutMtime(aObjs[len(aObjs)-1], bObjs[len(bObjs)-1]) { + t.FailNow() + } + + if err := Sync(a, b, config); err != nil { + t.Fatalf("sync: %s", err) + } + // No copy occured + if c := copied.Current(); c != 0 { + t.Fatalf("should copy 0 keys, but got %d", c) + } + + // Test --force-update option + config.ForceUpdate = true + // Forcibly copy {"a", "ba"} from a to b. + if err := Sync(a, b, config); err != nil { + t.Fatalf("sync: %s", err) + } +} diff --git a/pkg/usage/usage.go b/pkg/usage/usage.go new file mode 100644 index 0000000..2a83a8d --- /dev/null +++ b/pkg/usage/usage.go @@ -0,0 +1,93 @@ +/* + * JuiceFS, Copyright 2020 Juicedata, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package usage + +import ( + "bytes" + "encoding/json" + "fmt" + "io/ioutil" + "math/rand" + "net/http" + "time" + + "github.com/juicedata/juicefs/pkg/meta" + "github.com/juicedata/juicefs/pkg/utils" +) + +var reportUrl = "https://juicefs.com/report-usage" + +var logger = utils.GetLogger("juicefs") + +type usage struct { + VolumeID string `json:"volumeID"` + SessionID int64 `json:"sessionID"` + UsedSpace int64 `json:"usedBytes"` + UsedInodes int64 `json:"usedInodes"` + Version string `json:"version"` + Uptime int64 `json:"uptime"` + MetaEngine string `json:"metaEngine"` // type of meta engine + DataStore string `json:"dataStore"` // type of object store +} + +func sendUsage(u usage) error { + body, err := json.Marshal(u) + if err != nil { + return err + } + req, err := http.NewRequest("POST", reportUrl, bytes.NewReader(body)) + if err != nil { + return err + } + resp, err := http.DefaultClient.Do(req) + if err != nil { + return err + } + if resp.StatusCode != 200 { + return fmt.Errorf("got %s", resp.Status) + } + _, err = ioutil.ReadAll(resp.Body) + return err +} + +// ReportUsage will send anonymous usage data to juicefs.com to help the team +// understand how the community is using it. You can use `--no-usage-report` +// to disable this. +func ReportUsage(m meta.Meta, version string) { + ctx := meta.Background + var u usage + if format, err := m.Load(); err == nil { + u.VolumeID = format.UUID + u.DataStore = format.Storage + } + u.MetaEngine = m.Name() + u.SessionID = int64(rand.Uint32()) + u.Version = version + var start = time.Now() + for { + var totalSpace, availSpace, iused, iavail uint64 + _ = m.StatFS(ctx, &totalSpace, &availSpace, &iused, &iavail) + u.Uptime = int64(time.Since(start).Seconds()) + u.UsedSpace = int64(totalSpace - availSpace) + u.UsedInodes = int64(iused) + + if err := sendUsage(u); err != nil { + logger.Debugf("send usage: %s", err) + } + time.Sleep(time.Minute * 10) + } +} diff --git a/pkg/usage/usage_test.go b/pkg/usage/usage_test.go new file mode 100644 index 0000000..254fa81 --- /dev/null +++ b/pkg/usage/usage_test.go @@ -0,0 +1,74 @@ +/* + * JuiceFS, Copyright 2020 Juicedata, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package usage + +import ( + "encoding/json" + "fmt" + "io/ioutil" + "net" + "net/http" + "testing" + "time" + + "github.com/juicedata/juicefs/pkg/meta" +) + +// nolint:errcheck +func TestUsageReport(t *testing.T) { + // invalid addr + reportUrl = "http://127.0.0.1/report-usage" + m := meta.NewClient("memkv://", &meta.Config{}) + go ReportUsage(m, "unittest") + // wait for it to report to unavailable address, it should not panic. 
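+ // (ReportUsage swallows the send error and only logs it at debug level)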
+ time.Sleep(time.Millisecond * 100) + + l, err := net.Listen("tcp", "127.0.0.1:0") + if err != nil { + t.Fatal(err) + } + defer l.Close() + + mux := http.NewServeMux() + var u usage + done := make(chan bool) + mux.HandleFunc("/report-usage", func(rw http.ResponseWriter, r *http.Request) { + d, _ := ioutil.ReadAll(r.Body) + _ = json.Unmarshal(d, &u) + _, _ = rw.Write([]byte("OK")) + done <- true + }) + go http.Serve(l, mux) + + addr := l.Addr().String() + reportUrl = fmt.Sprintf("http://%s/report-usage", addr) + go ReportUsage(m, "unittest") + + deadline := time.NewTimer(time.Second * 3) + select { + case <-done: + if u.MetaEngine != "memkv" { + t.Fatalf("unexpected meta engine: %s", u.MetaEngine) + } + if u.Version != "unittest" { + t.Fatalf("unexpected version: %s", u.Version) + } + case <-deadline.C: + t.Fatalf("no report after 3 seconds") + } + time.Sleep(time.Millisecond * 100) // wait for the client to finish +} diff --git a/pkg/utils/alloc.go b/pkg/utils/alloc.go new file mode 100644 index 0000000..737398b --- /dev/null +++ b/pkg/utils/alloc.go @@ -0,0 +1,82 @@ +/* + * JuiceFS, Copyright 2020 Juicedata, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package utils + +import ( + "fmt" + "runtime" + "sync" + "sync/atomic" + "time" +) + +var used int64 + +// Alloc returns size bytes memory from Go heap. +func Alloc(size int) []byte { + zeros := powerOf2(size) + b := *pools[zeros].Get().(*[]byte) + if cap(b) < size { + panic(fmt.Sprintf("%d < %d", cap(b), size)) + } + atomic.AddInt64(&used, int64(cap(b))) + return b[:size] +} + +// Free returns memory to Go heap. +func Free(b []byte) { + // buf could be zero length + atomic.AddInt64(&used, -int64(cap(b))) + pools[powerOf2(cap(b))].Put(&b) +} + +// AllocMemory returns the allocated memory +func AllocMemory() int64 { + return atomic.LoadInt64(&used) +} + +var pools []*sync.Pool + +func powerOf2(s int) int { + var bits int + var p int = 1 + for p < s { + bits++ + p *= 2 + } + return bits +} + +func init() { + pools = make([]*sync.Pool, 30) // 1 - 1G + for i := 0; i < 30; i++ { + func(bits int) { + pools[i] = &sync.Pool{ + New: func() interface{} { + b := make([]byte, 1< 1000 { + t.Fatal("time is not accurate") + } + c1 := Clock() + c2 := Clock() + if c2-c1 > time.Microsecond || c2-c1 == 0 { + t.Fatalf("clock is not accurate: %s", c2-c1) + } +} diff --git a/pkg/utils/clock_unix.go b/pkg/utils/clock_unix.go new file mode 100644 index 0000000..32360a0 --- /dev/null +++ b/pkg/utils/clock_unix.go @@ -0,0 +1,32 @@ +//go:build !windows +// +build !windows + +/* + * JuiceFS, Copyright 2021 Juicedata, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package utils + +import "time" + +var started = time.Now() + +func Now() time.Time { + return time.Now() +} + +func Clock() time.Duration { + return time.Since(started) +} diff --git a/pkg/utils/clock_windows.go b/pkg/utils/clock_windows.go new file mode 100644 index 0000000..64b9502 --- /dev/null +++ b/pkg/utils/clock_windows.go @@ -0,0 +1,76 @@ +/* + * JuiceFS, Copyright 2021 Juicedata, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package utils + +import ( + "syscall" + "time" + "unsafe" +) + +type clock struct { + t time.Time + tick time.Duration +} + +var last *clock + +func Now() time.Time { + c := last + return c.t.Add(Clock() - c.tick) +} + +// Clock returns the number of milliseconds that have elapsed since the program +// was started. +var Clock func() time.Duration + +func init() { + QPCTimer := func() func() time.Duration { + lib, _ := syscall.LoadLibrary("kernel32.dll") + qpc, _ := syscall.GetProcAddress(lib, "QueryPerformanceCounter") + qpf, _ := syscall.GetProcAddress(lib, "QueryPerformanceFrequency") + if qpc == 0 || qpf == 0 { + return nil + } + + var freq, start uint64 + syscall.Syscall(qpf, 1, uintptr(unsafe.Pointer(&freq)), 0, 0) + syscall.Syscall(qpc, 1, uintptr(unsafe.Pointer(&start)), 0, 0) + if freq <= 0 { + return nil + } + + freqns := float64(freq) / 1e9 + return func() time.Duration { + var now uint64 + syscall.Syscall(qpc, 1, uintptr(unsafe.Pointer(&now)), 0, 0) + return time.Duration(float64(now-start) / freqns) + } + } + if Clock = QPCTimer(); Clock == nil { + // Fallback implementation + start := time.Now() + Clock = func() time.Duration { return time.Since(start) } + } + last = &clock{time.Now(), Clock()} + go func() { + for { + last = &clock{time.Now(), Clock()} + time.Sleep(time.Hour) + } + }() +} diff --git a/pkg/utils/cond.go b/pkg/utils/cond.go new file mode 100644 index 0000000..56b412b --- /dev/null +++ b/pkg/utils/cond.go @@ -0,0 +1,90 @@ +/* + * JuiceFS, Copyright 2020 Juicedata, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package utils + +import ( + "sync" + "time" +) + +// Cond is similar to sync.Cond, but you can wait with a timeout. +type Cond struct { + L sync.Locker + signal chan bool + waiters int +} + +// Signal wakes up a waiter. +// It's allowed but not required for the caller to hold L. +func (c *Cond) Signal() { + select { + case c.signal <- true: + default: + } +} + +// Broadcast wake up all the waiters. +// It's required for the caller to hold L. +func (c *Cond) Broadcast() { + for c.waiters > 0 { + c.L.Unlock() + c.Signal() + c.L.Lock() + } +} + +// Wait until Signal() or Broadcast() is called. +func (c *Cond) Wait() { + c.waiters++ + c.L.Unlock() + <-c.signal + c.L.Lock() + c.waiters-- +} + +var timerPool = sync.Pool{ + New: func() interface{} { + return time.NewTimer(time.Second) + }, +} + +// WaitWithTimeout wait for a signal or a period of timeout eclipsed. +// returns true in case of timeout else false +func (c *Cond) WaitWithTimeout(d time.Duration) bool { + c.waiters++ + c.L.Unlock() + t := timerPool.Get().(*time.Timer) + t.Reset(d) + defer func() { + t.Stop() + timerPool.Put(t) + c.L.Lock() + c.waiters-- + + }() + select { + case <-c.signal: + return false + case <-t.C: + return true + } +} + +// NewCond creates a Cond. +func NewCond(lock sync.Locker) *Cond { + return &Cond{lock, make(chan bool), 0} +} diff --git a/pkg/utils/cond_test.go b/pkg/utils/cond_test.go new file mode 100644 index 0000000..19e30da --- /dev/null +++ b/pkg/utils/cond_test.go @@ -0,0 +1,98 @@ +/* + * JuiceFS, Copyright 2021 Juicedata, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package utils + +import ( + "sync" + "testing" + "time" +) + +func TestCond(t *testing.T) { + // test Wait and Signal + var m sync.Mutex + l := NewCond(&m) + done := make(chan bool) + var wg sync.WaitGroup + wg.Add(1) + go func() { + m.Lock() + wg.Done() + l.Wait() + m.Unlock() + + l.Signal() + done <- true + }() + wg.Wait() + m.Lock() + l.Signal() + l.Wait() + m.Unlock() + select { + case <-done: + case <-time.NewTimer(time.Second).C: + t.Fatalf("the other goroutine did not return after 1 second") + } + + // test WaitWithTimeout + var timeout bool + go func() { + m.Lock() + defer m.Unlock() + timeout = l.WaitWithTimeout(time.Millisecond * 10) + done <- true + }() + select { + case <-done: + if !timeout { + t.Fatalf("it should timeout") + } + case <-time.NewTimer(time.Second).C: + t.Fatalf("wait did not return after 1 second") + } + + // test Broadcast to wake up all goroutines + var N = 1000 + done2 := make(chan bool, N) + var wg2 sync.WaitGroup + for i := 0; i < N; i++ { + wg2.Add(1) + go func() { + m.Lock() + wg2.Done() + timeout := l.WaitWithTimeout(time.Second) + m.Unlock() + done2 <- timeout + }() + } + wg2.Wait() + m.Lock() + l.Broadcast() + m.Unlock() + deadline := time.NewTimer(time.Millisecond * 500) + for i := 0; i < N; i++ { + select { + case timeout := <-done2: + if timeout { + t.Fatalf("cond should not timeout") + } + case <-deadline.C: + t.Fatalf("not all goroutines wakeup in 500 ms") + } + } +} diff --git a/pkg/utils/logger.go b/pkg/utils/logger.go new file mode 100644 index 0000000..6f3ef0d --- /dev/null +++ b/pkg/utils/logger.go @@ -0,0 +1,127 @@ +// Copyright 2015 Ka-Hing Cheung +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package utils + +import ( + "fmt" + "os" + "strings" + "sync" + + "github.com/mattn/go-isatty" + "github.com/sirupsen/logrus" +) + +var mu sync.Mutex +var loggers = make(map[string]*logHandle) + +var syslogHook logrus.Hook + +type logHandle struct { + logrus.Logger + + name string + lvl *logrus.Level + tty bool +} + +func (l *logHandle) Format(e *logrus.Entry) ([]byte, error) { + lvl := e.Level + if l.lvl != nil { + lvl = *l.lvl + } + lvlStr := strings.ToUpper(lvl.String()) + if l.tty { + var color int + switch lvl { + case logrus.ErrorLevel, logrus.FatalLevel, logrus.PanicLevel: + color = 31 // RED + case logrus.WarnLevel: + color = 33 // YELLOW + case logrus.InfoLevel: + color = 34 // BLUE + default: // logrus.TraceLevel, logrus.DebugLevel + color = 35 // MAGENTA + } + lvlStr = fmt.Sprintf("\033[1;%dm%s\033[0m", color, lvlStr) + } + const timeFormat = "2006/01/02 15:04:05.000000" + timestamp := e.Time.Format(timeFormat) + str := fmt.Sprintf("%v %s[%d] <%v>: %v", + timestamp, + l.name, + os.Getpid(), + lvlStr, + e.Message) + + if len(e.Data) != 0 { + str += " " + fmt.Sprint(e.Data) + } + if !strings.HasSuffix(str, "\n") { + str += "\n" + } + return []byte(str), nil +} + +// for aws.Logger +func (l *logHandle) Log(args ...interface{}) { + l.Debugln(args...) 
+} + +func newLogger(name string) *logHandle { + l := &logHandle{Logger: *logrus.New(), name: name, tty: isatty.IsTerminal(os.Stderr.Fd())} + l.Formatter = l + if syslogHook != nil { + l.Hooks.Add(syslogHook) + } + return l +} + +// GetLogger returns a logger mapped to `name` +func GetLogger(name string) *logHandle { + mu.Lock() + defer mu.Unlock() + + if logger, ok := loggers[name]; ok { + return logger + } + logger := newLogger(name) + loggers[name] = logger + return logger +} + +// SetLogLevel sets Level to all the loggers in the map +func SetLogLevel(lvl logrus.Level) { + for _, logger := range loggers { + logger.Level = lvl + } +} + +func DisableLogColor() { + for _, logger := range loggers { + logger.tty = false + } +} + +func SetOutFile(name string) { + file, err := os.OpenFile(name, os.O_CREATE|os.O_WRONLY|os.O_APPEND, 0666) + if err != nil { + return + } + for _, logger := range loggers { + logger.SetOutput(file) + logger.tty = false + } +} diff --git a/pkg/utils/logger_syslog.go b/pkg/utils/logger_syslog.go new file mode 100644 index 0000000..cf16d0c --- /dev/null +++ b/pkg/utils/logger_syslog.go @@ -0,0 +1,76 @@ +//go:build !windows +// +build !windows + +/* + * JuiceFS, Copyright 2020 Juicedata, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package utils + +import ( + "fmt" + "log/syslog" + "os" + + "github.com/sirupsen/logrus" + logrus_syslog "github.com/sirupsen/logrus/hooks/syslog" +) + +type SyslogHook struct { + *logrus_syslog.SyslogHook +} + +func (hook *SyslogHook) Fire(entry *logrus.Entry) error { + line, err := entry.String() + if err != nil { + fmt.Fprintf(os.Stderr, "Unable to read entry, %v", err) + return err + } + + // drop the timestamp + line = line[27:] + + switch entry.Level { + case logrus.PanicLevel: + return hook.Writer.Crit(line) + case logrus.FatalLevel: + return hook.Writer.Crit(line) + case logrus.ErrorLevel: + return hook.Writer.Err(line) + case logrus.WarnLevel: + return hook.Writer.Warning(line) + case logrus.InfoLevel: + return hook.Writer.Info(line) + case logrus.DebugLevel: + return hook.Writer.Debug(line) + default: + return nil + } +} + +func InitLoggers(logToSyslog bool) { + if logToSyslog { + hook, err := logrus_syslog.NewSyslogHook("", "", syslog.LOG_DEBUG|syslog.LOG_USER, "") + if err != nil { + // println("Unable to connect to local syslog daemon") + return + } + syslogHook = &SyslogHook{hook} + + for _, l := range loggers { + l.Hooks.Add(syslogHook) + } + } +} diff --git a/pkg/utils/logger_test.go b/pkg/utils/logger_test.go new file mode 100644 index 0000000..6217a63 --- /dev/null +++ b/pkg/utils/logger_test.go @@ -0,0 +1,58 @@ +/* + * JuiceFS, Copyright 2021 Juicedata, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package utils + +import ( + "io/ioutil" + "os" + "strings" + "testing" + + "github.com/sirupsen/logrus" +) + +func TestLogger(t *testing.T) { + _ = GetLogger("test") + f, err := os.CreateTemp("", "test_logger") + if err != nil { + t.Fatalf("temp file: %s", err) + } + defer f.Close() + SetOutFile("") // invalid + SetOutFile(f.Name()) + InitLoggers(true) + + SetLogLevel(logrus.TraceLevel) + SetLogLevel(logrus.DebugLevel) + SetLogLevel(logrus.InfoLevel) + SetLogLevel(logrus.ErrorLevel) + SetLogLevel(logrus.FatalLevel) + SetLogLevel(logrus.WarnLevel) + logger := GetLogger("test") + logger.Info("info level") + logger.Debug("debug level") + logger.Warnf("warn level") + logger.Error("error level") + + d, _ := ioutil.ReadFile(f.Name()) + s := string(d) + if strings.Contains(s, "info level") || strings.Contains(s, "debug level") { + t.Fatalf("info/debug should not be logged: %s", s) + } else if !strings.Contains(s, "warn level") || !strings.Contains(s, "error level") { + t.Fatalf("warn/error should be logged: %s", s) + } +} diff --git a/pkg/utils/logger_windows.go b/pkg/utils/logger_windows.go new file mode 100644 index 0000000..65913f8 --- /dev/null +++ b/pkg/utils/logger_windows.go @@ -0,0 +1,19 @@ +/* + * JuiceFS, Copyright 2020 Juicedata, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package utils + +func InitLoggers(logToSyslog bool) {} diff --git a/pkg/utils/memusage.go b/pkg/utils/memusage.go new file mode 100644 index 0000000..4926ea8 --- /dev/null +++ b/pkg/utils/memusage.go @@ -0,0 +1,46 @@ +//go:build !windows +// +build !windows + +/* + * JuiceFS, Copyright 2021 Juicedata, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package utils + +import ( + "bytes" + "io/ioutil" + "strconv" + "syscall" +) + +func MemoryUsage() (virt, rss uint64) { + stat, err := ioutil.ReadFile("/proc/self/stat") + if err == nil { + stats := bytes.Split(stat, []byte(" ")) + if len(stats) >= 24 { + v, _ := strconv.ParseUint(string(stats[22]), 10, 64) + r, _ := strconv.ParseUint(string(stats[23]), 10, 64) + return v, r * 4096 + } + } + + var ru syscall.Rusage + err = syscall.Getrusage(syscall.RUSAGE_SELF, &ru) + if err == nil { + return uint64(ru.Maxrss), uint64(ru.Maxrss) + } + return +} diff --git a/pkg/utils/memusage_test.go b/pkg/utils/memusage_test.go new file mode 100644 index 0000000..03a54b7 --- /dev/null +++ b/pkg/utils/memusage_test.go @@ -0,0 +1,26 @@ +/* + * JuiceFS, Copyright 2021 Juicedata, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package utils + +import "testing" + +func TestMemUsage(t *testing.T) { + virt, rss := MemoryUsage() + if virt < (1<<20) || rss < (1<<20) || rss > (100<<20) { + t.Fatalf("invalid memory usage: virt %d, rss %d", virt, rss) + } +} diff --git a/pkg/utils/memusage_windows.go b/pkg/utils/memusage_windows.go new file mode 100644 index 0000000..804c7f1 --- /dev/null +++ b/pkg/utils/memusage_windows.go @@ -0,0 +1,77 @@ +/* + * JuiceFS, Copyright 2021 Juicedata, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package utils + +import ( + "os" + "syscall" + "unsafe" + + "golang.org/x/sys/windows" +) + +type PROCESS_MEMORY_COUNTERS struct { + CB uint32 + PageFaultCount uint32 + PeakWorkingSetSize uint64 + WorkingSetSize uint64 + QuotaPeakPagedPoolUsage uint64 + QuotaPagedPoolUsage uint64 + QuotaPeakNonPagedPoolUsage uint64 + QuotaNonPagedPoolUsage uint64 + PagefileUsage uint64 + PeakPagefileUsage uint64 +} + +var ( + modpsapi = windows.NewLazySystemDLL("psapi.dll") + procGetProcessMemoryInfo = modpsapi.NewProc("GetProcessMemoryInfo") +) + +func getMemoryInfo(pid int32) (PROCESS_MEMORY_COUNTERS, error) { + var mem PROCESS_MEMORY_COUNTERS + c, err := windows.OpenProcess(windows.PROCESS_QUERY_LIMITED_INFORMATION, false, uint32(pid)) + if err != nil { + return mem, err + } + defer windows.CloseHandle(c) + if err := getProcessMemoryInfo(c, &mem); err != nil { + return mem, err + } + + return mem, err +} + +func getProcessMemoryInfo(h windows.Handle, mem *PROCESS_MEMORY_COUNTERS) (err error) { + r1, _, e1 := syscall.Syscall(procGetProcessMemoryInfo.Addr(), 3, uintptr(h), uintptr(unsafe.Pointer(mem)), uintptr(unsafe.Sizeof(*mem))) + if r1 == 0 { + if e1 != 0 { + err = error(e1) + } else { + err = syscall.EINVAL + } + } + return +} + +func MemoryUsage() (virt, rss uint64) { + c, err := getMemoryInfo(int32(os.Getpid())) + if err == nil { + return c.PeakWorkingSetSize, c.WorkingSetSize + } + return 0, 0 +} diff --git a/pkg/utils/progress.go b/pkg/utils/progress.go new file mode 100644 index 0000000..3da9afb --- /dev/null +++ b/pkg/utils/progress.go @@ -0,0 +1,159 @@ +/* + * JuiceFS, Copyright 2022 Juicedata, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package utils + +import ( + "os" + + "github.com/mattn/go-isatty" + "github.com/vbauerster/mpb/v7" + "github.com/vbauerster/mpb/v7/decor" +) + +type Progress struct { + *mpb.Progress + Quiet bool + showSpeed bool + bars []*mpb.Bar +} + +type Bar struct { + *mpb.Bar + total int64 +} + +func (b *Bar) IncrTotal(n int64) { // not thread safe + b.total += n + b.Bar.SetTotal(b.total, false) +} + +func (b *Bar) SetTotal(total int64) { // not thread safe + b.total = total + b.Bar.SetTotal(total, false) +} + +func (b *Bar) Done() { + b.Bar.SetTotal(0, true) +} + +type DoubleSpinner struct { + count *mpb.Bar + bytes *mpb.Bar +} + +func (s *DoubleSpinner) IncrInt64(size int64) { + s.count.Increment() + s.bytes.IncrInt64(size) +} + +func (s *DoubleSpinner) Done() { + s.count.SetTotal(0, true) + s.bytes.SetTotal(0, true) +} + +func (s *DoubleSpinner) Current() (int64, int64) { + return s.count.Current(), s.bytes.Current() +} + +func NewProgress(quiet, showSpeed bool) *Progress { + if quiet || os.Getenv("DISPLAY_PROGRESSBAR") == "false" || !isatty.IsTerminal(os.Stdout.Fd()) { + return &Progress{mpb.New(mpb.WithWidth(64), mpb.WithOutput(nil)), true, showSpeed, nil} + } else { + return &Progress{mpb.New(mpb.WithWidth(64)), false, showSpeed, nil} + } +} + +func (p *Progress) AddCountBar(name string, total int64) *Bar { + b := p.Progress.AddBar(0, // disable triggerComplete + mpb.PrependDecorators( + decor.Name(name+" count: ", decor.WCSyncWidth), + decor.CountersNoUnit("%d / %d"), + ), + mpb.AppendDecorators( + decor.OnComplete(decor.Percentage(decor.WC{W: 5}), "done"), + decor.OnComplete( + decor.AverageETA(decor.ET_STYLE_GO, decor.WC{W: 6}), "", + ), + ), + ) + b.SetTotal(total, false) + p.bars = append(p.bars, b) + return &Bar{Bar: b, total: total} +} + +func newSpinner() mpb.BarFiller { + spinnerStyle := []string{"⠋", "⠙", "⠹", "⠸", "⠼", "⠴", "⠦", "⠧", "⠇", "⠏"} + for i, s := range spinnerStyle { + spinnerStyle[i] = "\033[1;32m" + s + "\033[0m" + } + return mpb.NewBarFiller(mpb.SpinnerStyle(spinnerStyle...)) +} + +func (p *Progress) AddCountSpinner(name string) *Bar { + placeholders := []decor.WC{decor.WCSyncSpaceR} + if p.showSpeed { // no real speed; just add an empty placeholder for now + placeholders = append(placeholders, decor.WCSyncSpaceR) + } + b := p.Progress.Add(0, newSpinner(), + mpb.PrependDecorators( + decor.Name(name+" count: ", decor.WCSyncWidth), + decor.Merge(decor.CurrentNoUnit("%d", decor.WCSyncSpaceR), placeholders...), + ), + mpb.BarFillerClearOnComplete(), + ) + p.bars = append(p.bars, b) + return &Bar{Bar: b} +} + +func (p *Progress) AddByteSpinner(name string) *Bar { + decors := []decor.Decorator{ + decor.Name(name+" bytes: ", decor.WCSyncWidth), + decor.CurrentKibiByte("% .2f", decor.WCSyncSpaceR), + decor.CurrentNoUnit("(%d Bytes)", decor.WCSyncSpaceR), + } + if p.showSpeed { // FIXME: maybe use EWMA speed + decors = append(decors, decor.AverageSpeed(decor.UnitKiB, " % .2f", decor.WCSyncSpaceR)) + } + b := p.Progress.Add(0, newSpinner(), + mpb.PrependDecorators(decors...), + mpb.BarFillerClearOnComplete(), + ) + p.bars = append(p.bars, b) + return &Bar{Bar: b} +} + +func (p *Progress) AddDoubleSpinner(name string) *DoubleSpinner { + return &DoubleSpinner{ + p.AddCountSpinner(name).Bar, + p.AddByteSpinner(name).Bar, + } +} + +func (p *Progress) Done() { + for _, b := range p.bars { + if !b.Completed() { + b.SetTotal(0, true) + } + } + p.Progress.Wait() +} + +func MockProgress() (*Progress, *Bar) { + progress := NewProgress(true, false) + bar := 
progress.AddCountBar("Mock", 0) + return progress, bar +} diff --git a/pkg/utils/progress_test.go b/pkg/utils/progress_test.go new file mode 100644 index 0000000..8b5f65c --- /dev/null +++ b/pkg/utils/progress_test.go @@ -0,0 +1,58 @@ +/* + * JuiceFS, Copyright 2022 Juicedata, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package utils + +import ( + "testing" + "time" +) + +func TestProgresBar(t *testing.T) { + p := NewProgress(true, false) + bar := p.AddCountBar("Bar", 0) + cp := p.AddCountSpinner("Spinner") + bp := p.AddByteSpinner("Spinner") + bar.SetTotal(50) + for i := 0; i < 100; i++ { + time.Sleep(time.Millisecond) + bar.Increment() + if i%2 == 0 { + bar.IncrTotal(1) + cp.Increment() + bp.IncrInt64(1024) + } + } + bar.Done() + p.Done() + if bar.Current() != 100 || cp.Current() != 50 || bp.Current() != 50*1024 { + t.Fatalf("Final values: bar %d, count %d, bytes: %d", bar.Current(), cp.Current(), bp.Current()) + } + + p = NewProgress(true, true) + dp := p.AddDoubleSpinner("Spinner") + go func() { + for i := 0; i < 100; i++ { + time.Sleep(time.Millisecond) + dp.IncrInt64(1024) + } + dp.Done() + }() + p.Wait() + if c, b := dp.Current(); c != 100 || b != 102400 { + t.Fatalf("Final values: count %d, bytes %d", c, b) + } +} diff --git a/pkg/utils/rusage.go b/pkg/utils/rusage.go new file mode 100644 index 0000000..79c5963 --- /dev/null +++ b/pkg/utils/rusage.go @@ -0,0 +1,43 @@ +//go:build !windows +// +build !windows + +/* + * JuiceFS, Copyright 2021 Juicedata, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package utils + +import "syscall" + +type Rusage struct { + syscall.Rusage +} + +// GetUtime returns the user time in seconds. +func (ru *Rusage) GetUtime() float64 { + return float64(ru.Utime.Sec) + float64(ru.Utime.Usec)/1e6 +} + +// GetStime returns the system time in seconds. +func (ru *Rusage) GetStime() float64 { + return float64(ru.Stime.Sec) + float64(ru.Stime.Usec)/1e6 +} + +// GetRusage returns CPU usage of current process. +func GetRusage() *Rusage { + var ru syscall.Rusage + _ = syscall.Getrusage(syscall.RUSAGE_SELF, &ru) + return &Rusage{ru} +} diff --git a/pkg/utils/rusage_test.go b/pkg/utils/rusage_test.go new file mode 100644 index 0000000..7f88e9d --- /dev/null +++ b/pkg/utils/rusage_test.go @@ -0,0 +1,38 @@ +/* + * JuiceFS, Copyright 2021 Juicedata, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package utils + +import ( + "testing" + "time" +) + +func TestRUsage(t *testing.T) { + u := GetRusage() + var s string + for i := 0; i < 1000; i++ { + s += time.Now().String() + } + // don't optimize the loop + if len(s) < 10 { + panic("unreachable") + } + u2 := GetRusage() + if u2.GetUtime()-u.GetUtime() < 0.0001 { + t.Fatalf("invalid utime: %f", u2.GetUtime()-u.GetUtime()) + } +} diff --git a/pkg/utils/rusage_windows.go b/pkg/utils/rusage_windows.go new file mode 100644 index 0000000..3b5898a --- /dev/null +++ b/pkg/utils/rusage_windows.go @@ -0,0 +1,42 @@ +/* + * JuiceFS, Copyright 2021 Juicedata, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package utils + +import "golang.org/x/sys/windows" + +type Rusage struct { + kernel windows.Filetime + user windows.Filetime +} + +func (ru *Rusage) GetUtime() float64 { + return float64((int64(ru.user.HighDateTime)<<32)+int64(ru.user.LowDateTime)) / 10 / 1e6 +} + +func (ru *Rusage) GetStime() float64 { + return float64((int64(ru.kernel.HighDateTime)<<32)+int64(ru.kernel.LowDateTime)) / 10 / 1e6 +} + +func GetRusage() *Rusage { + h := windows.CurrentProcess() + var creation, exit, kernel, user windows.Filetime + err := windows.GetProcessTimes(h, &creation, &exit, &kernel, &user) + if err == nil { + return &Rusage{kernel, user} + } + return nil +} diff --git a/pkg/utils/utils.go b/pkg/utils/utils.go new file mode 100644 index 0000000..316556c --- /dev/null +++ b/pkg/utils/utils.go @@ -0,0 +1,91 @@ +/* + * JuiceFS, Copyright 2020 Juicedata, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package utils + +import ( + "fmt" + "net" + "os" + "strings" + "time" +) + +// Min returns the smaller of two ints +func Min(a, b int) int { + if a < b { + return a + } + return b +} + +// Exists checks whether the file or folder at the given path exists +func Exists(path string) bool { + _, err := os.Stat(path) + return err == nil || !os.IsNotExist(err) +} + +// SplitDir splits a path list by the OS path list separator, falling back to comma.
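+// e.g. on Linux, "/data1:/data2" and "/data1,/data2" both split into ["/data1", "/data2"].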
+func SplitDir(d string) []string { + dd := strings.Split(d, string(os.PathListSeparator)) + if len(dd) == 1 { + dd = strings.Split(dd[0], ",") + } + return dd +} + +// GetLocalIp get the local ip used to access remote address. +func GetLocalIp(address string) (string, error) { + conn, err := net.Dial("udp", address) + if err != nil { + return "", err + } + ip, _, err := net.SplitHostPort(conn.LocalAddr().String()) + if err != nil { + return "", err + } + return ip, nil +} + +func WithTimeout(f func() error, timeout time.Duration) error { + var done = make(chan int, 1) + var t = time.NewTimer(timeout) + var err error + go func() { + err = f() + done <- 1 + }() + select { + case <-done: + t.Stop() + case <-t.C: + err = fmt.Errorf("timeout after %s", timeout) + } + return err +} + +func RemovePassword(uri string) string { + p := strings.Index(uri, "@") + if p < 0 { + return uri + } + sp := strings.Index(uri, "://") + cp := strings.Index(uri[sp+3:], ":") + if cp < 0 || sp+3+cp > p { + return uri + } + return uri[:sp+3+cp] + ":****" + uri[p:] +} diff --git a/pkg/utils/utils_test.go b/pkg/utils/utils_test.go new file mode 100644 index 0000000..06dbd92 --- /dev/null +++ b/pkg/utils/utils_test.go @@ -0,0 +1,84 @@ +/* + * JuiceFS, Copyright 2021 Juicedata, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package utils + +import ( + "strings" + "testing" + "time" +) + +func TestMin(t *testing.T) { + assertEqual(t, Min(1, 2), 1) + assertEqual(t, Min(-1, -2), -2) + assertEqual(t, Min(0, 0), 0) +} + +func TestExists(t *testing.T) { + assertEqual(t, Exists("/"), true) + assertEqual(t, Exists("/not_exist_path"), false) +} + +func TestSplitDir(t *testing.T) { + assertEqual(t, SplitDir("/a:/b"), []string{"/a", "/b"}) + assertEqual(t, SplitDir("a,/b"), []string{"a", "/b"}) + assertEqual(t, SplitDir("/a;b"), []string{"/a;b"}) + assertEqual(t, SplitDir("a/b"), []string{"a/b"}) +} + +func TestGetInode(t *testing.T) { + _, err := GetFileInode("") + if err == nil { + t.Fatalf("invalid path should fail") + } + ino, err := GetFileInode("/") + if err != nil { + t.Fatalf("get file inode: %s", err) + } else if ino > 2 { + t.Fatalf("inode of root should be 1/2, but got %d", ino) + } +} + +func TestLocalIp(t *testing.T) { + _, err := GetLocalIp("127.0.0.1") + if err == nil { + t.Fatalf("should fail with invalid address") + } + ip, err := GetLocalIp("127.0.0.1:22") + if err != nil { + t.Fatalf("get local ip: %s", err) + } + if ip != "127.0.0.1" { + t.Fatalf("local ip should be 127.0.0.1, but got %s", ip) + } +} + +func TestTimeout(t *testing.T) { + err := WithTimeout(func() error { + return nil + }, time.Millisecond*10) + if err != nil { + t.Fatalf("fast function should return nil") + } + err = WithTimeout(func() error { + time.Sleep(time.Millisecond * 100) + return nil + }, time.Millisecond*10) + if err == nil || !strings.HasPrefix(err.Error(), "timeout after") { + t.Fatalf("slow function should time out: %s", err) + } +} diff --git a/pkg/utils/utils_unix.go b/pkg/utils/utils_unix.go new file mode 100644 index 0000000..8ae9fea --- /dev/null +++ b/pkg/utils/utils_unix.go @@ -0,0 +1,36 @@ +//go:build !windows +// +build !windows + +/* + * JuiceFS, Copyright 2021 Juicedata, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package utils + +import ( + "os" + "syscall" +) + +func GetFileInode(path string) (uint64, error) { + fi, err := os.Stat(path) + if err != nil { + return 0, err + } + if sst, ok := fi.Sys().(*syscall.Stat_t); ok { + return sst.Ino, nil + } + return 0, nil +} diff --git a/pkg/utils/utils_windows.go b/pkg/utils/utils_windows.go new file mode 100644 index 0000000..a62cf2f --- /dev/null +++ b/pkg/utils/utils_windows.go @@ -0,0 +1,38 @@ +/* + * JuiceFS, Copyright 2021 Juicedata, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License.
+ */ + +package utils + +import ( + "os" + + "golang.org/x/sys/windows" +) + +func GetFileInode(path string) (uint64, error) { + // FIXME support directory + fd, err := windows.Open(path, os.O_RDONLY, 0) + if err != nil { + return 0, err + } + defer windows.Close(fd) + var data windows.ByHandleFileInformation + err = windows.GetFileInformationByHandle(fd, &data) + if err != nil { + return 0, err + } + return uint64(data.FileIndexHigh)<<32 + uint64(data.FileIndexLow), nil +} diff --git a/pkg/version/.gitattributes b/pkg/version/.gitattributes new file mode 100644 index 0000000..6192840 --- /dev/null +++ b/pkg/version/.gitattributes @@ -0,0 +1 @@ +version.go export-subst diff --git a/pkg/version/version.go b/pkg/version/version.go new file mode 100644 index 0000000..d725f61 --- /dev/null +++ b/pkg/version/version.go @@ -0,0 +1,31 @@ +/* + * JuiceFS, Copyright 2020 Juicedata, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package version + +import "fmt" + +var ( + version = "1.0-dev" + revision = "$Format:%h$" + revisionDate = "$Format:%as$" +) + +// Version returns version in format - `VERSION (REVISIONDATE REVISION)` +// value is assigned in Makefile +func Version() string { + return fmt.Sprintf("%v (%v %v)", version, revisionDate, revision) +} diff --git a/pkg/vfs/accesslog.go b/pkg/vfs/accesslog.go new file mode 100644 index 0000000..3c29d7b --- /dev/null +++ b/pkg/vfs/accesslog.go @@ -0,0 +1,122 @@ +/* + * JuiceFS, Copyright 2020 Juicedata, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package vfs + +import ( + "fmt" + "sync" + "time" + + "github.com/juicedata/juicefs/pkg/utils" + "github.com/prometheus/client_golang/prometheus" +) + +var ( + opsDurationsHistogram = prometheus.NewHistogram(prometheus.HistogramOpts{ + Name: "fuse_ops_durations_histogram_seconds", + Help: "Operations latency distributions.", + Buckets: prometheus.ExponentialBuckets(0.0001, 1.5, 30), + }) +) + +type logReader struct { + sync.Mutex + buffer chan []byte + last []byte +} + +var ( + readerLock sync.Mutex + readers map[uint64]*logReader +) + +func init() { + readers = make(map[uint64]*logReader) +} + +func logit(ctx Context, format string, args ...interface{}) { + used := ctx.Duration() + opsDurationsHistogram.Observe(used.Seconds()) + readerLock.Lock() + defer readerLock.Unlock() + if len(readers) == 0 && used < time.Second*10 { + return + } + + cmd := fmt.Sprintf(format, args...) 
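+ // the emitted line has the form "YYYY.MM.DD HH:MM:SS.ffffff [uid:N,gid:N,pid:N] <cmd> <elapsed-seconds>", built below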
+ t := utils.Now() + ts := t.Format("2006.01.02 15:04:05.000000") + cmd += fmt.Sprintf(" <%.6f>", used.Seconds()) + if ctx.Pid() != 0 && used >= time.Second*10 { + logger.Infof("slow operation: %s", cmd) + } + line := []byte(fmt.Sprintf("%s [uid:%d,gid:%d,pid:%d] %s\n", ts, ctx.Uid(), ctx.Gid(), ctx.Pid(), cmd)) + + for _, r := range readers { + select { + case r.buffer <- line: + default: + } + } +} + +func openAccessLog(fh uint64) uint64 { + readerLock.Lock() + defer readerLock.Unlock() + readers[fh] = &logReader{buffer: make(chan []byte, 10240)} + return fh +} + +func closeAccessLog(fh uint64) { + readerLock.Lock() + defer readerLock.Unlock() + delete(readers, fh) +} + +func readAccessLog(fh uint64, buf []byte) int { + readerLock.Lock() + r, ok := readers[fh] + readerLock.Unlock() + if !ok { + return 0 + } + r.Lock() + defer r.Unlock() + var n int + if len(r.last) > 0 { + n = copy(buf, r.last) + r.last = r.last[n:] + } + var t = time.NewTimer(time.Second) + defer t.Stop() + for n < len(buf) { + select { + case line := <-r.buffer: + l := copy(buf[n:], line) + n += l + if l < len(line) { + r.last = line[l:] + } + case <-t.C: + if n == 0 { + n = copy(buf, "#\n") + } + return n + } + } + return n +} diff --git a/pkg/vfs/accesslog_test.go b/pkg/vfs/accesslog_test.go new file mode 100644 index 0000000..b3b3321 --- /dev/null +++ b/pkg/vfs/accesslog_test.go @@ -0,0 +1,73 @@ +/* + * JuiceFS, Copyright 2021 Juicedata, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package vfs + +import ( + "testing" + "time" + + "github.com/juicedata/juicefs/pkg/meta" +) + +func TestAccessLog(t *testing.T) { + openAccessLog(1) + defer closeAccessLog(1) + + ctx := NewLogContext(meta.NewContext(10, 1, []uint32{2})) + logit(ctx, "test") + + n := readAccessLog(2, nil) + if n != 0 { + t.Fatalf("invalid fd") + } + + now := time.Now() + // partial read + buf := make([]byte, 1024) + n = readAccessLog(1, buf[:10]) + if n != 10 { + t.Fatalf("partial read: %d", n) + } + if time.Since(now) > time.Millisecond*10 { + t.Fatalf("should not block") + } + + // read whole line, block for 1 second + n = readAccessLog(1, buf[10:]) + if n != 54 { + t.Fatalf("partial read: %d", n) + } + logs := string(buf[:10+n]) + + // check format + ts, err := time.Parse("2006.01.02 15:04:05.000000", logs[:26]) + if err != nil { + t.Fatalf("invalid time %s: %s", logs, err) + } + if now.Sub(ts.Local()) > time.Millisecond*10 { + t.Fatalf("stale time: %s now: %s", ts, time.Now()) + } + if logs[26:len(logs)-4] != " [uid:1,gid:2,pid:10] test <0.0000" { + t.Fatalf("unexpected log: %q", logs[26:]) + } + + // block read + n = readAccessLog(1, buf) + if n != 2 || string(buf[:2]) != "#\n" { + t.Fatalf("expected line: %q", string(buf[:n])) + } +} diff --git a/pkg/vfs/backup.go b/pkg/vfs/backup.go new file mode 100644 index 0000000..f8ee585 --- /dev/null +++ b/pkg/vfs/backup.go @@ -0,0 +1,164 @@ +/* + * JuiceFS, Copyright 2021 Juicedata, Inc. 
+ * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package vfs + +import ( + "compress/gzip" + "io" + "os" + "sort" + "time" + + "github.com/juicedata/juicefs/pkg/meta" + "github.com/juicedata/juicefs/pkg/object" + osync "github.com/juicedata/juicefs/pkg/sync" +) + +// Backup metadata periodically in the object storage +func Backup(m meta.Meta, blob object.ObjectStorage, interval time.Duration) { + ctx := meta.Background + key := "lastBackup" + for { + time.Sleep(interval / 10) + var value []byte + if st := m.GetXattr(ctx, 1, key, &value); st != 0 && st != meta.ENOATTR { + logger.Warnf("getxattr inode 1 key %s: %s", key, st) + continue + } + var last time.Time + var err error + if len(value) > 0 { + last, err = time.Parse(time.RFC3339, string(value)) + } + if err != nil { + logger.Warnf("parse time value %s: %s", value, err) + continue + } + if now := time.Now(); now.Sub(last) >= interval { + var iused, dummy uint64 + _ = m.StatFS(ctx, &dummy, &dummy, &iused, &dummy) + if iused/5e6 > uint64(interval/time.Hour) { + logger.Infof("backup metadata skipped because of too many inodes: %d %s", iused, interval) + continue + } + if st := m.SetXattr(ctx, 1, key, []byte(now.Format(time.RFC3339)), meta.XattrCreateOrReplace); st != 0 { + logger.Warnf("setxattr inode 1 key %s: %s", key, st) + continue + } + go cleanupBackups(blob, now) + logger.Debugf("backup metadata started") + if err = backup(m, blob, now); err == nil { + logger.Infof("backup metadata succeed, used %s", time.Since(now)) + } else { + logger.Warnf("backup metadata failed: %s", err) + } + } + } +} + +func backup(m meta.Meta, blob object.ObjectStorage, now time.Time) error { + name := "dump-" + now.UTC().Format("2006-01-02-150405") + ".json.gz" + fpath := "/tmp/juicefs-meta-" + name + fp, err := os.OpenFile(fpath, os.O_CREATE|os.O_RDWR|os.O_TRUNC, 0444) + if err != nil { + return err + } + defer os.Remove(fpath) + defer fp.Close() + zw := gzip.NewWriter(fp) + err = m.DumpMeta(zw, 1) // force dump the whole tree + _ = zw.Close() + if err != nil { + return err + } + if _, err = fp.Seek(0, io.SeekStart); err != nil { + return err + } + return blob.Put("meta/"+name, fp) +} + +func cleanupBackups(blob object.ObjectStorage, now time.Time) { + blob = object.WithPrefix(blob, "meta/") + ch, err := osync.ListAll(blob, "", "") + if err != nil { + logger.Warnf("listAll prefix meta/: %s", err) + return + } + var objs []string + for o := range ch { + objs = append(objs, o.Key()) + } + + toDel := rotate(objs, now) + for _, o := range toDel { + if err = blob.Delete(o); err != nil { + logger.Warnf("delete object %s: %s", o, err) + } + } +} + +// Cleanup policy: +// 1. keep all backups within 2 days +// 2. keep one backup each day within 2 weeks +// 3. keep one backup each week within 2 months +// 4. 
keep one backup each month for those before 2 months +func rotate(objs []string, now time.Time) []string { + var days = 2 + edge := now.UTC().AddDate(0, 0, -days) + next := func() { + if days < 14 { + days++ + edge = edge.AddDate(0, 0, -1) + } else if days < 60 { + days += 7 + edge = edge.AddDate(0, 0, -7) + } else { + days += 30 + edge = edge.AddDate(0, 0, -30) + } + } + + var toDel, within []string + sort.Strings(objs) + for i := len(objs) - 1; i >= 0; i-- { + if len(objs[i]) != 30 { // len("dump-2006-01-02-150405.json.gz") + logger.Warnf("bad object for metadata backup %s: length %d", objs[i], len(objs[i])) + continue + } + ts, err := time.Parse("2006-01-02-150405", objs[i][5:22]) + if err != nil { + logger.Warnf("bad object for metadata backup %s: %s", objs[i], err) + continue + } + + if ts.Before(edge) { + if l := len(within); l > 0 { // keep the earliest one + toDel = append(toDel, within[:l-1]...) + within = within[:0] + } + for next(); ts.Before(edge); next() { + } + within = append(within, objs[i]) + } else if days > 2 { + within = append(within, objs[i]) + } + } + if l := len(within); l > 0 { + toDel = append(toDel, within[:l-1]...) + } + return toDel +} diff --git a/pkg/vfs/backup_test.go b/pkg/vfs/backup_test.go new file mode 100644 index 0000000..9e24495 --- /dev/null +++ b/pkg/vfs/backup_test.go @@ -0,0 +1,84 @@ +/* + * JuiceFS, Copyright 2021 Juicedata, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package vfs + +import ( + "testing" + "time" + + "github.com/juicedata/juicefs/pkg/object" + osync "github.com/juicedata/juicefs/pkg/sync" +) + +func TestRotate(t *testing.T) { + format := func(ts time.Time) string { + return "dump-" + ts.UTC().Format("2006-01-02-150405") + ".json.gz" + } + + now := time.Now() + objs := make([]string, 0, 25) + for cursor, i := now.AddDate(0, 0, -100), 0; i <= 200; i++ { // one backup for every half day + objs = append(objs, format(cursor)) + toDel := rotate(objs, cursor) + for _, d := range toDel { + for j, k := range objs { + if k == d { + objs = append(objs[:j], objs[j+1:]...) 
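+ // the rotated-out key has been removed from the simulated backup list; stop scanning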
+ break + } + } + } + cursor = cursor.Add(time.Duration(12) * time.Hour) + } + + expect := make([]string, 0, 25) + expect = append(expect, format(now.AddDate(0, 0, -100))) + for days := 65; days > 14; days -= 7 { + expect = append(expect, format(now.AddDate(0, 0, -days))) + } + for days := 13; days > 2; days-- { + expect = append(expect, format(now.AddDate(0, 0, -days))) + } + for i := 4; i >= 0; i-- { + expect = append(expect, format(now.Add(time.Duration(-i*12)*time.Hour))) + } + + if len(objs) != len(expect) { + t.Fatalf("length of objs %d != length of expect %d", len(objs), len(expect)) + } + for i, o := range objs { + if o != expect[i] { + t.Fatalf("obj %s != expect %s", o, expect[i]) + } + } +} + +func TestBackup(t *testing.T) { + v, blob := createTestVFS() + go Backup(v.Meta, blob, time.Millisecond*100) + time.Sleep(time.Millisecond * 100) + + blob = object.WithPrefix(blob, "meta/") + kc, _ := osync.ListAll(blob, "", "") + var keys []string + for obj := range kc { + keys = append(keys, obj.Key()) + } + if len(keys) < 1 { + t.Fatalf("there should be at least 1 backup file") + } +} diff --git a/pkg/vfs/compact.go b/pkg/vfs/compact.go new file mode 100644 index 0000000..fb93ce1 --- /dev/null +++ b/pkg/vfs/compact.go @@ -0,0 +1,109 @@ +/* + * JuiceFS, Copyright 2020 Juicedata, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package vfs + +import ( + "context" + "time" + + "github.com/juicedata/juicefs/pkg/chunk" + "github.com/juicedata/juicefs/pkg/meta" + "github.com/juicedata/juicefs/pkg/utils" + "github.com/prometheus/client_golang/prometheus" +) + +var ( + compactSizeHistogram = prometheus.NewHistogram(prometheus.HistogramOpts{ + Name: "compact_size_histogram_bytes", + Help: "Distribution of size of compacted data in bytes.", + Buckets: prometheus.ExponentialBuckets(1024, 2, 16), + }) +) + +func readSlice(store chunk.ChunkStore, s *meta.Slice, page *chunk.Page, off int) error { + buf := page.Data + read := 0 + reader := store.NewReader(s.Chunkid, int(s.Size)) + for read < len(buf) { + p := page.Slice(read, len(buf)-read) + n, err := reader.ReadAt(context.Background(), p, off+int(s.Off)) + p.Release() + if n == 0 && err != nil { + return err + } + read += n + off += n + } + return nil +} + +func Compact(conf chunk.Config, store chunk.ChunkStore, slices []meta.Slice, chunkid uint64) error { + for utils.AllocMemory()-store.UsedMemory() > int64(conf.BufferSize)*3/2 { + time.Sleep(time.Millisecond * 100) + } + var size uint32 + for _, s := range slices { + size += s.Len + } + compactSizeHistogram.Observe(float64(size)) + logger.Debugf("compact %d slices (%d bytes) to chunk %d", len(slices), size, chunkid) + + writer := store.NewWriter(chunkid) + + var pos int + for i, s := range slices { + if s.Chunkid == 0 { + _, err := writer.WriteAt(make([]byte, int(s.Len)), int64(pos)) + if err != nil { + writer.Abort() + return err + } + pos += int(s.Len) + continue + } + var read int + for read < int(s.Len) { + l := utils.Min(conf.BlockSize, int(s.Len)-read) + p := chunk.NewOffPage(l) + if err := readSlice(store, &slices[i], p, read); err != nil { + logger.Debugf("can't compact chunk %d, retry later, read %d: %s", chunkid, i, err) + p.Release() + writer.Abort() + return err + } + _, err := writer.WriteAt(p.Data, int64(pos+read)) + p.Release() + if err != nil { + logger.Errorf("can't compact chunk %d, retry later, write: %s", chunkid, err) + writer.Abort() + return err + } + read += l + if pos+read >= conf.BlockSize { + if err = writer.FlushTo(pos + read); err != nil { + panic(err) + } + } + } + pos += int(s.Len) + } + err := writer.Finish(pos) + if err != nil { + writer.Abort() + } + return err +} diff --git a/pkg/vfs/compact_test.go b/pkg/vfs/compact_test.go new file mode 100644 index 0000000..18732a2 --- /dev/null +++ b/pkg/vfs/compact_test.go @@ -0,0 +1,97 @@ +/* + * JuiceFS, Copyright 2020 Juicedata, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package vfs + +import ( + "context" + "testing" + + "github.com/juicedata/juicefs/pkg/chunk" + "github.com/juicedata/juicefs/pkg/meta" + "github.com/juicedata/juicefs/pkg/object" +) + +func TestCompact(t *testing.T) { + cconf := chunk.Config{ + BlockSize: 256 * 1024, + Compress: "lz4", + MaxUpload: 2, + BufferSize: 30 << 20, + CacheSize: 10, + CacheDir: "memory", + } + blob, _ := object.CreateStorage("mem", "", "", "") + store := chunk.NewCachedStore(blob, cconf) + + // prepare the slices + var slices []meta.Slice + var total int + for i := 0; i < 100; i++ { + buf := make([]byte, 100+i*100) + for j := range buf { + buf[j] = byte(i) + } + cid := uint64(i) + w := store.NewWriter(cid) + if n, e := w.WriteAt(buf, 0); e != nil { + t.Fatalf("write chunk %d: %s", cid, e) + } else { + total += n + } + if e := w.Finish(len(buf)); e != nil { + t.Fatalf("flush chunk %d: %s", cid, e) + } + slices = append(slices, meta.Slice{Chunkid: cid, Size: uint32(len(buf)), Len: uint32(len(buf))}) + } + + // compact + var cid uint64 = 1000 + err := Compact(cconf, store, slices, cid) + if err != nil { + t.Fatalf("compact %d slices : %s", len(slices), err) + } + + // verify result + r := store.NewReader(cid, total) + var off int + for i := 0; i < 100; i++ { + buf := make([]byte, 100+i*100) + page := chunk.NewPage(buf) + n, err := r.ReadAt(context.Background(), page, off) + if err != nil { + t.Fatalf("read chunk %d at %d: %s", cid, off, err) + } else if n != len(buf) { + t.Fatalf("short read: %d", n) + } + for j := range buf { + if buf[j] != byte(i) { + t.Fatalf("invalid byte at %d: %d !=%d", j, buf[j], i) + } + } + off += len(buf) + defer page.Release() + } + + // failed + _ = store.Remove(1, 200) + err = Compact(cconf, store, slices, cid) + if err == nil { + t.Fatalf("compact should fail with read but got nil") + } + + // TODO: inject write failure +} diff --git a/pkg/vfs/fill.go b/pkg/vfs/fill.go new file mode 100644 index 0000000..3f990a8 --- /dev/null +++ b/pkg/vfs/fill.go @@ -0,0 +1,171 @@ +/* + * JuiceFS, Copyright 2021 Juicedata, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package vfs + +import ( + "fmt" + "path" + "strings" + "sync" + "syscall" + "time" + + "github.com/juicedata/juicefs/pkg/meta" +) + +type _file struct { + ino Ino + size uint64 +} + +func (v *VFS) fillCache(paths []string, concurrent int) { + logger.Infof("start to warmup %d paths with %d workers", len(paths), concurrent) + start := time.Now() + todo := make(chan _file, 10240) + wg := sync.WaitGroup{} + for i := 0; i < concurrent; i++ { + wg.Add(1) + go func() { + for { + f := <-todo + if f.ino == 0 { + break + } + err := v.fillInode(f.ino, f.size) + if err != nil { // TODO: print path instead of inode + logger.Errorf("Inode %d could be corrupted: %s", f.ino, err) + } + } + wg.Done() + }() + } + + var inode Ino + var attr = &Attr{} + for _, p := range paths { + if st := v.resolve(p, &inode, attr); st != 0 { + logger.Warnf("Failed to resolve path %s: %s", p, st) + continue + } + logger.Debugf("Warming up path %s", p) + if attr.Typ == meta.TypeDirectory { + v.walkDir(inode, todo) + } else if attr.Typ == meta.TypeFile { + todo <- _file{inode, attr.Length} + } + } + close(todo) + wg.Wait() + logger.Infof("Warmup %d paths in %s", len(paths), time.Since(start)) +} + +func (v *VFS) resolve(p string, inode *Ino, attr *Attr) syscall.Errno { + p = strings.Trim(p, "/") + ctx := meta.Background + err := v.Meta.Resolve(ctx, 1, p, inode, attr) + if err != syscall.ENOTSUP { + return err + } + + // Fallback to the default implementation that calls `m.Lookup` for each directory along the path. + // It might be slower for deep directories, but it works for every meta that implements `Lookup`. + parent := Ino(1) + ss := strings.Split(p, "/") + for i, name := range ss { + if len(name) == 0 { + continue + } + if parent == 1 && i == len(ss)-1 && IsSpecialName(name) { + *inode, attr = GetInternalNodeByName(name) + parent = *inode + break + } + if i > 0 { + if err = v.Meta.Access(ctx, parent, MODE_MASK_R|MODE_MASK_X, attr); err != 0 { + return err + } + } + if err = v.Meta.Lookup(ctx, parent, name, inode, attr); err != 0 { + return err + } + if attr.Typ == meta.TypeSymlink { + var buf []byte + if err = v.Meta.ReadLink(ctx, *inode, &buf); err != 0 { + return err + } + target := string(buf) + if strings.HasPrefix(target, "/") || strings.Contains(target, "://") { + return syscall.ENOTSUP + } + target = path.Join(strings.Join(ss[:i], "/"), target) + if err = v.resolve(target, inode, attr); err != 0 { + return err + } + } + parent = *inode + } + if parent == 1 { + *inode = parent + if err = v.Meta.GetAttr(ctx, *inode, attr); err != 0 { + return err + } + } + return 0 +} + +func (v *VFS) walkDir(inode Ino, todo chan _file) { + pending := make([]Ino, 1) + pending[0] = inode + for len(pending) > 0 { + l := len(pending) + l-- + inode = pending[l] + pending = pending[:l] + var entries []*meta.Entry + r := v.Meta.Readdir(meta.Background, inode, 1, &entries) + if r == 0 { + for _, f := range entries { + name := string(f.Name) + if name == "." || name == ".." 
{ + continue + } + if f.Attr.Typ == meta.TypeDirectory { + pending = append(pending, f.Inode) + } else if f.Attr.Typ != meta.TypeSymlink { + todo <- _file{f.Inode, f.Attr.Length} + } + } + } else { + logger.Warnf("readdir %d: %s", inode, r) + } + } +} + +func (v *VFS) fillInode(inode Ino, size uint64) error { + var slices []meta.Slice + for indx := uint64(0); indx*meta.ChunkSize < size; indx++ { + if st := v.Meta.Read(meta.Background, inode, uint32(indx), &slices); st != 0 { + return fmt.Errorf("Failed to get slices of inode %d index %d: %d", inode, indx, st) + } + for _, s := range slices { + if err := v.Store.FillCache(s.Chunkid, s.Size); err != nil { + return fmt.Errorf("Failed to cache inode %d slice %d: %s", inode, s.Chunkid, err) + } + } + } + return nil +} diff --git a/pkg/vfs/fill_test.go b/pkg/vfs/fill_test.go new file mode 100644 index 0000000..a0212d5 --- /dev/null +++ b/pkg/vfs/fill_test.go @@ -0,0 +1,49 @@ +/* + * JuiceFS, Copyright 2021 Juicedata, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package vfs + +import ( + "os" + "testing" + + "github.com/juicedata/juicefs/pkg/meta" +) + +func TestFill(t *testing.T) { + v, _ := createTestVFS() + ctx := NewLogContext(meta.Background) + entry, _ := v.Mkdir(ctx, 1, "test", 0777, 022) + fe, fh, _ := v.Create(ctx, entry.Inode, "file", 0644, 0, uint32(os.O_WRONLY)) + _ = v.Write(ctx, fe.Inode, []byte("hello"), 0, fh) + _ = v.Flush(ctx, fe.Inode, fh, 0) + v.Release(ctx, fe.Inode, fh) + _, _ = v.Symlink(ctx, "test/file", 1, "sym") + _, _ = v.Symlink(ctx, "/tmp/testfile", 1, "sym2") + _, _ = v.Symlink(ctx, "testfile", 1, "sym3") + + // normal cases + v.fillCache([]string{"/test/file", "/test", "/sym", "/"}, 2) + + // remove chunk + var slices []meta.Slice + _ = v.Meta.Read(meta.Background, fe.Inode, 0, &slices) + for _, s := range slices { + _ = v.Store.Remove(s.Chunkid, int(s.Size)) + } + // bad cases + v.fillCache([]string{"/test/file", "/sym2", "/sym3", "/.stats", "/not_exists"}, 2) +} diff --git a/pkg/vfs/handle.go b/pkg/vfs/handle.go new file mode 100644 index 0000000..c697a7d --- /dev/null +++ b/pkg/vfs/handle.go @@ -0,0 +1,222 @@ +/* + * JuiceFS, Copyright 2020 Juicedata, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package vfs + +import ( + "sync" + "syscall" + "time" + + "github.com/juicedata/juicefs/pkg/meta" + "github.com/juicedata/juicefs/pkg/utils" +) + +type handle struct { + sync.Mutex + inode Ino + fh uint64 + + // for dir + children []*meta.Entry + + // for file + locks uint8 + flockOwner uint64 // kernel 3.1- does not pass lock_owner in release() + reader FileReader + writer FileWriter + ops []Context + + // rwlock + writing uint32 + readers uint32 + writers uint32 + cond *utils.Cond + + // internal files + off uint64 + data []byte + pending []byte +} + +func (h *handle) addOp(ctx Context) { + h.Lock() + defer h.Unlock() + h.ops = append(h.ops, ctx) +} + +func (h *handle) removeOp(ctx Context) { + h.Lock() + defer h.Unlock() + for i, c := range h.ops { + if c == ctx { + h.ops[i] = h.ops[len(h.ops)-1] + h.ops = h.ops[:len(h.ops)-1] + break + } + } +} + +func (h *handle) cancelOp(pid uint32) { + if pid == 0 { + return + } + h.Lock() + defer h.Unlock() + for _, c := range h.ops { + if c.Pid() == pid || c.Pid() > 0 && c.Duration() > time.Second { + c.Cancel() + } + } +} + +func (h *handle) Rlock(ctx Context) bool { + h.Lock() + for (h.writing | h.writers) != 0 { + if h.cond.WaitWithTimeout(time.Second) && ctx.Canceled() { + h.Unlock() + logger.Warnf("read lock %d interrupted", h.inode) + return false + } + } + h.readers++ + h.Unlock() + h.addOp(ctx) + return true +} + +func (h *handle) Runlock() { + h.Lock() + h.readers-- + if h.readers == 0 { + h.cond.Broadcast() + } + h.Unlock() +} + +func (h *handle) Wlock(ctx Context) bool { + h.Lock() + h.writers++ + for (h.readers | h.writing) != 0 { + if h.cond.WaitWithTimeout(time.Second) && ctx.Canceled() { + h.writers-- + h.Unlock() + logger.Warnf("write lock %d interrupted", h.inode) + return false + } + } + h.writers-- + h.writing = 1 + h.Unlock() + h.addOp(ctx) + return true +} + +func (h *handle) Wunlock() { + h.Lock() + h.writing = 0 + h.cond.Broadcast() + h.Unlock() +} + +func (h *handle) Close() { + if h.reader != nil { + h.reader.Close(meta.Background) + h.reader = nil + } + if h.writer != nil { + _ = h.writer.Close(meta.Background) + h.writer = nil + } +} + +func (v *VFS) newHandle(inode Ino) *handle { + v.hanleM.Lock() + defer v.hanleM.Unlock() + fh := v.nextfh + h := &handle{inode: inode, fh: fh} + v.nextfh++ + h.cond = utils.NewCond(h) + v.handles[inode] = append(v.handles[inode], h) + return h +} + +func (v *VFS) findAllHandles(inode Ino) []*handle { + v.hanleM.Lock() + defer v.hanleM.Unlock() + return v.handles[inode] +} + +func (v *VFS) findHandle(inode Ino, fh uint64) *handle { + v.hanleM.Lock() + defer v.hanleM.Unlock() + for _, f := range v.handles[inode] { + if f.fh == fh { + return f + } + } + return nil +} + +func (v *VFS) releaseHandle(inode Ino, fh uint64) { + v.hanleM.Lock() + defer v.hanleM.Unlock() + hs := v.handles[inode] + for i, f := range hs { + if f.fh == fh { + if i+1 < len(hs) { + hs[i] = hs[len(hs)-1] + } + if len(hs) > 1 { + v.handles[inode] = hs[:len(hs)-1] + } else { + delete(v.handles, inode) + } + break + } + } +} + +func (v *VFS) newFileHandle(inode Ino, length uint64, flags uint32) uint64 { + h := v.newHandle(inode) + h.Lock() + defer h.Unlock() + switch flags & O_ACCMODE { + case syscall.O_RDONLY: + h.reader = v.reader.Open(inode, length) + case syscall.O_WRONLY: // FUSE writeback_cache mode need reader even for WRONLY + fallthrough + case syscall.O_RDWR: + h.reader = v.reader.Open(inode, length) + h.writer = v.writer.Open(inode, length) + } + return h.fh +} + +func (v *VFS) releaseFileHandle(ino Ino, 
fh uint64) { + h := v.findHandle(ino, fh) + if h != nil { + h.Lock() + // rwlock_wait_for_unlock: + for (h.writing | h.writers | h.readers) != 0 { + h.cond.WaitWithTimeout(time.Millisecond * 100) + } + h.writing = 1 // for remove + h.Unlock() + h.Close() + v.releaseHandle(ino, fh) + } +} diff --git a/pkg/vfs/helpers.go b/pkg/vfs/helpers.go new file mode 100644 index 0000000..80de4ac --- /dev/null +++ b/pkg/vfs/helpers.go @@ -0,0 +1,113 @@ +/* + * JuiceFS, Copyright 2020 Juicedata, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package vfs + +import ( + "fmt" + "syscall" + "time" + + "github.com/juicedata/juicefs/pkg/meta" +) + +const ( + MODE_MASK_R = 4 + MODE_MASK_W = 2 + MODE_MASK_X = 1 +) + +func strerr(errno syscall.Errno) string { + if errno == 0 { + return "OK" + } + return errno.Error() +} + +var typestr = map[uint16]byte{ + syscall.S_IFSOCK: 's', + syscall.S_IFLNK: 'l', + syscall.S_IFREG: '-', + syscall.S_IFBLK: 'b', + syscall.S_IFDIR: 'd', + syscall.S_IFCHR: 'c', + syscall.S_IFIFO: 'f', + 0: '?', +} + +type smode uint16 + +func (mode smode) String() string { + s := []byte("?rwxrwxrwx") + s[0] = typestr[uint16(mode)&(syscall.S_IFMT&0xffff)] + if (mode & syscall.S_ISUID) != 0 { + s[3] = 's' + } + if (mode & syscall.S_ISGID) != 0 { + s[6] = 's' + } + if (mode & syscall.S_ISVTX) != 0 { + s[9] = 't' + } + for i := uint16(0); i < 9; i++ { + if (mode & (1 << i)) == 0 { + if s[9-i] == 's' || s[9-i] == 't' { + s[9-i] &= 0xDF + } else { + s[9-i] = '-' + } + } + } + return string(s) +} + +// Entry is an alias of meta.Entry, which is used to generate the string +// representation lazily. +type Entry meta.Entry + +func (entry *Entry) String() string { + if entry == nil { + return "" + } + if entry.Attr == nil { + return fmt.Sprintf(" (%d)", entry.Inode) + } + a := entry.Attr + mode := a.SMode() + return fmt.Sprintf(" (%d,[%s:0%06o,%d,%d,%d,%d,%d,%d,%d])", + entry.Inode, smode(mode), mode, a.Nlink, a.Uid, a.Gid, + a.Atime, a.Mtime, a.Ctime, a.Length) +} + +// LogContext is an interface to add duration on meta.Context. +type LogContext interface { + meta.Context + Duration() time.Duration +} + +type logContext struct { + meta.Context + start time.Time +} + +func (ctx *logContext) Duration() time.Duration { + return time.Since(ctx.start) +} + +// NewLogContext creates an LogContext starting from now. +func NewLogContext(ctx meta.Context) LogContext { + return &logContext{ctx, time.Now()} +} diff --git a/pkg/vfs/helpers_test.go b/pkg/vfs/helpers_test.go new file mode 100644 index 0000000..789d0c0 --- /dev/null +++ b/pkg/vfs/helpers_test.go @@ -0,0 +1,80 @@ +/* + * JuiceFS, Copyright 2020 Juicedata, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package vfs + +import ( + "syscall" + "testing" + + "github.com/juicedata/juicefs/pkg/meta" +) + +type smodeCase struct { + mode uint16 + str string +} + +var cases = []smodeCase{ + {syscall.S_IFDIR | 00755, "drwxr-xr-x"}, + {syscall.S_IFREG | 01644, "-rw-r--r-T"}, + {syscall.S_IFLNK | 03755, "lrwxr-sr-t"}, + {syscall.S_IFSOCK | 06700, "srws--S---"}, +} + +func TestSmode(t *testing.T) { + for _, s := range cases { + res := smode(s.mode).String() + if res != s.str { + t.Fatalf("str of %o: %s != %s", s.mode, res, s.str) + } + } +} + +func TestEntryString(t *testing.T) { + var e *Entry + if e.String() != "" { + t.Fatalf("empty entry should be ''") + } + e = &Entry{Inode: 2, Name: []byte("test")} + if e.String() != " (2)" { + t.Fatalf("empty entry should be ` (2)`") + } + + e.Attr = &meta.Attr{ + Typ: meta.TypeFile, + Mode: 01755, + Nlink: 1, + Uid: 2, + Gid: 3, + Atime: 4, + Mtime: 5, + Ctime: 6, + Length: 7, + } + if e.String() != " (2,[-rwxr-xr-t:0101755,1,2,3,4,5,6,7])" { + t.Fatalf("string of entry is not expected: %s", e.String()) + } +} + +func TestError(t *testing.T) { + if strerr(0) != "OK" { + t.Fatalf("expect 'OK' but got %q", strerr(0)) + } + if strerr(syscall.EACCES) != "permission denied" { + t.Fatalf("expect 'Access denied', but got %q", strerr(syscall.EACCES)) + } +} diff --git a/pkg/vfs/internal.go b/pkg/vfs/internal.go new file mode 100644 index 0000000..5cd6985 --- /dev/null +++ b/pkg/vfs/internal.go @@ -0,0 +1,211 @@ +/* + * JuiceFS, Copyright 2020 Juicedata, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package vfs + +import ( + "bytes" + "fmt" + "os" + "strconv" + "strings" + "syscall" + "time" + + "github.com/juicedata/juicefs/pkg/meta" + "github.com/juicedata/juicefs/pkg/utils" + "github.com/prometheus/client_golang/prometheus" + io_prometheus_client "github.com/prometheus/client_model/go" +) + +const ( + minInternalNode = 0x7FFFFFFF00000000 + logInode = minInternalNode + 1 + controlInode = minInternalNode + 2 + statsInode = minInternalNode + 3 + configInode = minInternalNode + 4 + trashInode = meta.TrashInode +) + +type internalNode struct { + inode Ino + name string + attr *Attr +} + +var internalNodes = []*internalNode{ + {logInode, ".accesslog", &Attr{Mode: 0400}}, + {controlInode, ".control", &Attr{Mode: 0666}}, + {statsInode, ".stats", &Attr{Mode: 0444}}, + {configInode, ".config", &Attr{Mode: 0400}}, + {trashInode, meta.TrashName, &Attr{Mode: 0555}}, +} + +func init() { + uid := uint32(os.Getuid()) + gid := uint32(os.Getgid()) + now := time.Now().Unix() + for _, v := range internalNodes { + if v.inode == trashInode { + v.attr.Typ = meta.TypeDirectory + v.attr.Nlink = 2 + } else { + v.attr.Typ = meta.TypeFile + v.attr.Nlink = 1 + v.attr.Uid = uid + v.attr.Gid = gid + } + v.attr.Atime = now + v.attr.Mtime = now + v.attr.Ctime = now + v.attr.Full = true + } +} + +func IsSpecialNode(ino Ino) bool { + return ino >= minInternalNode +} + +func IsSpecialName(name string) bool { + if name[0] != '.' { + return false + } + for _, n := range internalNodes { + if name == n.name { + return true + } + } + return false +} + +func getInternalNode(ino Ino) *internalNode { + for _, n := range internalNodes { + if ino == n.inode { + return n + } + } + return nil +} + +func GetInternalNodeByName(name string) (Ino, *Attr) { + n := getInternalNodeByName(name) + if n != nil { + return n.inode, n.attr + } + return 0, nil +} + +func getInternalNodeByName(name string) *internalNode { + if name[0] != '.' 
{ + return nil + } + for _, n := range internalNodes { + if name == n.name { + return n + } + } + return nil +} + +func collectMetrics() []byte { + mfs, err := prometheus.DefaultGatherer.Gather() + if err != nil { + logger.Errorf("collect metrics: %s", err) + return nil + } + w := bytes.NewBuffer(nil) + format := func(v float64) string { + return strconv.FormatFloat(v, 'f', -1, 64) + } + for _, mf := range mfs { + for _, m := range mf.Metric { + var name string = *mf.Name + for _, l := range m.Label { + if *l.Name != "mp" && *l.Name != "vol_name" { + name += "_" + *l.Value + } + } + switch *mf.Type { + case io_prometheus_client.MetricType_GAUGE: + _, _ = fmt.Fprintf(w, "%s %s\n", name, format(*m.Gauge.Value)) + case io_prometheus_client.MetricType_COUNTER: + _, _ = fmt.Fprintf(w, "%s %s\n", name, format(*m.Counter.Value)) + case io_prometheus_client.MetricType_HISTOGRAM: + _, _ = fmt.Fprintf(w, "%s_total %d\n", name, *m.Histogram.SampleCount) + _, _ = fmt.Fprintf(w, "%s_sum %s\n", name, format(*m.Histogram.SampleSum)) + case io_prometheus_client.MetricType_SUMMARY: + } + } + } + return w.Bytes() +} + +func (v *VFS) handleInternalMsg(ctx Context, cmd uint32, r *utils.Buffer) []byte { + switch cmd { + case meta.Rmr: + inode := Ino(r.Get64()) + name := string(r.Get(int(r.Get8()))) + r := meta.Remove(v.Meta, ctx, inode, name) + return []byte{uint8(r)} + case meta.Info: + var summary meta.Summary + inode := Ino(r.Get64()) + var recursive uint8 = 1 + if r.HasMore() { + recursive = r.Get8() + } + + wb := utils.NewBuffer(4) + r := meta.GetSummary(v.Meta, ctx, inode, &summary, recursive != 0) + if r != 0 { + msg := r.Error() + wb.Put32(uint32(len(msg))) + return append(wb.Bytes(), []byte(msg)...) + } + var w = bytes.NewBuffer(nil) + fmt.Fprintf(w, " inode: %d\n", inode) + fmt.Fprintf(w, " files:\t%d\n", summary.Files) + fmt.Fprintf(w, " dirs:\t%d\n", summary.Dirs) + fmt.Fprintf(w, " length:\t%d\n", summary.Length) + fmt.Fprintf(w, " size:\t%d\n", summary.Size) + + if summary.Files == 1 && summary.Dirs == 0 { + fmt.Fprintf(w, " chunks:\n") + for indx := uint64(0); indx*meta.ChunkSize < summary.Length; indx++ { + var cs []meta.Slice + _ = v.Meta.Read(ctx, inode, uint32(indx), &cs) + for _, c := range cs { + fmt.Fprintf(w, "\t%d:\t%d\t%d\t%d\t%d\n", indx, c.Chunkid, c.Size, c.Off, c.Len) + } + } + } + wb.Put32(uint32(w.Len())) + return append(wb.Bytes(), w.Bytes()...) + case meta.FillCache: + paths := strings.Split(string(r.Get(int(r.Get32()))), "\n") + concurrent := r.Get16() + background := r.Get8() + if background == 0 { + v.fillCache(paths, int(concurrent)) + } else { + go v.fillCache(paths, int(concurrent)) + } + return []byte{uint8(0)} + default: + logger.Warnf("unknown message type: %d", cmd) + return []byte{uint8(syscall.EINVAL & 0xff)} + } +} diff --git a/pkg/vfs/reader.go b/pkg/vfs/reader.go new file mode 100644 index 0000000..01a585c --- /dev/null +++ b/pkg/vfs/reader.go @@ -0,0 +1,904 @@ +/* + * JuiceFS, Copyright 2020 Juicedata, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package vfs + +import ( + "context" + "fmt" + "runtime" + "sort" + "sync" + "sync/atomic" + "syscall" + "time" + + "github.com/juicedata/juicefs/pkg/chunk" + "github.com/juicedata/juicefs/pkg/meta" + "github.com/juicedata/juicefs/pkg/utils" +) + +/* + * state of sliceReader + * + * <-- REFRESH + * | | + * NEW -> BUSY -> READY + * | | + * BREAK ---> INVALID + */ +const ( + NEW = iota + BUSY + REFRESH + BREAK + READY + INVALID +) + +const readSessions = 2 + +var readBufferUsed int64 + +type sstate uint8 + +func (m sstate) valid() bool { return m != BREAK && m != INVALID } + +var stateNames = []string{"NEW", "BUSY", "REFRESH", "BREAK", "READY", "INVALID"} + +func (m sstate) String() string { + if m <= INVALID { + return stateNames[m] + } + panic("") +} + +type FileReader interface { + Read(ctx meta.Context, off uint64, buf []byte) (int, syscall.Errno) + Close(ctx meta.Context) +} + +type DataReader interface { + Open(inode Ino, length uint64) FileReader + Truncate(inode Ino, length uint64) + Invalidate(inode Ino, off, length uint64) +} + +type frange struct { + off uint64 + len uint64 +} + +func (r *frange) String() string { return fmt.Sprintf("[%d,%d,%d)", r.off, r.len, r.end()) } +func (r *frange) end() uint64 { return r.off + r.len } +func (r *frange) contain(p uint64) bool { return r.off < p && p < r.end() } +func (r *frange) overlap(a *frange) bool { return a.off < r.end() && r.off < a.end() } +func (r *frange) include(a *frange) bool { return r.off <= a.off && a.end() <= r.end() } + +// protected by file +type sliceReader struct { + file *fileReader + block *frange + state sstate + page *chunk.Page + indx uint32 + currentPos uint32 + lastAccess time.Time + cond *utils.Cond + next *sliceReader + prev **sliceReader + refs uint16 +} + +func (s *sliceReader) delay(delay time.Duration) { + time.AfterFunc(delay, s.run) +} + +func (s *sliceReader) done(err syscall.Errno, delay time.Duration) { + f := s.file + switch s.state { + case BUSY: + s.state = NEW // failed + case BREAK: + s.state = INVALID + case REFRESH: + s.state = NEW + } + if err != 0 { + if !f.closing { + logger.Errorf("read file %d: %s", f.inode, err) + } + f.err = err + } + if f.shouldStop() { + s.state = INVALID + } + + switch s.state { + case NEW: + s.delay(delay) + case READY: + s.cond.Broadcast() + case INVALID: + if s.refs == 0 { + s.delete() + if f.closing && f.slices == nil { + f.r.Lock() + if f.refs == 0 { + f.delete() + } + f.r.Unlock() + } + } else { + s.cond.Broadcast() + } + } + runtime.Goexit() +} + +func retry_time(trycnt uint32) time.Duration { + if trycnt < 30 { + return time.Millisecond * time.Duration((trycnt-1)*300+1) + } + return time.Second * 10 +} + +func (s *sliceReader) run() { + f := s.file + f.Lock() + defer f.Unlock() + if s.state != NEW || f.shouldStop() { + s.done(0, 0) + } + s.state = BUSY + indx := s.indx + inode := f.inode + f.Unlock() + + f.Lock() + length := f.length + f.Unlock() + var chunks []meta.Slice + err := f.r.m.Read(meta.Background, inode, indx, &chunks) + f.Lock() + if s.state != BUSY || f.err != 0 || f.closing { + s.done(0, 0) + } + if err == syscall.ENOENT { + s.done(err, 0) + } else if err != 0 { + f.tried++ + trycnt := f.tried + if trycnt >= f.r.maxRetries { + s.done(syscall.EIO, 0) + } else { + s.done(0, retry_time(trycnt)) + } + } + + s.currentPos = 0 + if s.block.off > length { + s.block.len = 0 + s.state = READY + s.done(0, 0) + } else if s.block.end() > length { + s.block.len = length - s.block.off + } + need := s.block.len + f.Unlock() + + p := s.page.Slice(0, 
int(need)) + defer p.Release() + var n int + ctx := context.TODO() + n = f.r.Read(ctx, p, chunks, (uint32(s.block.off))%meta.ChunkSize) + + f.Lock() + if s.state != BUSY || f.shouldStop() { + s.done(0, 0) + } + if n == int(need) { + s.state = READY + s.currentPos = uint32(n) + s.file.tried = 0 + s.lastAccess = time.Now() + s.done(0, 0) + } else { + s.currentPos = 0 // start again from beginning + err = syscall.EIO + f.tried++ + _ = f.r.m.InvalidateChunkCache(meta.Background, inode, indx) + if f.tried >= f.r.maxRetries { + s.done(err, 0) + } else { + s.done(0, retry_time(f.tried)) + } + } +} + +func (s *sliceReader) invalidate() { + switch s.state { + case NEW: + case BUSY: + s.state = REFRESH + // TODO: interrupt reader + case READY: + if s.refs > 0 { + s.state = NEW + go s.run() + } else { + s.state = INVALID + s.delete() // nobody wants it anymore, so delete it + } + } +} + +func (s *sliceReader) drop() { + if s.state <= BREAK { + if s.refs == 0 { + s.state = BREAK + // TODO: interrupt reader + } + } else { + if s.refs == 0 { + s.delete() // nobody wants it anymore, so delete it + } else if s.state == READY { + s.state = INVALID // somebody still using it, so mark it for removal + } + } +} + +func (s *sliceReader) delete() { + *(s.prev) = s.next + if s.next != nil { + s.next.prev = s.prev + } else { + s.file.last = s.prev + } + s.page.Release() + atomic.AddInt64(&readBufferUsed, -int64(s.block.len)) +} + +type session struct { + lastOffset uint64 + total uint64 + readahead uint64 + atime time.Time +} + +type fileReader struct { + // protected by itself + inode Ino + length uint64 + err syscall.Errno + tried uint32 + sessions [readSessions]session + slices *sliceReader + last **sliceReader + + sync.Mutex + closing bool + + // protected by r + refs uint16 + next *fileReader + r *dataReader +} + +// protected by f +func (f *fileReader) newSlice(block *frange) *sliceReader { + s := &sliceReader{} + s.file = f + s.lastAccess = time.Now() + s.indx = uint32(block.off / meta.ChunkSize) + s.block = &frange{block.off, block.len} // random read + blockend := (block.off/f.r.blockSize + 1) * f.r.blockSize + if s.block.end() > f.length { + s.block.len = f.length - s.block.off + } + if s.block.end() > blockend { + s.block.len = blockend - s.block.off + } + block.off = s.block.end() + block.len -= s.block.len + s.page = chunk.NewOffPage(int(s.block.len)) + s.cond = utils.NewCond(&f.Mutex) + s.prev = f.last + *(f.last) = s + f.last = &(s.next) + go s.run() + atomic.AddInt64(&readBufferUsed, int64(s.block.len)) + return s +} + +func (f *fileReader) delete() { + r := f.r + i := r.files[f.inode] + if i == f { + if i.next != nil { + r.files[f.inode] = i.next + } else { + delete(r.files, f.inode) + } + } else { + for i != nil { + if i.next == f { + i.next = f.next + break + } + i = i.next + } + } + f.next = nil +} + +func (f *fileReader) acquire() { + f.r.Lock() + defer f.r.Unlock() + f.refs++ +} + +func (f *fileReader) release() { + f.r.Lock() + defer f.r.Unlock() + f.refs-- + if f.refs == 0 && f.slices == nil { + f.delete() + } +} + +func (f *fileReader) guessSession(block *frange) int { + idx := -1 + var closestOff uint64 + for i, ses := range f.sessions { + if ses.lastOffset > closestOff && ses.lastOffset <= block.off && block.off <= ses.lastOffset+ses.readahead+f.r.blockSize { + idx = i + closestOff = ses.lastOffset + } + } + if idx == -1 { + for i, ses := range f.sessions { + bt := ses.readahead / 8 + if bt < f.r.blockSize { + bt = f.r.blockSize + } + min := ses.lastOffset - bt + if ses.lastOffset < bt 
{ + min = 0 + } + if min <= block.off && block.off < ses.lastOffset && (closestOff == 0 || ses.lastOffset < closestOff) { + idx = i + closestOff = ses.lastOffset + } + } + } + if idx == -1 { + for i, ses := range f.sessions { + if ses.total == 0 { + idx = i + break + } + if idx == -1 || ses.atime.Before(f.sessions[idx].atime) { + idx = i + } + } + f.sessions[idx].lastOffset = block.off + f.sessions[idx].total = block.len + f.sessions[idx].readahead = 0 + } else { + if block.end() > f.sessions[idx].lastOffset { + f.sessions[idx].total += block.end() - f.sessions[idx].lastOffset + } + } + f.sessions[idx].atime = time.Now() + return idx +} + +func (f *fileReader) checkReadahead(block *frange) int { + idx := f.guessSession(block) + ses := &f.sessions[idx] + seqdata := ses.total + readahead := ses.readahead + used := uint64(atomic.LoadInt64(&readBufferUsed)) + if readahead == 0 && (block.off == 0 || seqdata > block.len) { // begin with read-ahead turned on + ses.readahead = f.r.blockSize + } else if readahead < f.r.readAheadMax && seqdata >= readahead && f.r.readAheadTotal-used > readahead*4 { + ses.readahead *= 2 + } else if readahead >= f.r.blockSize && (f.r.readAheadTotal-used < readahead/2 || seqdata < readahead/4) { + ses.readahead /= 2 + } + if ses.readahead >= f.r.blockSize { + ahead := frange{block.end(), ses.readahead} + f.readAhead(&ahead) + } + if block.end() > ses.lastOffset { + ses.lastOffset = block.end() + } + return idx +} + +func (f *fileReader) need(block *frange) bool { + for _, ses := range f.sessions { + if ses.total == 0 { + break + } + bt := ses.readahead / 8 + if bt < f.r.blockSize { + bt = f.r.blockSize + } + b := &frange{ses.lastOffset - bt, ses.readahead*2 + f.r.blockSize*2} + if ses.lastOffset < bt { + b.off = 0 + } + if block.overlap(b) { + return true + } + } + return false +} + +// cleanup unused requests +func (f *fileReader) cleanupRequests(block *frange) { + now := time.Now() + var cnt int + f.visit(func(s *sliceReader) { + if !s.state.valid() || + !block.overlap(s.block) && (s.lastAccess.Add(time.Second*30).Before(now) || !f.need(s.block)) { + s.drop() + } else if !block.overlap(s.block) { + cnt++ + } + }) + f.visit(func(s *sliceReader) { + if !block.overlap(s.block) && cnt > f.r.maxRequests { + s.drop() + cnt-- + } + }) +} + +func (f *fileReader) releaseIdleBuffer() { + f.Lock() + defer f.Unlock() + now := time.Now() + var idle = time.Minute + used := atomic.LoadInt64(&readBufferUsed) + if used > int64(f.r.readAheadTotal) { + idle /= time.Duration(used / int64(f.r.readAheadTotal)) + } + f.visit(func(s *sliceReader) { + if !s.state.valid() || s.lastAccess.Add(idle).Before(now) || !f.need(s.block) { + s.drop() + } + }) +} + +func (f *fileReader) splitRange(block *frange) []uint64 { + ranges := []uint64{block.off, block.end()} + contain := func(p uint64) bool { + for _, i := range ranges { + if i == p { + return true + } + } + return false + } + f.visit(func(s *sliceReader) { + if s.state.valid() { + if block.contain(s.block.off) && !contain(s.block.off) { + ranges = append(ranges, s.block.off) + } + if block.contain(s.block.end()) && !contain(s.block.end()) { + ranges = append(ranges, s.block.end()) + } + } + }) + sort.Slice(ranges, func(i, j int) bool { + return ranges[i] < ranges[j] + }) + return ranges +} + +// protected by f +func (f *fileReader) readAhead(block *frange) { + f.visit(func(r *sliceReader) { + if r.state.valid() && r.block.off <= block.off && r.block.end() > block.off { + if r.state == READY && block.len > f.r.blockSize && r.block.off == 
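The window adaptation in checkReadahead is essentially additive-start, multiplicative-increase/decrease: it starts at one block once a session looks sequential, doubles while the session keeps consuming data and the shared read buffer has plenty of headroom, and halves under buffer pressure or when reads fall behind the window. A self-contained sketch of just that decision; the constants and free-buffer figure here are illustrative, not the package's actual values:

package main

import "fmt"

// nextReadahead mirrors the shape of the window update: seqBytes is how much
// the session has read sequentially, free is the unused part of the shared
// read buffer.
func nextReadahead(cur, seqBytes, free, blockSize, maxWindow uint64) uint64 {
	switch {
	case cur == 0 && seqBytes > 0:
		return blockSize // session looks sequential: start with one block
	case cur < maxWindow && seqBytes >= cur && free > cur*4:
		return cur * 2 // keeping up and buffer is roomy: grow
	case cur >= blockSize && (free < cur/2 || seqBytes < cur/4):
		return cur / 2 // buffer pressure or reads fell behind: shrink
	default:
		return cur
	}
}

func main() {
	block := uint64(4 << 20)
	w := uint64(0)
	for _, seq := range []uint64{block, 2 * block, 8 * block, 16 * block} {
		w = nextReadahead(w, seq, 200<<20, block, 32*block)
		fmt.Println(w>>20, "MiB") // 4, 8, 16, 32
	}
}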
block.off && r.block.off%f.r.blockSize == 0 { + // next block is ready, reduce readahead by a block + block.len -= f.r.blockSize / 2 + } + if r.block.end() <= block.end() { + block.len = block.end() - r.block.end() + } else { + block.len = 0 + } + block.off = r.block.end() + } + }) + if block.len > 0 && block.off < f.length && uint64(atomic.LoadInt64(&readBufferUsed)) < f.r.readAheadTotal { + if block.len < f.r.blockSize { + block.len += f.r.blockSize - block.end()%f.r.blockSize // align to end of a block + } + f.newSlice(block) + if block.len > 0 { + f.readAhead(block) + } + } +} + +type req struct { + frange + s *sliceReader +} + +func (f *fileReader) prepareRequests(ranges []uint64) []*req { + var reqs []*req + edges := len(ranges) + for i := 0; i < edges-1; i++ { + var added bool + b := frange{ranges[i], ranges[i+1] - ranges[i]} + f.visit(func(s *sliceReader) { + if !added && s.state.valid() && s.block.include(&b) { + s.refs++ + s.lastAccess = time.Now() + reqs = append(reqs, &req{frange{ranges[i] - s.block.off, b.len}, s}) + added = true + } + }) + if !added { + for b.len > 0 { + s := f.newSlice(&b) + s.refs++ + reqs = append(reqs, &req{frange{0, s.block.len}, s}) + } + } + } + return reqs +} + +func (f *fileReader) shouldStop() bool { + return f.err != 0 || f.closing +} + +func (f *fileReader) waitForIO(ctx meta.Context, reqs []*req, buf []byte) (int, syscall.Errno) { + start := time.Now() + for _, req := range reqs { + s := req.s + for s.state != READY && uint64(s.currentPos) < s.block.len { + if s.cond.WaitWithTimeout(time.Second) { + if ctx.Canceled() { + logger.Warnf("read %d interrupted after %d", f.inode, time.Since(start)) + return 0, syscall.EINTR + } + } + if f.shouldStop() { + return 0, f.err + } + } + } + + var n int + for _, req := range reqs { + s := req.s + if req.off < s.block.len && s.block.off+req.off < f.length { + if req.end() > s.block.len { + logger.Warnf("not enough bytes (%d < %d), restart read", s.block.len, req.end()) + return 0, syscall.EAGAIN + } + if s.block.off+req.end() > f.length { + req.len = f.length - s.block.off - req.off + } + n += copy(buf[n:], s.page.Data[req.off:req.end()]) + } + } + return n, 0 +} + +func (f *fileReader) Read(ctx meta.Context, offset uint64, buf []byte) (int, syscall.Errno) { + f.Lock() + defer f.Unlock() + f.acquire() + defer f.release() + + if f.err != 0 || f.closing { + return 0, f.err + } + + size := uint64(len(buf)) + if offset >= f.length || size == 0 { + return 0, 0 + } + block := &frange{offset, size} + if block.end() > f.length { + block.len = f.length - block.off + } + + f.cleanupRequests(block) + var lastBS uint64 = 32 << 10 + if block.off+lastBS > f.length { + lastblock := frange{f.length - lastBS, lastBS} + if f.length < lastBS { + lastblock = frange{0, f.length} + } + f.readAhead(&lastblock) + } + ranges := f.splitRange(block) + reqs := f.prepareRequests(ranges) + defer func() { + for _, req := range reqs { + s := req.s + s.refs-- + if s.refs == 0 && s.state == INVALID { + s.delete() + } + } + }() + f.checkReadahead(block) + return f.waitForIO(ctx, reqs, buf) +} + +func (f *fileReader) visit(fn func(s *sliceReader)) { + var next *sliceReader + for s := f.slices; s != nil; s = next { + next = s.next + fn(s) + } +} + +func (f *fileReader) Close(ctx meta.Context) { + f.Lock() + f.closing = true + f.visit(func(s *sliceReader) { + s.drop() + }) + f.release() + f.Unlock() +} + +type dataReader struct { + sync.Mutex + m meta.Meta + store chunk.ChunkStore + files map[Ino]*fileReader + blockSize uint64 + readAheadMax 
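Before requests are built, splitRange cuts the wanted range at every boundary of an already-buffered slice, so prepareRequests can satisfy each piece from exactly one slice or start a fresh one. A small standalone version of that cut, with existing slices represented simply as (off, end) pairs instead of the package's sliceReader list:

package main

import (
	"fmt"
	"sort"
)

// splitAt returns the sorted cut points for [off, off+length) given the
// boundaries of slices that are already buffered.
func splitAt(off, length uint64, slices [][2]uint64) []uint64 {
	end := off + length
	pts := map[uint64]bool{off: true, end: true}
	for _, s := range slices {
		for _, p := range []uint64{s[0], s[1]} {
			if p > off && p < end {
				pts[p] = true
			}
		}
	}
	out := make([]uint64, 0, len(pts))
	for p := range pts {
		out = append(out, p)
	}
	sort.Slice(out, func(i, j int) bool { return out[i] < out[j] })
	return out
}

func main() {
	// A read of [100, 400) over buffered slices [0,150) and [300,350)
	// is cut into [100,150) [150,300) [300,350) [350,400).
	fmt.Println(splitAt(100, 300, [][2]uint64{{0, 150}, {300, 350}}))
}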
uint64 + readAheadTotal uint64 + maxRequests int + maxRetries uint32 +} + +func NewDataReader(conf *Config, m meta.Meta, store chunk.ChunkStore) DataReader { + var readAheadTotal = 256 << 20 + var readAheadMax = conf.Chunk.BlockSize * 8 + if conf.Chunk.BufferSize > 0 { + readAheadTotal = conf.Chunk.BufferSize * 8 / 10 // 80% of total buffer + } + if conf.Chunk.Readahead > 0 { + readAheadMax = conf.Chunk.Readahead + } + r := &dataReader{ + m: m, + store: store, + files: make(map[Ino]*fileReader), + blockSize: uint64(conf.Chunk.BlockSize), + readAheadTotal: uint64(readAheadTotal), + readAheadMax: uint64(readAheadMax), + maxRequests: readAheadMax/conf.Chunk.BlockSize*readSessions + 1, + maxRetries: uint32(conf.Meta.Retries), + } + go r.checkReadBuffer() + return r +} + +func (r *dataReader) checkReadBuffer() { + for { + r.Lock() + for _, f := range r.files { + for f != nil { + r.Unlock() + f.releaseIdleBuffer() + r.Lock() + f = f.next + } + } + r.Unlock() + time.Sleep(time.Second) + } +} + +func (r *dataReader) Open(inode Ino, length uint64) FileReader { + f := &fileReader{ + r: r, + inode: inode, + length: length, + } + f.last = &(f.slices) + + r.Lock() + f.refs = 1 + f.next = r.files[inode] + r.files[inode] = f + r.Unlock() + return f +} + +func (r *dataReader) visit(inode Ino, fn func(*fileReader)) { + // r could be hold inside f, so Unlock r first to avoid deadlock + r.Lock() + var fs []*fileReader + f := r.files[inode] + for f != nil { + fs = append(fs, f) + f = f.next + } + r.Unlock() + for _, f := range fs { + f.Lock() + fn(f) + f.Unlock() + } +} + +func (r *dataReader) Truncate(inode Ino, length uint64) { + r.visit(inode, func(f *fileReader) { + if length < f.length { + f.visit(func(s *sliceReader) { + if s.block.off+s.block.len > length { + s.invalidate() + } + }) + } + f.length = length + }) +} + +func (r *dataReader) Invalidate(inode Ino, off, length uint64) { + b := frange{off, length} + r.visit(inode, func(f *fileReader) { + if off+length > f.length { + f.length = off + length + } + f.visit(func(s *sliceReader) { + if b.overlap(s.block) { + s.invalidate() + } + }) + }) +} + +func (r *dataReader) readSlice(ctx context.Context, s *meta.Slice, page *chunk.Page, off int) error { + buf := page.Data + read := 0 + if s.Chunkid == 0 { + for read < len(buf) { + buf[read] = 0 + read++ + } + return nil + } + + reader := r.store.NewReader(s.Chunkid, int(s.Size)) + for read < len(buf) { + p := page.Slice(read, len(buf)-read) + n, err := reader.ReadAt(ctx, p, off+int(s.Off)) + p.Release() + if n == 0 && err != nil { + logger.Warningf("fail to read chunkid %d (off:%d, size:%d, clen: %d): %s", + s.Chunkid, off+int(s.Off), len(buf)-read, s.Size, err) + return err + } + read += n + off += n + } + return nil +} + +func (r *dataReader) Read(ctx context.Context, page *chunk.Page, chunks []meta.Slice, offset uint32) int { + if len(chunks) > 16 { + return r.readManyChunks(ctx, page, chunks, offset) + } + read := 0 + var pos uint32 + errs := make(chan error, 10) + waits := 0 + buf := page.Data + size := len(buf) + for i := 0; i < len(chunks); i++ { + if read < size && offset < pos+chunks[i].Len { + toread := utils.Min(size-read, int(pos+chunks[i].Len-offset)) + go func(s *meta.Slice, p *chunk.Page, off, pos uint32) { + defer p.Release() + errs <- r.readSlice(ctx, s, p, int(off)) + }(&chunks[i], page.Slice(read, toread), offset-pos, pos) + read += toread + offset += uint32(toread) + waits++ + } + pos += chunks[i].Len + } + for read < size { + buf[read] = 0 + read++ + } + var err error + // wait for all 
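The comment closing dataReader.Read is worth restating: every goroutine was handed a sub-slice of the caller's page, so the function must collect one result per launched worker, even after the first error, before the page can be released. A generic sketch of that pattern; the work function and buffer here are placeholders, not the package's readSlice:

package main

import (
	"errors"
	"fmt"
)

// fanOutRead runs one worker per part and only returns after every worker has
// reported, so none of them can still be writing into buf afterwards.
func fanOutRead(buf []byte, parts [][2]int, work func(dst []byte) error) error {
	errs := make(chan error, len(parts))
	for _, p := range parts {
		go func(dst []byte) { errs <- work(dst) }(buf[p[0]:p[1]])
	}
	var first error
	for range parts { // drain all results, even after a failure
		if e := <-errs; e != nil && first == nil {
			first = e
		}
	}
	return first
}

func main() {
	buf := make([]byte, 8)
	err := fanOutRead(buf, [][2]int{{0, 4}, {4, 8}}, func(dst []byte) error {
		for i := range dst {
			dst[i] = 0xab
		}
		return errors.New("simulated read failure")
	})
	fmt.Println(buf, err)
}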
goroutine to return, otherwise they may access invalid memory + for waits > 0 { + if e := <-errs; e != nil { + err = e + } + waits-- + } + if err != nil { + return 0 + } + return read +} + +func (r *dataReader) readManyChunks(ctx context.Context, page *chunk.Page, chunks []meta.Slice, offset uint32) int { + read := 0 + var pos uint32 + var err error + errs := make(chan error, 10) + waits := 0 + buf := page.Data + size := len(buf) + concurrency := make(chan byte, 16) + +CHUNKS: + for i := 0; i < len(chunks); i++ { + if read < size && offset < pos+chunks[i].Len { + toread := utils.Min(size-read, int(pos+chunks[i].Len-offset)) + WAIT: + for { + select { + case concurrency <- 1: + break WAIT + case e := <-errs: + waits-- + if e != nil { + err = e + break CHUNKS + } + } + } + go func(s *meta.Slice, p *chunk.Page, off int, pos uint32) { + defer p.Release() + errs <- r.readSlice(ctx, s, p, off) + <-concurrency + }(&chunks[i], page.Slice(read, toread), int(offset-pos), pos) + + read += toread + offset += uint32(toread) + waits++ + } + pos += chunks[i].Len + } + // wait for all jobs done, otherwise they may access invalid memory + for waits > 0 { + if e := <-errs; e != nil { + err = e + } + waits-- + } + if err != nil { + return 0 + } + for read < size { + buf[read] = 0 + read++ + } + return read +} diff --git a/pkg/vfs/vfs.go b/pkg/vfs/vfs.go new file mode 100644 index 0000000..b920a3b --- /dev/null +++ b/pkg/vfs/vfs.go @@ -0,0 +1,957 @@ +/* + * JuiceFS, Copyright 2020 Juicedata, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
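readManyChunks differs from the plain path only in throttling the fan-out with a buffered channel used as a counting semaphore, so a read touching hundreds of small slices never has more than a fixed number of object reads in flight. A stripped-down sketch of that throttle; the limit of 16 matches the code above, the job body and WaitGroup are stand-ins:

package main

import (
	"fmt"
	"sync"
	"time"
)

func main() {
	const limit = 16
	sem := make(chan struct{}, limit) // counting semaphore, as in readManyChunks
	var wg sync.WaitGroup

	start := time.Now()
	for i := 0; i < 64; i++ {
		sem <- struct{}{} // blocks while `limit` jobs are already in flight
		wg.Add(1)
		go func() {
			defer func() { <-sem; wg.Done() }()
			time.Sleep(10 * time.Millisecond) // stand-in for one slice read
		}()
	}
	wg.Wait()
	// 64 jobs, 16 at a time, 10ms each: roughly 40ms total.
	fmt.Printf("64 jobs with %d in flight took %v\n", limit, time.Since(start).Round(time.Millisecond))
}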
+ */ + +package vfs + +import ( + "encoding/json" + "runtime" + "sync" + "syscall" + "time" + + "github.com/juicedata/juicefs/pkg/chunk" + "github.com/juicedata/juicefs/pkg/meta" + "github.com/juicedata/juicefs/pkg/utils" + "github.com/prometheus/client_golang/prometheus" +) + +type Ino = meta.Ino +type Attr = meta.Attr +type Context = LogContext + +const ( + rootID = 1 + maxName = 255 + maxSymlink = 4096 + maxFileSize = meta.ChunkSize << 31 +) + +type Config struct { + Meta *meta.Config + Format *meta.Format + Chunk *chunk.Config + Version string + Mountpoint string + AttrTimeout time.Duration + DirEntryTimeout time.Duration + EntryTimeout time.Duration + FastResolve bool `json:",omitempty"` + AccessLog string `json:",omitempty"` + HideInternal bool +} + +var ( + readSizeHistogram = prometheus.NewHistogram(prometheus.HistogramOpts{ + Name: "fuse_read_size_bytes", + Help: "size of read distributions.", + Buckets: prometheus.LinearBuckets(4096, 4096, 32), + }) + writtenSizeHistogram = prometheus.NewHistogram(prometheus.HistogramOpts{ + Name: "fuse_written_size_bytes", + Help: "size of write distributions.", + Buckets: prometheus.LinearBuckets(4096, 4096, 32), + }) +) + +func (v *VFS) Lookup(ctx Context, parent Ino, name string) (entry *meta.Entry, err syscall.Errno) { + var inode Ino + var attr = &Attr{} + if parent == rootID { + n := getInternalNodeByName(name) + if n != nil { + entry = &meta.Entry{Inode: n.inode, Attr: n.attr} + return + } + } + defer func() { + logit(ctx, "lookup (%d,%s): %s%s", parent, name, strerr(err), (*Entry)(entry)) + }() + if len(name) > maxName { + err = syscall.ENAMETOOLONG + return + } + err = v.Meta.Lookup(ctx, parent, name, &inode, attr) + if err == 0 { + v.UpdateLength(inode, attr) + entry = &meta.Entry{Inode: inode, Attr: attr} + } + return +} + +func (v *VFS) GetAttr(ctx Context, ino Ino, opened uint8) (entry *meta.Entry, err syscall.Errno) { + if IsSpecialNode(ino) && getInternalNode(ino) != nil { + n := getInternalNode(ino) + entry = &meta.Entry{Inode: n.inode, Attr: n.attr} + return + } + defer func() { logit(ctx, "getattr (%d): %s%s", ino, strerr(err), (*Entry)(entry)) }() + var attr = &Attr{} + err = v.Meta.GetAttr(ctx, ino, attr) + if err == 0 { + v.UpdateLength(ino, attr) + entry = &meta.Entry{Inode: ino, Attr: attr} + } + return +} + +func get_filetype(mode uint16) uint8 { + switch mode & (syscall.S_IFMT & 0xffff) { + case syscall.S_IFIFO: + return meta.TypeFIFO + case syscall.S_IFSOCK: + return meta.TypeSocket + case syscall.S_IFLNK: + return meta.TypeSymlink + case syscall.S_IFREG: + return meta.TypeFile + case syscall.S_IFBLK: + return meta.TypeBlockDev + case syscall.S_IFDIR: + return meta.TypeDirectory + case syscall.S_IFCHR: + return meta.TypeCharDev + } + return meta.TypeFile +} + +func (v *VFS) Mknod(ctx Context, parent Ino, name string, mode uint16, cumask uint16, rdev uint32) (entry *meta.Entry, err syscall.Errno) { + defer func() { + logit(ctx, "mknod (%d,%s,%s:0%04o,0x%08X): %s%s", parent, name, smode(mode), mode, rdev, strerr(err), (*Entry)(entry)) + }() + if parent == rootID && IsSpecialName(name) { + err = syscall.EEXIST + return + } + if len(name) > maxName { + err = syscall.ENAMETOOLONG + return + } + _type := get_filetype(mode) + if _type == 0 { + err = syscall.EPERM + return + } + + var inode Ino + var attr = &Attr{} + err = v.Meta.Mknod(ctx, parent, name, _type, mode&07777, cumask, rdev, &inode, attr) + if err == 0 { + entry = &meta.Entry{Inode: inode, Attr: attr} + } + return +} + +func (v *VFS) Unlink(ctx Context, parent Ino, 
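Most of the namespace operations below open with the same two guards: reject names that would shadow the reserved control files directly under the root, and reject names longer than maxName. Factored out, that prolog would read roughly like this inside package vfs; checkName is a hypothetical helper, not part of this patch:

// checkName is a hypothetical consolidation of the common prolog of Mknod,
// Mkdir, Symlink, Create, ...: names under the root must not collide with the
// internal control files, and must fit in maxName bytes.
func checkName(parent Ino, name string) syscall.Errno {
	if parent == rootID && IsSpecialName(name) {
		return syscall.EEXIST
	}
	if len(name) > maxName {
		return syscall.ENAMETOOLONG
	}
	return 0
}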
name string) (err syscall.Errno) { + defer func() { logit(ctx, "unlink (%d,%s): %s", parent, name, strerr(err)) }() + if parent == rootID && IsSpecialName(name) { + err = syscall.EPERM + return + } + if len(name) > maxName { + err = syscall.ENAMETOOLONG + return + } + err = v.Meta.Unlink(ctx, parent, name) + return +} + +func (v *VFS) Mkdir(ctx Context, parent Ino, name string, mode uint16, cumask uint16) (entry *meta.Entry, err syscall.Errno) { + defer func() { + logit(ctx, "mkdir (%d,%s,%s:0%04o): %s%s", parent, name, smode(mode), mode, strerr(err), (*Entry)(entry)) + }() + if parent == rootID && IsSpecialName(name) { + err = syscall.EEXIST + return + } + if len(name) > maxName { + err = syscall.ENAMETOOLONG + return + } + + var inode Ino + var attr = &Attr{} + err = v.Meta.Mkdir(ctx, parent, name, mode, cumask, 0, &inode, attr) + if err == 0 { + entry = &meta.Entry{Inode: inode, Attr: attr} + } + return +} + +func (v *VFS) Rmdir(ctx Context, parent Ino, name string) (err syscall.Errno) { + defer func() { logit(ctx, "rmdir (%d,%s): %s", parent, name, strerr(err)) }() + if len(name) > maxName { + err = syscall.ENAMETOOLONG + return + } + err = v.Meta.Rmdir(ctx, parent, name) + return +} + +func (v *VFS) Symlink(ctx Context, path string, parent Ino, name string) (entry *meta.Entry, err syscall.Errno) { + defer func() { + logit(ctx, "symlink (%d,%s,%s): %s%s", parent, name, path, strerr(err), (*Entry)(entry)) + }() + if parent == rootID && IsSpecialName(name) { + err = syscall.EEXIST + return + } + if len(name) > maxName || len(path) >= maxSymlink { + err = syscall.ENAMETOOLONG + return + } + + var inode Ino + var attr = &Attr{} + err = v.Meta.Symlink(ctx, parent, name, path, &inode, attr) + if err == 0 { + entry = &meta.Entry{Inode: inode, Attr: attr} + } + return +} + +func (v *VFS) Readlink(ctx Context, ino Ino) (path []byte, err syscall.Errno) { + defer func() { logit(ctx, "readlink (%d): %s (%s)", ino, strerr(err), string(path)) }() + err = v.Meta.ReadLink(ctx, ino, &path) + return +} + +func (v *VFS) Rename(ctx Context, parent Ino, name string, newparent Ino, newname string, flags uint32) (err syscall.Errno) { + defer func() { + logit(ctx, "rename (%d,%s,%d,%s,%d): %s", parent, name, newparent, newname, flags, strerr(err)) + }() + if parent == rootID && IsSpecialName(name) { + err = syscall.EPERM + return + } + if newparent == rootID && IsSpecialName(newname) { + err = syscall.EPERM + return + } + if len(name) > maxName || len(newname) > maxName { + err = syscall.ENAMETOOLONG + return + } + + err = v.Meta.Rename(ctx, parent, name, newparent, newname, flags, nil, nil) + return +} + +func (v *VFS) Link(ctx Context, ino Ino, newparent Ino, newname string) (entry *meta.Entry, err syscall.Errno) { + defer func() { + logit(ctx, "link (%d,%d,%s): %s%s", ino, newparent, newname, strerr(err), (*Entry)(entry)) + }() + if IsSpecialNode(ino) { + err = syscall.EPERM + return + } + if newparent == rootID && IsSpecialName(newname) { + err = syscall.EPERM + return + } + if len(newname) > maxName { + err = syscall.ENAMETOOLONG + return + } + + var attr = &Attr{} + err = v.Meta.Link(ctx, ino, newparent, newname, attr) + if err == 0 { + v.UpdateLength(ino, attr) + entry = &meta.Entry{Inode: ino, Attr: attr} + } + return +} + +func (v *VFS) Opendir(ctx Context, ino Ino) (fh uint64, err syscall.Errno) { + defer func() { logit(ctx, "opendir (%d): %s [fh:%d]", ino, strerr(err), fh) }() + fh = v.newHandle(ino).fh + return +} + +func (v *VFS) UpdateLength(inode Ino, attr *meta.Attr) { + if attr.Full && 
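These entry points are normally driven by the FUSE layer, but they can be called directly; errors are syscall.Errno values with 0 meaning success. A short hedged usage sketch inside package vfs, assuming an existing *VFS value v (strerr and NewLogContext are the helpers the surrounding code already uses):

func exampleNamespace(v *VFS) {
	ctx := NewLogContext(meta.Background)
	dir, errno := v.Mkdir(ctx, rootID, "data", 0755, 022)
	if errno != 0 {
		logger.Errorf("mkdir: %s", strerr(errno))
		return
	}
	// Symlink takes the target path first, then the parent and link name.
	if _, errno := v.Symlink(ctx, "target", dir.Inode, "link"); errno != 0 {
		logger.Errorf("symlink: %s", strerr(errno))
	}
	if errno := v.Rename(ctx, dir.Inode, "link", rootID, "renamed", 0); errno != 0 {
		logger.Errorf("rename: %s", strerr(errno))
	}
}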
attr.Typ == meta.TypeFile { + length := v.writer.GetLength(inode) + if length > attr.Length { + attr.Length = length + } + v.reader.Truncate(inode, attr.Length) + } +} + +func (v *VFS) Readdir(ctx Context, ino Ino, size uint32, off int, fh uint64, plus bool) (entries []*meta.Entry, err syscall.Errno) { + defer func() { logit(ctx, "readdir (%d,%d,%d): %s (%d)", ino, size, off, strerr(err), len(entries)) }() + h := v.findHandle(ino, fh) + if h == nil { + err = syscall.EBADF + return + } + h.Lock() + defer h.Unlock() + + if h.children == nil || off == 0 { + var inodes []*meta.Entry + err = v.Meta.Readdir(ctx, ino, 1, &inodes) + if err == syscall.EACCES { + err = v.Meta.Readdir(ctx, ino, 0, &inodes) + } + if err != 0 { + return + } + h.children = inodes + if ino == rootID && !v.Conf.HideInternal { + // add internal nodes + for _, node := range internalNodes { + h.children = append(h.children, &meta.Entry{ + Inode: node.inode, + Name: []byte(node.name), + Attr: node.attr, + }) + } + } + } + if off < len(h.children) { + entries = h.children[off:] + } + return +} + +func (v *VFS) Releasedir(ctx Context, ino Ino, fh uint64) int { + h := v.findHandle(ino, fh) + if h == nil { + return 0 + } + v.ReleaseHandler(ino, fh) + logit(ctx, "releasedir (%d): OK", ino) + return 0 +} + +func (v *VFS) Create(ctx Context, parent Ino, name string, mode uint16, cumask uint16, flags uint32) (entry *meta.Entry, fh uint64, err syscall.Errno) { + defer func() { + logit(ctx, "create (%d,%s,%s:0%04o): %s%s [fh:%d]", parent, name, smode(mode), mode, strerr(err), (*Entry)(entry), fh) + }() + if parent == rootID && IsSpecialName(name) { + err = syscall.EEXIST + return + } + if len(name) > maxName { + err = syscall.ENAMETOOLONG + return + } + + var inode Ino + var attr = &Attr{} + err = v.Meta.Create(ctx, parent, name, mode&07777, cumask, flags, &inode, attr) + if runtime.GOOS == "darwin" && err == syscall.ENOENT { + err = syscall.EACCES + } + if err == 0 { + v.UpdateLength(inode, attr) + fh = v.newFileHandle(inode, attr.Length, flags) + entry = &meta.Entry{Inode: inode, Attr: attr} + } + return +} + +func (v *VFS) Open(ctx Context, ino Ino, flags uint32) (entry *meta.Entry, fh uint64, err syscall.Errno) { + var attr = &Attr{} + if IsSpecialNode(ino) { + if ino != controlInode && (flags&O_ACCMODE) != syscall.O_RDONLY { + err = syscall.EACCES + return + } + h := v.newHandle(ino) + fh = h.fh + switch ino { + case logInode: + openAccessLog(fh) + case statsInode: + h.data = collectMetrics() + case configInode: + v.Conf.Format.RemoveSecret() + h.data, _ = json.MarshalIndent(v.Conf, "", " ") + } + n := getInternalNode(ino) + if n != nil { + entry = &meta.Entry{Inode: ino, Attr: n.attr} + return + } + } + defer func() { + if entry != nil { + logit(ctx, "open (%d): %s [fh:%d]", ino, strerr(err), fh) + } else { + logit(ctx, "open (%d): %s", ino, strerr(err)) + } + }() + err = v.Meta.Open(ctx, ino, flags, attr) + if err == 0 { + v.UpdateLength(ino, attr) + fh = v.newFileHandle(ino, attr.Length, flags) + entry = &meta.Entry{Inode: ino, Attr: attr} + } + return +} + +func (v *VFS) Truncate(ctx Context, ino Ino, size int64, opened uint8, attr *Attr) (err syscall.Errno) { + // defer func() { logit(ctx, "truncate (%d,%d): %s", ino, size, strerr(err)) }() + if IsSpecialNode(ino) { + err = syscall.EPERM + return + } + if size < 0 { + err = syscall.EINVAL + return + } + if size >= maxFileSize { + err = syscall.EFBIG + return + } + hs := v.findAllHandles(ino) + for _, h := range hs { + if !h.Wlock(ctx) { + err = syscall.EINTR + return + } + 
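Readdir reads the whole directory once per handle, caches it on the handle, and serves later calls by slicing at the requested offset; at the root it also appends the internal control files unless HideInternal is set. The paging itself reduces to the following simplified model (not the handle code itself, just its shape):

// listFrom models how a cached directory listing is served by offset: the
// first call (or a rewind to offset 0) fills the cache, later calls slice it.
func listFrom(cache *[]string, load func() []string, off int) []string {
	if *cache == nil || off == 0 {
		*cache = load()
	}
	if off >= len(*cache) {
		return nil
	}
	return (*cache)[off:]
}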
defer h.Wunlock() + } + _ = v.writer.Flush(ctx, ino) + err = v.Meta.Truncate(ctx, ino, 0, uint64(size), attr) + if err == 0 { + v.writer.Truncate(ino, uint64(size)) + v.reader.Truncate(ino, uint64(size)) + } + return 0 +} + +func (v *VFS) ReleaseHandler(ino Ino, fh uint64) { + v.releaseFileHandle(ino, fh) +} + +func (v *VFS) Release(ctx Context, ino Ino, fh uint64) { + if IsSpecialNode(ino) { + if ino == logInode { + closeAccessLog(fh) + } + v.releaseHandle(ino, fh) + return + } + var err syscall.Errno + defer func() { logit(ctx, "release (%d): %s", ino, strerr(err)) }() + if fh > 0 { + f := v.findHandle(ino, fh) + if f != nil { + f.Lock() + // rwlock_wait_for_unlock: + for (f.writing | f.writers | f.readers) != 0 { + if f.cond.WaitWithTimeout(time.Second) && ctx.Canceled() { + f.Unlock() + logger.Warnf("write lock %d interrupted", f.inode) + err = syscall.EINTR + return + } + } + locks := f.locks + owner := f.flockOwner + f.Unlock() + if f.writer != nil { + _ = f.writer.Flush(ctx) + } + if locks&1 != 0 { + _ = v.Meta.Flock(ctx, ino, owner, F_UNLCK, false) + } + } + _ = v.Meta.Close(ctx, ino) + go v.releaseFileHandle(ino, fh) // after writes it waits for data sync, so do it after everything + } +} + +func (v *VFS) Read(ctx Context, ino Ino, buf []byte, off uint64, fh uint64) (n int, err syscall.Errno) { + size := uint32(len(buf)) + if IsSpecialNode(ino) { + if ino == logInode { + n = readAccessLog(fh, buf) + } else { + h := v.findHandle(ino, fh) + if h == nil { + err = syscall.EBADF + return + } + data := h.data + if off < h.off { + data = nil + } else { + off -= h.off + } + if int(off) < len(data) { + data = data[off:] + if int(size) < len(data) { + data = data[:size] + } + n = copy(buf, data) + } + if len(h.data) > 2<<20 { + // drop first part to avoid OOM + h.off += 1 << 20 + h.data = h.data[1<<20:] + } + } + return + } + + defer func() { + readSizeHistogram.Observe(float64(n)) + logit(ctx, "read (%d,%d,%d): %s (%d)", ino, size, off, strerr(err), n) + }() + h := v.findHandle(ino, fh) + if h == nil { + err = syscall.EBADF + return + } + if off >= maxFileSize || off+uint64(size) >= maxFileSize { + err = syscall.EFBIG + return + } + if h.reader == nil { + err = syscall.EACCES + return + } + if !h.Rlock(ctx) { + err = syscall.EINTR + return + } + defer h.Runlock() + + _ = v.writer.Flush(ctx, ino) + n, err = h.reader.Read(ctx, off, buf) + for err == syscall.EAGAIN { + n, err = h.reader.Read(ctx, off, buf) + } + if err == syscall.ENOENT { + err = syscall.EBADF + } + h.removeOp(ctx) + return +} + +func (v *VFS) Write(ctx Context, ino Ino, buf []byte, off, fh uint64) (err syscall.Errno) { + size := uint64(len(buf)) + defer func() { logit(ctx, "write (%d,%d,%d): %s", ino, size, off, strerr(err)) }() + h := v.findHandle(ino, fh) + if h == nil { + err = syscall.EBADF + return + } + if off >= maxFileSize || off+size >= maxFileSize { + err = syscall.EFBIG + return + } + + if ino == controlInode { + h.pending = append(h.pending, buf...) + rb := utils.ReadBuffer(h.pending) + cmd := rb.Get32() + size := int(rb.Get32()) + if rb.Left() < size { + logger.Debugf("message not complete: %d %d > %d", cmd, size, rb.Left()) + return + } + h.data = append(h.data, h.pending...) + h.pending = h.pending[:0] + if rb.Left() == size { + h.data = append(h.data, v.handleInternalMsg(ctx, cmd, rb)...) 
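Two details of the regular read path are easy to miss: pending writes are flushed before reading so a process sees its own buffered writes, and a reader that reports EAGAIN (its buffered slice was invalidated mid-read) is simply retried. In isolation the retry looks like this sketch inside package vfs; readOnce stands in for h.reader.Read:

// retryOnEAGAIN keeps re-issuing a read while the underlying slice buffers
// are being refreshed; readOnce is a stand-in for FileReader.Read.
func retryOnEAGAIN(readOnce func() (int, syscall.Errno)) (int, syscall.Errno) {
	n, err := readOnce()
	for err == syscall.EAGAIN {
		n, err = readOnce()
	}
	if err == syscall.ENOENT {
		err = syscall.EBADF // the file was removed while the handle stayed open
	}
	return n, err
}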
+ } else { + logger.Warnf("broken message: %d %d < %d", cmd, size, rb.Left()) + h.data = append(h.data, uint8(syscall.EIO&0xff)) + } + return + } + + if h.writer == nil { + err = syscall.EACCES + return + } + + if !h.Wlock(ctx) { + err = syscall.EINTR + return + } + defer h.Wunlock() + + err = h.writer.Write(ctx, off, buf) + if err == syscall.ENOENT || err == syscall.EPERM || err == syscall.EINVAL { + err = syscall.EBADF + } + h.removeOp(ctx) + + if err == 0 { + writtenSizeHistogram.Observe(float64(len(buf))) + v.reader.Truncate(ino, v.writer.GetLength(ino)) + } + return +} + +func (v *VFS) Fallocate(ctx Context, ino Ino, mode uint8, off, length int64, fh uint64) (err syscall.Errno) { + defer func() { logit(ctx, "fallocate (%d,%d,%d,%d): %s", ino, mode, off, length, strerr(err)) }() + if off < 0 || length <= 0 { + err = syscall.EINVAL + return + } + if IsSpecialNode(ino) { + err = syscall.EPERM + return + } + h := v.findHandle(ino, fh) + if h == nil { + err = syscall.EBADF + return + } + if off >= maxFileSize || off+length >= maxFileSize { + err = syscall.EFBIG + return + } + if h.writer == nil { + err = syscall.EACCES + return + } + if !h.Wlock(ctx) { + err = syscall.EINTR + return + } + defer h.Wunlock() + defer h.removeOp(ctx) + + err = v.Meta.Fallocate(ctx, ino, mode, uint64(off), uint64(length)) + return +} + +func (v *VFS) CopyFileRange(ctx Context, nodeIn Ino, fhIn, offIn uint64, nodeOut Ino, fhOut, offOut, size uint64, flags uint32) (copied uint64, err syscall.Errno) { + defer func() { + logit(ctx, "copy_file_range (%d,%d,%d,%d,%d,%d): %s", nodeIn, offIn, nodeOut, offOut, size, flags, strerr(err)) + }() + if IsSpecialNode(nodeIn) { + err = syscall.ENOTSUP + return + } + if IsSpecialNode(nodeOut) { + err = syscall.EPERM + return + } + hi := v.findHandle(nodeIn, fhIn) + if fhIn == 0 || hi == nil || hi.inode != nodeIn { + err = syscall.EBADF + return + } + ho := v.findHandle(nodeOut, fhOut) + if fhOut == 0 || ho == nil || ho.inode != nodeOut { + err = syscall.EBADF + return + } + if hi.reader == nil { + err = syscall.EBADF + return + } + if ho.writer == nil { + err = syscall.EACCES + return + } + if offIn >= maxFileSize || offIn+size >= maxFileSize || offOut >= maxFileSize || offOut+size >= maxFileSize { + err = syscall.EFBIG + return + } + if flags != 0 { + err = syscall.EINVAL + return + } + if nodeIn == nodeOut && (offIn <= offOut && offOut < offIn+size || offOut <= offIn && offIn < offOut+size) { + err = syscall.EINVAL // overlap + return + } + + if !ho.Wlock(ctx) { + err = syscall.EINTR + return + } + defer ho.Wunlock() + defer ho.removeOp(ctx) + if nodeIn != nodeOut { + if !hi.Rlock(ctx) { + err = syscall.EINTR + return + } + defer hi.Runlock() + defer hi.removeOp(ctx) + } + + err = v.writer.Flush(ctx, nodeOut) + if err != 0 { + return + } + err = v.Meta.CopyFileRange(ctx, nodeIn, offIn, nodeOut, offOut, size, flags, &copied) + if err == 0 { + v.reader.Invalidate(nodeOut, offOut, size) + } + return +} + +func (v *VFS) doFsync(ctx Context, h *handle) (err syscall.Errno) { + if h.writer != nil { + if !h.Wlock(ctx) { + return syscall.EINTR + } + defer h.Wunlock() + defer h.removeOp(ctx) + + err = h.writer.Flush(ctx) + if err == syscall.ENOENT || err == syscall.EPERM || err == syscall.EINVAL { + err = syscall.EBADF + } + } + return err +} + +func (v *VFS) Flush(ctx Context, ino Ino, fh uint64, lockOwner uint64) (err syscall.Errno) { + if IsSpecialNode(ino) { + return + } + defer func() { logit(ctx, "flush (%d): %s", ino, strerr(err)) }() + h := v.findHandle(ino, fh) + if h == nil { 
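Writes to the .control file are not file data at all: the payload is a small framed message, a 4-byte command followed by a 4-byte length and then that many bytes of arguments, accumulated in h.pending until a full frame has arrived. A hedged sketch of building and parsing one frame with the same layout, using only the utils buffer calls the surrounding code already relies on; packControl/unpackControl are illustrative helpers, not part of this patch:

// Frame layout used by the .control handle: cmd(4) | size(4) | payload(size).
func packControl(cmd uint32, payload []byte) []byte {
	w := utils.FromBuffer(make([]byte, 8+len(payload)))
	w.Put32(cmd)
	w.Put32(uint32(len(payload)))
	w.Put(payload)
	return w.Bytes()
}

func unpackControl(frame []byte) (cmd uint32, payload []byte, complete bool) {
	if len(frame) < 8 {
		return 0, nil, false // header not complete yet
	}
	r := utils.ReadBuffer(frame)
	cmd = r.Get32()
	size := int(r.Get32())
	if r.Left() < size {
		return cmd, nil, false // partial frame: keep buffering in h.pending
	}
	return cmd, frame[8 : 8+size], true
}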
+ err = syscall.EBADF + return + } + + if h.writer != nil { + if !h.Wlock(ctx) { + h.cancelOp(ctx.Pid()) + err = syscall.EINTR + return + } + + err = h.writer.Flush(ctx) + if err == syscall.ENOENT || err == syscall.EPERM || err == syscall.EINVAL { + err = syscall.EBADF + } + h.removeOp(ctx) + h.Wunlock() + } else if h.reader != nil { + h.cancelOp(ctx.Pid()) + } + + h.Lock() + locks := h.locks + h.Unlock() + if locks&2 != 0 { + _ = v.Meta.Setlk(ctx, ino, lockOwner, false, F_UNLCK, 0, 0x7FFFFFFFFFFFFFFF, 0) + } + return +} + +func (v *VFS) Fsync(ctx Context, ino Ino, datasync int, fh uint64) (err syscall.Errno) { + defer func() { logit(ctx, "fsync (%d,%d): %s", ino, datasync, strerr(err)) }() + if IsSpecialNode(ino) { + return + } + h := v.findHandle(ino, fh) + if h == nil { + err = syscall.EBADF + return + } + err = v.doFsync(ctx, h) + return +} + +const ( + xattrMaxName = 255 + xattrMaxSize = 65536 +) + +func (v *VFS) SetXattr(ctx Context, ino Ino, name string, value []byte, flags uint32) (err syscall.Errno) { + defer func() { logit(ctx, "setxattr (%d,%s,%d,%d): %s", ino, name, len(value), flags, strerr(err)) }() + if IsSpecialNode(ino) { + err = syscall.EPERM + return + } + if len(value) > xattrMaxSize { + if runtime.GOOS == "darwin" { + err = syscall.E2BIG + } else { + err = syscall.ERANGE + } + return + } + if len(name) > xattrMaxName { + if runtime.GOOS == "darwin" { + err = syscall.EPERM + } else { + err = syscall.ERANGE + } + return + } + if len(name) == 0 { + err = syscall.EINVAL + return + } + if name == "system.posix_acl_access" || name == "system.posix_acl_default" { + err = syscall.ENOTSUP + return + } + err = v.Meta.SetXattr(ctx, ino, name, value, flags) + return +} + +func (v *VFS) GetXattr(ctx Context, ino Ino, name string, size uint32) (value []byte, err syscall.Errno) { + defer func() { logit(ctx, "getxattr (%d,%s,%d): %s (%d)", ino, name, size, strerr(err), len(value)) }() + + if IsSpecialNode(ino) { + err = meta.ENOATTR + return + } + if len(name) > xattrMaxName { + if runtime.GOOS == "darwin" { + err = syscall.EPERM + } else { + err = syscall.ERANGE + } + return + } + if len(name) == 0 { + err = syscall.EINVAL + return + } + if name == "system.posix_acl_access" || name == "system.posix_acl_default" { + err = syscall.ENOTSUP + return + } + err = v.Meta.GetXattr(ctx, ino, name, &value) + if size > 0 && len(value) > int(size) { + err = syscall.ERANGE + } + return +} + +func (v *VFS) ListXattr(ctx Context, ino Ino, size int) (data []byte, err syscall.Errno) { + defer func() { logit(ctx, "listxattr (%d,%d): %s (%d)", ino, size, strerr(err), len(data)) }() + if IsSpecialNode(ino) { + err = meta.ENOATTR + return + } + err = v.Meta.ListXattr(ctx, ino, &data) + if size > 0 && len(data) > size { + err = syscall.ERANGE + } + return +} + +func (v *VFS) RemoveXattr(ctx Context, ino Ino, name string) (err syscall.Errno) { + defer func() { logit(ctx, "removexattr (%d,%s): %s", ino, name, strerr(err)) }() + if IsSpecialNode(ino) { + err = syscall.EPERM + return + } + if name == "system.posix_acl_access" || name == "system.posix_acl_default" { + return syscall.ENOTSUP + } + if len(name) > xattrMaxName { + if runtime.GOOS == "darwin" { + err = syscall.EPERM + } else { + err = syscall.ERANGE + } + return + } + if len(name) == 0 { + err = syscall.EINVAL + return + } + err = v.Meta.RemoveXattr(ctx, ino, name) + return +} + +var logger = utils.GetLogger("juicefs") + +type VFS struct { + Conf *Config + Meta meta.Meta + Store chunk.ChunkStore + reader DataReader + writer DataWriter + + handles 
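The xattr entry points repeat the same boundary checks: empty names are EINVAL, names over 255 bytes and values over 64 KiB are rejected with ERANGE on Linux but EPERM/E2BIG on macOS (matching what local filesystems return there), and the POSIX ACL attributes are ENOTSUP. Consolidated, the checks would look roughly like this hypothetical helper inside package vfs:

// checkXattr is a hypothetical consolidation of the name/value checks that
// SetXattr, GetXattr and RemoveXattr each perform inline above.
func checkXattr(name string, value []byte) syscall.Errno {
	if len(name) == 0 {
		return syscall.EINVAL
	}
	if name == "system.posix_acl_access" || name == "system.posix_acl_default" {
		return syscall.ENOTSUP
	}
	if len(name) > xattrMaxName {
		if runtime.GOOS == "darwin" {
			return syscall.EPERM
		}
		return syscall.ERANGE
	}
	if len(value) > xattrMaxSize {
		if runtime.GOOS == "darwin" {
			return syscall.E2BIG
		}
		return syscall.ERANGE
	}
	return 0
}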
map[Ino][]*handle + hanleM sync.Mutex + nextfh uint64 + + handlersGause prometheus.GaugeFunc + usedBufferSize prometheus.GaugeFunc + storeCacheSize prometheus.GaugeFunc +} + +func NewVFS(conf *Config, m meta.Meta, store chunk.ChunkStore) *VFS { + reader := NewDataReader(conf, m, store) + writer := NewDataWriter(conf, m, store, reader) + + v := &VFS{ + Conf: conf, + Meta: m, + Store: store, + reader: reader, + writer: writer, + handles: make(map[Ino][]*handle), + nextfh: 1, + } + + if conf.Meta.Subdir != "" { // don't show trash directory + internalNodes = internalNodes[:len(internalNodes)-1] + } + + v.handlersGause = prometheus.NewGaugeFunc(prometheus.GaugeOpts{ + Name: "fuse_open_handlers", + Help: "number of open files and directories.", + }, func() float64 { + v.hanleM.Lock() + defer v.hanleM.Unlock() + return float64(len(v.handles)) + }) + v.usedBufferSize = prometheus.NewGaugeFunc(prometheus.GaugeOpts{ + Name: "used_buffer_size_bytes", + Help: "size of currently used buffer.", + }, func() float64 { + if dw, ok := writer.(*dataWriter); ok { + return float64(dw.usedBufferSize()) + } + return 0.0 + }) + v.storeCacheSize = prometheus.NewGaugeFunc(prometheus.GaugeOpts{ + Name: "store_cache_size_bytes", + Help: "size of store cache.", + }, func() float64 { + if dw, ok := writer.(*dataWriter); ok { + return float64(dw.store.UsedMemory()) + } + return 0.0 + }) + _ = prometheus.Register(v.handlersGause) + _ = prometheus.Register(v.usedBufferSize) + _ = prometheus.Register(v.storeCacheSize) + return v +} + +func InitMetrics() { + prometheus.MustRegister(readSizeHistogram) + prometheus.MustRegister(writtenSizeHistogram) + prometheus.MustRegister(opsDurationsHistogram) + prometheus.MustRegister(compactSizeHistogram) +} diff --git a/pkg/vfs/vfs_test.go b/pkg/vfs/vfs_test.go new file mode 100644 index 0000000..b97211c --- /dev/null +++ b/pkg/vfs/vfs_test.go @@ -0,0 +1,778 @@ +/* + * JuiceFS, Copyright 2020 Juicedata, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
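NewVFS registers its gauges with the default Prometheus registry and InitMetrics adds the histograms, so exposing them only requires serving that registry. A minimal sketch using promhttp, the standard client_golang handler (the port here is arbitrary, not something this patch defines):

package main

import (
	"net/http"

	"github.com/prometheus/client_golang/prometheus/promhttp"
)

func main() {
	// vfs.InitMetrics() and vfs.NewVFS(...) would have registered their
	// collectors with prometheus.DefaultRegisterer before this point.
	http.Handle("/metrics", promhttp.Handler())
	_ = http.ListenAndServe(":9567", nil)
}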
+ */ + +package vfs + +import ( + "fmt" + "log" + "reflect" + "strings" + "syscall" + "testing" + "time" + + "github.com/google/uuid" + "github.com/juicedata/juicefs/pkg/chunk" + "github.com/juicedata/juicefs/pkg/meta" + "github.com/juicedata/juicefs/pkg/object" + "github.com/juicedata/juicefs/pkg/utils" + "golang.org/x/sys/unix" +) + +// nolint:errcheck + +func createTestVFS() (*VFS, object.ObjectStorage) { + mp := "/jfs" + metaConf := &meta.Config{ + Retries: 10, + Strict: true, + MountPoint: mp, + } + m := meta.NewClient("memkv://", metaConf) + format := meta.Format{ + Name: "test", + UUID: uuid.New().String(), + Storage: "mem", + BlockSize: 4096, + Compression: "lz4", + } + err := m.Init(format, true) + if err != nil { + log.Fatalf("setting: %s", err) + } + conf := &Config{ + Meta: metaConf, + Format: &format, + Version: "Juicefs", + Mountpoint: mp, + Chunk: &chunk.Config{ + BlockSize: format.BlockSize * 1024, + Compress: format.Compression, + MaxUpload: 2, + BufferSize: 30 << 20, + CacheSize: 10, + CacheDir: "memory", + }, + } + + blob, _ := object.CreateStorage("mem", "", "", "") + store := chunk.NewCachedStore(blob, *conf.Chunk) + return NewVFS(conf, m, store), blob +} + +func TestVFSBasic(t *testing.T) { + v, _ := createTestVFS() + ctx := NewLogContext(meta.NewContext(10, 1, []uint32{2})) + + if st, e := v.StatFS(ctx, 1); e != 0 { + t.Fatalf("statfs 1: %s", e) + } else if st.Total-st.Avail != 0 { + t.Fatalf("used: %d", st.Total-st.Avail) + } + + // dirs + de, e := v.Mkdir(ctx, 1, "d1", 0755, 0) + if e != 0 { + t.Fatalf("mkdir d1: %s", e) + } + if _, e := v.Mkdir(ctx, de.Inode, "d2", 0755, 0); e != 0 { + t.Fatalf("mkdir d1/d2: %s", e) + } + if e := v.Rmdir(ctx, 1, "d1"); e != syscall.ENOTEMPTY { + t.Fatalf("rmdir not empty: %s", e) + } + if e := v.Rmdir(ctx, de.Inode, "d2"); e != 0 { + t.Fatalf("rmdir d1/d2: %s", e) + } + + // files + fe, e := v.Mknod(ctx, de.Inode, "f1", 0644|syscall.S_IFREG, 0, 0) + if e != 0 { + t.Fatalf("mknod d1/f1: %s", e) + } + if e := v.Access(ctx, fe.Inode, unix.X_OK); e != syscall.EACCES { + t.Fatalf("access d1/f1: %s", e) + } + if _, e := v.SetAttr(ctx, fe.Inode, meta.SetAttrMtimeNow|meta.SetAttrAtimeNow, 0, 0, 0, 0, 0, 0, 0, 0, 0); e != 0 { + t.Fatalf("setattr d1/f2 mtimeNow: %s", e) + } + if fe2, e := v.SetAttr(ctx, fe.Inode, meta.SetAttrMode|meta.SetAttrUID|meta.SetAttrGID|meta.SetAttrAtime|meta.SetAttrMtime|meta.SetAttrSize, 0, 0755, 2, 3, 1234, 1234, 5678, 5678, 1024); e != 0 { + t.Fatalf("setattr d1/f1: %s %d %d", e, fe2.Attr.Gid, fe2.Attr.Length) + } else if fe2.Attr.Mode != 0755 || fe2.Attr.Uid != 2 || fe2.Attr.Gid != 3 || fe2.Attr.Atime != 1234 || fe2.Attr.Atimensec != 5678 || fe2.Attr.Mtime != 1234 || fe2.Attr.Mtimensec != 5678 || fe2.Attr.Length != 1024 { + t.Fatalf("setattr d1/f1: %+v", fe2.Attr) + } + if e := v.Access(ctx, fe.Inode, unix.X_OK); e != 0 { + t.Fatalf("access d1/f1: %s", e) + } + if _, e := v.Link(ctx, fe.Inode, 1, "f2"); e != 0 { + t.Fatalf("link f2->f1: %s", e) + } + if fe, e := v.GetAttr(ctx, fe.Inode, 0); e != 0 || fe.Attr.Nlink != 2 { + t.Fatalf("getattr d1/f2: %s %d", e, fe.Attr.Nlink) + } + if e := v.Unlink(ctx, de.Inode, "f1"); e != 0 { + t.Fatalf("unlink d1/f1: %s", e) + } + if fe, e := v.Lookup(ctx, 1, "f2"); e != 0 || fe.Attr.Nlink != 1 { + t.Fatalf("lookup f2: %s", e) + } + if e := v.Rename(ctx, 1, "f2", 1, "f3", 0); e != 0 { + t.Fatalf("rename f2 -> f3: %s", e) + } + if fe, fh, e := v.Open(ctx, fe.Inode, syscall.O_RDONLY); e != 0 { + t.Fatalf("open f3: %s", e) + } else if e := v.Flush(ctx, fe.Inode, fh, 0); e != 0 { 
+ t.Fatalf("close f3: %s", e) + } else { + v.Release(ctx, fe.Inode, fh) + } + + // symlink + if fe, e := v.Symlink(ctx, "f2", 1, "sym"); e != 0 { + t.Fatalf("symlink sym -> f2: %s", e) + } else if target, e := v.Readlink(ctx, fe.Inode); e != 0 || string(target) != "f2" { + t.Fatalf("readlink sym: %s %s", e, string(target)) + } + + // edge cases + longName := strings.Repeat("a", 256) + if _, e = v.Lookup(ctx, 1, longName); e != syscall.ENAMETOOLONG { + t.Fatalf("lookup long name") + } + if _, _, e = v.Create(ctx, 1, longName, 0, 0, 0); e != syscall.ENAMETOOLONG { + t.Fatalf("create long name") + } + if _, e = v.Mknod(ctx, 1, longName, 0, 0, 0); e != syscall.ENAMETOOLONG { + t.Fatalf("mknod long name") + } + if _, e = v.Mkdir(ctx, 1, longName, 0, 0); e != syscall.ENAMETOOLONG { + t.Fatalf("mkdir long name") + } + if _, e = v.Link(ctx, 2, 1, longName); e != syscall.ENAMETOOLONG { + t.Fatalf("link long name") + } + if e = v.Unlink(ctx, 1, longName); e != syscall.ENAMETOOLONG { + t.Fatalf("unlink long name") + } + if e = v.Rmdir(ctx, 1, longName); e != syscall.ENAMETOOLONG { + t.Fatalf("rmdir long name") + } + if _, e = v.Symlink(ctx, "", 1, longName); e != syscall.ENAMETOOLONG { + t.Fatalf("symlink long name") + } + if e = v.Rename(ctx, 1, "a", 1, longName, 0); e != syscall.ENAMETOOLONG { + t.Fatalf("rename long name") + } + if e = v.Rename(ctx, 1, longName, 1, "a", 0); e != syscall.ENAMETOOLONG { + t.Fatalf("rename long name") + } + +} + +func TestVFSIO(t *testing.T) { + v, _ := createTestVFS() + ctx := NewLogContext(meta.Background) + fe, fh, e := v.Create(ctx, 1, "file", 0755, 0, syscall.O_RDWR) + if e != 0 { + t.Fatalf("create file: %s", e) + } + if e = v.Fallocate(ctx, fe.Inode, 0, 0, 64<<10, fh); e != 0 { + t.Fatalf("fallocate : %s", e) + } + if e = v.Write(ctx, fe.Inode, []byte("hello"), 0, fh); e != 0 { + t.Fatalf("write file: %s", e) + } + if e = v.Fsync(ctx, fe.Inode, 1, fh); e != 0 { + t.Fatalf("fsync file: %s", e) + } + if e = v.Write(ctx, fe.Inode, []byte("hello"), 100<<20, fh); e != 0 { + t.Fatalf("write file: %s", e) + } + var attr meta.Attr + if e = v.Truncate(ctx, fe.Inode, (100<<20)+2, 1, &attr); e != 0 { + t.Fatalf("truncate file: %s", e) + } + if n, e := v.CopyFileRange(ctx, fe.Inode, fh, 0, fe.Inode, fh, 10<<20, 10, 0); e != 0 || n != 10 { + t.Fatalf("copyfilerange: %s %d", e, n) + } + var buf = make([]byte, 128<<10) + if n, e := v.Read(ctx, fe.Inode, buf, 0, fh); e != 0 { + t.Fatalf("read file: %s", e) + } else if n != len(buf) { + t.Fatalf("short read file: %d != %d", n, len(buf)) + } else if string(buf[:5]) != "hello" { + t.Fatalf("unexpected data: %q", string(buf[:5])) + } + if n, e := v.Read(ctx, fe.Inode, buf[:6], 10<<20, fh); e != 0 || n != 6 || string(buf[:n]) != "hello\x00" { + t.Fatalf("read file end: %s %d %s", e, n, string(buf[:n])) + } + if n, e := v.Read(ctx, fe.Inode, buf, 100<<20, fh); e != 0 || n != 2 || string(buf[:n]) != "he" { + t.Fatalf("read file end: %s %d %s", e, n, string(buf[:n])) + } + if e = v.Flush(ctx, fe.Inode, fh, 0); e != 0 { + t.Fatalf("flush file: %s", e) + } + + // edge cases + _, fh2, _ := v.Open(ctx, fe.Inode, syscall.O_RDONLY) + // read + if _, e = v.Read(ctx, fe.Inode, nil, 0, 0); e != syscall.EBADF { + t.Fatalf("read bad fd: %s", e) + } + if _, e = v.Read(ctx, fe.Inode, nil, 1<<60, fh2); e != syscall.EFBIG { + t.Fatalf("read off too big: %s", e) + } + // write + if e = v.Write(ctx, fe.Inode, nil, 0, 0); e != syscall.EBADF { + t.Fatalf("write bad fd: %s", e) + } + if e = v.Write(ctx, fe.Inode, nil, 1<<60, fh2); e != syscall.EFBIG { + 
t.Fatalf("write off too big: %s", e) + } + if e = v.Write(ctx, fe.Inode, make([]byte, 1024), 0, fh2); e != syscall.EACCES { + t.Fatalf("write off too big: %s", e) + } + // truncate + if e = v.Truncate(ctx, fe.Inode, -1, 0, &meta.Attr{}); e != syscall.EINVAL { + t.Fatalf("truncate invalid off,length: %s", e) + } + if e = v.Truncate(ctx, fe.Inode, 1<<60, 0, &meta.Attr{}); e != syscall.EFBIG { + t.Fatalf("truncate too large: %s", e) + } + // fallocate + if e = v.Fallocate(ctx, fe.Inode, 0, -1, -1, fh); e != syscall.EINVAL { + t.Fatalf("fallocate invalid off,length: %s", e) + } + if e = v.Fallocate(ctx, statsInode, 0, 0, 1, fh); e != syscall.EPERM { + t.Fatalf("fallocate invalid off,length: %s", e) + } + if e = v.Fallocate(ctx, fe.Inode, 0, 0, 100, 0); e != syscall.EBADF { + t.Fatalf("fallocate invalid off,length: %s", e) + } + if e = v.Fallocate(ctx, fe.Inode, 0, 1<<60, 1<<60, fh); e != syscall.EFBIG { + t.Fatalf("fallocate invalid off,length: %s", e) + } + if e = v.Fallocate(ctx, fe.Inode, 0, 1<<10, 1<<20, fh2); e != syscall.EACCES { + t.Fatalf("fallocate invalid off,length: %s", e) + } + + // copy file range + if n, e := v.CopyFileRange(ctx, statsInode, fh, 0, fe.Inode, fh, 10<<20, 10, 0); e != syscall.ENOTSUP { + t.Fatalf("copyfilerange internal file: %s %d", e, n) + } + if n, e := v.CopyFileRange(ctx, fe.Inode, fh, 0, statsInode, fh, 10<<20, 10, 0); e != syscall.EPERM { + t.Fatalf("copyfilerange internal file: %s %d", e, n) + } + if n, e := v.CopyFileRange(ctx, fe.Inode, 0, 0, fe.Inode, fh, 10<<20, 10, 0); e != syscall.EBADF { + t.Fatalf("copyfilerange invalid fh: %s %d", e, n) + } + if n, e := v.CopyFileRange(ctx, fe.Inode, fh, 0, fe.Inode, 0, 10<<20, 10, 0); e != syscall.EBADF { + t.Fatalf("copyfilerange invalid fh: %s %d", e, n) + } + if n, e := v.CopyFileRange(ctx, fe.Inode, fh, 0, fe.Inode, fh, 10<<20, 10, 1); e != syscall.EINVAL { + t.Fatalf("copyfilerange invalid flag: %s %d", e, n) + } + if n, e := v.CopyFileRange(ctx, fe.Inode, fh, 0, fe.Inode, fh, 10<<20, 1<<50, 0); e != syscall.EINVAL { + t.Fatalf("copyfilerange overlap: %s %d", e, n) + } + if n, e := v.CopyFileRange(ctx, fe.Inode, fh, 0, fe.Inode, fh, 1<<63, 1<<63, 0); e != syscall.EFBIG { + t.Fatalf("copyfilerange too big file: %s %d", e, n) + } + if n, e := v.CopyFileRange(ctx, fe.Inode, fh, 0, fe.Inode, fh2, 1<<20, 1<<10, 0); e != syscall.EACCES { + t.Fatalf("copyfilerange too big file: %s %d", e, n) + } + + // sequntial write/read + for i := uint64(0); i < 1001; i++ { + if e := v.Write(ctx, fe.Inode, make([]byte, 128<<10), i*(128<<10), fh); e != 0 { + t.Fatalf("write big file: %s", e) + } + } + buf = make([]byte, 128<<10) + for i := uint64(0); i < 1000; i++ { + if n, e := v.Read(ctx, fe.Inode, buf, i*(128<<10), fh); e != 0 || n != (128<<10) { + t.Fatalf("read big file: %s", e) + } else { + for j := 0; j < 128<<10; j++ { + if buf[j] != 0 { + t.Fatalf("read big file: %d %d", j, buf[j]) + } + } + } + } + // many small write + buf = make([]byte, 5<<10) + for j := range buf { + buf[j] = 1 + } + for i := int64(32 - 1); i >= 0; i-- { + if e := v.Write(ctx, fe.Inode, buf, uint64(i)*(4<<10), fh); e != 0 { + t.Fatalf("write big file: %s", e) + } + } + time.Sleep(time.Millisecond * 1500) // wait for it to be flushed + buf = make([]byte, 128<<10) + if n, e := v.Read(ctx, fe.Inode, buf, 0, fh); e != 0 || n != (128<<10) { + t.Fatalf("read big file: %s", e) + } else { + for j := range buf { + if buf[j] != 1 { + t.Fatalf("read big file: %d %d", j, buf[j]) + } + } + } + + v.Release(ctx, fe.Inode, fh) +} + +func TestVFSXattrs(t *testing.T) 
{ + v, _ := createTestVFS() + ctx := NewLogContext(meta.Background) + fe, e := v.Mkdir(ctx, 1, "xattrs", 0755, 0) + if e != 0 { + t.Fatalf("mkdir xattrs: %s", e) + } + // normal cases + if _, e := v.GetXattr(ctx, fe.Inode, "test", 0); e != meta.ENOATTR { + t.Fatalf("getxattr not existed: %s", e) + } + if e := v.SetXattr(ctx, fe.Inode, "test", []byte("value"), 0); e != 0 { + t.Fatalf("setxattr test: %s", e) + } + if e = v.SetXattr(ctx, fe.Inode, "test", []byte("v1"), meta.XattrCreate); e == 0 { + t.Fatalf("setxattr test (create): %s", e) + } + if v, e := v.ListXattr(ctx, fe.Inode, 100); e != 0 || string(v) != "test\x00" { + t.Fatalf("listxattr: %s %q", e, string(v)) + } + if v, e := v.GetXattr(ctx, fe.Inode, "test", 5); e != 0 || string(v) != "value" { + t.Fatalf("getxattr test: %s %v", e, v) + } + if e = v.SetXattr(ctx, fe.Inode, "test", []byte("v2"), meta.XattrReplace); e != 0 { + t.Fatalf("setxattr test (replace): %s", e) + } + if v, e := v.GetXattr(ctx, fe.Inode, "test", 5); e != 0 || string(v) != "v2" { + t.Fatalf("getxattr test: %s %v", e, v) + } + if _, e := v.GetXattr(ctx, fe.Inode, "test", 1); e != syscall.ERANGE { + t.Fatalf("getxattr large value: %s", e) + } + if v, e := v.ListXattr(ctx, fe.Inode, 1); e != syscall.ERANGE { + t.Fatalf("listxattr: %s %q", e, string(v)) + } + if e := v.RemoveXattr(ctx, fe.Inode, "test"); e != 0 { + t.Fatalf("removexattr test: %s", e) + } + if _, e := v.GetXattr(ctx, fe.Inode, "test", 0); e != meta.ENOATTR { + t.Fatalf("getxattr not existed: %s", e) + } + if v, e := v.ListXattr(ctx, fe.Inode, 100); e != 0 || string(v) != "" { + t.Fatalf("listxattr: %s %q", e, string(v)) + } + // edge case + if e = v.SetXattr(ctx, fe.Inode, "", []byte("v2"), 0); e != syscall.EINVAL { + t.Fatalf("setxattr long key: %s", e) + } + if e = v.SetXattr(ctx, fe.Inode, strings.Repeat("test", 100), []byte("v2"), 0); e != syscall.EPERM && e != syscall.ERANGE { + t.Fatalf("setxattr long key: %s", e) + } + if e = v.SetXattr(ctx, fe.Inode, "test", make([]byte, 1<<20), 0); e != syscall.E2BIG && e != syscall.ERANGE { + t.Fatalf("setxattr long key: %s", e) + } + if e = v.SetXattr(ctx, fe.Inode, "system.posix_acl_access", []byte("v2"), 0); e != syscall.ENOTSUP { + t.Fatalf("setxattr long key: %s", e) + } + if e = v.SetXattr(ctx, configInode, "test", []byte("v2"), 0); e != syscall.EPERM { + t.Fatalf("setxattr long key: %s", e) + } + if _, e := v.GetXattr(ctx, fe.Inode, "", 0); e != syscall.EINVAL { + t.Fatalf("getxattr not existed: %s", e) + } + if _, e := v.GetXattr(ctx, fe.Inode, strings.Repeat("test", 100), 0); e == 0 { + t.Fatalf("getxattr not existed: %s", e) + } + if _, e := v.GetXattr(ctx, configInode, "test", 0); e != meta.ENOATTR { + t.Fatalf("getxattr not existed: %s", e) + } + if _, e := v.GetXattr(ctx, fe.Inode, "system.posix_acl_access", 0); e != syscall.ENOTSUP { + t.Fatalf("getxattr not existed: %s", e) + } + if v, e := v.ListXattr(ctx, configInode, 0); e != meta.ENOATTR { + t.Fatalf("listxattr: %s %q", e, string(v)) + } + if e := v.RemoveXattr(ctx, fe.Inode, strings.Repeat("test", 100)); e != syscall.EPERM && e != syscall.ERANGE { + t.Fatalf("removexattr test: %s", e) + } + if e := v.RemoveXattr(ctx, fe.Inode, ""); e != syscall.EINVAL { + t.Fatalf("removexattr test: %s", e) + } + if e := v.RemoveXattr(ctx, fe.Inode, "system.posix_acl_access"); e != syscall.ENOTSUP { + t.Fatalf("removexattr test: %s", e) + } + if e := v.RemoveXattr(ctx, configInode, "test"); e != syscall.EPERM { + t.Fatalf("removexattr test: %s", e) + } +} + +type accessCase struct { + uid uint32 + gid 
uint32 + mode uint16 + r syscall.Errno +} + +func TestAccessMode(t *testing.T) { + var attr = meta.Attr{ + Uid: 1, + Gid: 2, + Mode: 0751, + } + + cases := []accessCase{ + {0, 0, MODE_MASK_R | MODE_MASK_W | MODE_MASK_X, 0}, + {1, 3, MODE_MASK_R | MODE_MASK_W | MODE_MASK_X, 0}, + {2, 2, MODE_MASK_R | MODE_MASK_X, 0}, + {2, 2, MODE_MASK_W, syscall.EACCES}, + {3, 4, MODE_MASK_X, 0}, + {3, 4, MODE_MASK_R, syscall.EACCES}, + {3, 4, MODE_MASK_W, syscall.EACCES}, + } + for _, c := range cases { + if e := accessTest(&attr, c.mode, c.uid, c.gid); e != c.r { + t.Fatalf("expect %s on case %+v, but got %s", c.r, c, e) + } + } +} + +func assertEqual(t *testing.T, a interface{}, b interface{}) { + if reflect.DeepEqual(a, b) { + return + } + message := fmt.Sprintf("%v != %v", a, b) + t.Fatal(message) +} + +func TestSetattrStr(t *testing.T) { + assertEqual(t, setattrStr(0, 0, 0, 0, 0, 0, 0), "") + assertEqual(t, setattrStr(meta.SetAttrMode, 01755, 0, 0, 0, 0, 0), "mode=?rwxr-xr-t:01755") + assertEqual(t, setattrStr(meta.SetAttrUID, 0, 1, 0, 0, 0, 0), "uid=1") + assertEqual(t, setattrStr(meta.SetAttrGID, 0, 1, 2, 0, 0, 0), "gid=2") + assertEqual(t, setattrStr(meta.SetAttrAtime, 0, 0, 0, -2, -1, 0), "atime=NOW") + assertEqual(t, setattrStr(meta.SetAttrAtime, 0, 0, 0, 123, 123, 0), "atime=123") + assertEqual(t, setattrStr(meta.SetAttrAtimeNow, 0, 0, 0, 0, 0, 0), "atime=NOW") + assertEqual(t, setattrStr(meta.SetAttrMtime, 0, 0, 0, 0, -1, 0), "mtime=NOW") + assertEqual(t, setattrStr(meta.SetAttrMtime, 0, 0, 0, 0, 123, 0), "mtime=123") + assertEqual(t, setattrStr(meta.SetAttrMtimeNow, 0, 0, 0, 0, 0, 0), "mtime=NOW") + assertEqual(t, setattrStr(meta.SetAttrSize, 0, 0, 0, 0, 0, 123), "size=123") + assertEqual(t, setattrStr(meta.SetAttrUID|meta.SetAttrGID, 0, 1, 2, 0, 0, 0), "uid=1,gid=2") +} + +func TestVFSLocks(t *testing.T) { + v, _ := createTestVFS() + ctx := NewLogContext(meta.Background) + fe, fh, e := v.Create(ctx, 1, "flock", 0644, 0, syscall.O_RDWR) + if e != 0 { + t.Fatalf("create flock: %s", e) + } + // flock + if e = v.Flock(ctx, fe.Inode, fh, 123, 100, true); e != syscall.EINVAL { + t.Fatalf("flock wr: %s", e) + } + if e = v.Flock(ctx, fe.Inode, fh, 123, syscall.F_WRLCK, true); e != 0 { + t.Fatalf("flock wr: %s", e) + } + if e := v.Flock(ctx, fe.Inode, fh, 456, syscall.F_RDLCK, false); e != syscall.EAGAIN { + t.Fatalf("flock rd: should block") + } + + done := make(chan bool) + go func() { + _ = v.Flock(ctx, fe.Inode, fh, 456, syscall.F_RDLCK, true) + done <- true + }() + if e := v.Flock(ctx, fe.Inode, fh, 123, syscall.F_UNLCK, true); e != 0 { + t.Fatalf("flock unlock: %s", e) + } + select { + case <-done: + case <-time.NewTimer(time.Millisecond * 100).C: + t.Fatalf("flock timeout on rdlock") + } + if e := v.Flock(ctx, fe.Inode, fh, 456, syscall.F_UNLCK, true); e != 0 { + t.Fatalf("flock unlock rd: %s", e) + } + + // posix lock + if e = v.Setlk(ctx, fe.Inode, fh, 1, 0, 100, 100, 1, true); e != syscall.EINVAL { + t.Fatalf("setlk: %s", e) + } + if e = v.Setlk(ctx, fe.Inode, fh, 1, 0, 100, syscall.F_WRLCK, 1, true); e != 0 { + t.Fatalf("setlk: %s", e) + } + var start, len uint64 = 10, 1000 + var typ, pid uint32 = syscall.LOCK_UN, 10 + if e = v.Getlk(ctx, fe.Inode, fh, 2, &start, &len, &typ, &pid); e != syscall.EINVAL { + t.Fatalf("getlk: %s", e) + } + typ = syscall.F_RDLCK + if e = v.Getlk(ctx, fe.Inode, fh, 2, &start, &len, &typ, &pid); e != 0 { + t.Fatalf("getlk: %s", e) + } else if start != 0 || len != 100 || typ != syscall.F_WRLCK || pid != 1 { + t.Fatalf("getlk result: %d %d %d %d", start, len, typ, 
pid) + } + if e = v.Setlk(ctx, fe.Inode, fh, 2, 10, 100, syscall.F_RDLCK, 10, false); e != syscall.EAGAIN { + t.Fatalf("setlk rd: %s", e) + } + go func() { + _ = v.Setlk(ctx, fe.Inode, fh, 2, 10, 100, syscall.F_RDLCK, 10, false) + done <- true + }() + if e = v.Setlk(ctx, fe.Inode, fh, 1, 10, 100, syscall.F_UNLCK, 1, true); e != 0 { + t.Fatalf("setlk unlock: %s", e) + } + select { + case <-done: + case <-time.NewTimer(time.Millisecond * 100).C: + t.Fatalf("setlk timeout on rdlock") + } + if e = v.Setlk(ctx, fe.Inode, fh, 2, 0, 20, syscall.F_RDLCK, 10, false); e != syscall.EAGAIN { + t.Fatalf("setlk rd: %s", e) + } + if e = v.Setlk(ctx, fe.Inode, fh, 1, 0, 1000, syscall.F_UNLCK, 1, true); e != 0 { + t.Fatalf("setlk unlock: %s", e) + } + if e = v.Flush(ctx, fe.Inode, fh, 0); e != 0 { + t.Fatalf("flush: %s", e) + } + v.Release(ctx, fe.Inode, fh) + // invalid fd + if e = v.Flock(ctx, fe.Inode, 10, 123, syscall.F_WRLCK, true); e != syscall.EBADF { + t.Fatalf("flock wr: %s", e) + } + if e = v.Setlk(ctx, fe.Inode, 10, 1, 0, 1000, syscall.F_UNLCK, 1, true); e != syscall.EBADF { + t.Fatalf("setlk unlock: %s", e) + } + if e = v.Getlk(ctx, fe.Inode, 10, 2, &start, &len, &typ, &pid); e != syscall.EBADF { + t.Fatalf("getlk: %s", e) + } + // internal file + fe, _ = v.Lookup(ctx, 1, ".stats") + if e = v.Flock(ctx, fe.Inode, 10, 123, syscall.F_WRLCK, true); e != syscall.EPERM { + t.Fatalf("flock wr: %s", e) + } + if e = v.Setlk(ctx, fe.Inode, 10, 1, 0, 1000, syscall.F_UNLCK, 1, true); e != syscall.EPERM { + t.Fatalf("setlk unlock: %s", e) + } + if e = v.Getlk(ctx, fe.Inode, 10, 2, &start, &len, &typ, &pid); e != syscall.EPERM { + t.Fatalf("getlk: %s", e) + } +} + +func TestInternalFile(t *testing.T) { + v, _ := createTestVFS() + ctx := NewLogContext(meta.Background) + // list internal files + fh, _ := v.Opendir(ctx, 1) + entries, e := v.Readdir(ctx, 1, 1024, 0, fh, true) + if e != 0 { + t.Fatalf("readdir 1: %s", e) + } + internalFiles := make(map[string]bool) + for _, e := range entries { + if IsSpecialName(string(e.Name)) && e.Attr.Typ == meta.TypeFile { + internalFiles[string(e.Name)] = true + } + } + if len(internalFiles) != 4 { + t.Fatalf("there should be 4 internal files but got %d", len(internalFiles)) + } + v.Releasedir(ctx, 1, fh) + + // .config + ctx2 := NewLogContext(meta.NewContext(10, 111, []uint32{222})) + fe, e := v.Lookup(ctx2, 1, ".config") + if e != 0 { + t.Fatalf("lookup .config: %s", e) + } + if e := v.Access(ctx2, fe.Inode, unix.R_OK); e != syscall.EACCES { // other user can't access .config + t.Fatalf("access .config: %s", e) + } + if _, e := v.GetAttr(ctx, fe.Inode, 0); e != 0 { + t.Fatalf("getattr .config: %s", e) + } + // ignore setattr on internal files + if fe2, e := v.SetAttr(ctx, fe.Inode, meta.SetAttrUID, 0, 0, ctx2.Uid(), 0, 0, 0, 0, 0, 0); e != 0 || fe2.Attr.Uid != fe.Attr.Uid { + t.Fatalf("can't setattr on internal files") + } + if e = v.Unlink(ctx, 1, ".config"); e != syscall.EPERM { + t.Fatalf("should not unlink internal file") + } + if _, _, e = v.Open(ctx, fe.Inode, syscall.O_WRONLY); e != syscall.EACCES { + t.Fatalf("write .config: %s", e) + } + _, fh, e = v.Open(ctx, fe.Inode, syscall.O_RDONLY) + if e != 0 { + t.Fatalf("open .config: %s", e) + } + buf := make([]byte, 10240) + if _, e := v.Read(ctx, fe.Inode, buf, 0, 0); e != syscall.EBADF { + t.Fatalf("read .config: %s", e) + } + if n, e := v.Read(ctx, fe.Inode, buf, 0, fh); e != 0 { + t.Fatalf("read .config: %s", e) + } else if !strings.Contains(string(buf[:n]), v.Conf.Format.UUID) { + t.Fatalf("invalid config: %q", 
string(buf[:n])) + } + + // .stats + fe, e = v.Lookup(ctx, 1, ".stats") + if e != 0 { + t.Fatalf("lookup .stats: %s", e) + } + if e := v.Access(ctx, fe.Inode, unix.W_OK); e != 0 { // root can do everything + t.Fatalf("access .stats: %s", e) + } + fe, fh, e = v.Open(ctx, fe.Inode, syscall.O_RDONLY) + if e != 0 { + t.Fatalf("open .stats: %s", e) + } + defer v.Release(ctx, fe.Inode, fh) + defer v.Flush(ctx, fe.Inode, fh, 0) + buf = make([]byte, 128<<10) + n, e := v.Read(ctx, fe.Inode, buf[:4<<10], 0, fh) + if e != 0 { + t.Fatalf("read .stats: %s", e) + } + if n == 4<<10 { + if n2, e := v.Read(ctx, fe.Inode, buf[n:], uint64(n), fh); e != 0 { + t.Fatalf("read .stats 2: %s", e) + } else { + n += n2 + } + } + if !strings.Contains(string(buf[:n]), "fuse_open_handlers") { + t.Fatalf(".stats should contains `memory`, but got %s", string(buf[:n])) + } + if e = v.Truncate(ctx, fe.Inode, 0, 1, &meta.Attr{}); e != syscall.EPERM { + t.Fatalf("truncate .config: %s", e) + } + + // accesslog + fe, e = v.Lookup(ctx, 1, ".accesslog") + if e != 0 { + t.Fatalf("lookup .accesslog: %s", e) + } + fe, fh, e = v.Open(ctx, fe.Inode, syscall.O_RDONLY) + if e != 0 { + t.Fatalf("open .accesslog: %s", e) + } + if n, e = v.Read(ctx, fe.Inode, buf, 0, fh); e != 0 { + t.Fatalf("read .accesslog: %s", e) + } else if !strings.Contains(string(buf[:n]), "#\n") { + t.Fatalf("invalid access log: %q", string(buf[:n])) + } + _ = v.Flush(ctx, fe.Inode, fh, 0) + v.Release(ctx, fe.Inode, fh) + + // control messages + fe, e = v.Lookup(ctx, 1, ".control") + if e != 0 { + t.Fatalf("lookup .control: %s", e) + } + fe, fh, e = v.Open(ctx, fe.Inode, syscall.O_RDWR) + if e != 0 { + t.Fatalf("open .stats: %s", e) + } + // rmr + buf = make([]byte, 4+4+8+1+4) + w := utils.FromBuffer(buf) + w.Put32(meta.Rmr) + w.Put32(13) + w.Put64(1) + w.Put8(4) + w.Put([]byte("file")) + if e := v.Write(ctx, fe.Inode, w.Bytes(), 0, fh); e != 0 { + t.Fatalf("write info: %s", e) + } + var off uint64 = uint64(len(buf)) + resp := make([]byte, 1024*10) + if n, e := v.Read(ctx, fe.Inode, resp, off, fh); e != 0 || n != 1 { + t.Fatalf("read result: %s %d", e, n) + } else if resp[0] != byte(syscall.ENOENT) { + t.Fatalf("rmr result: %s", string(buf[:n])) + } else { + off += uint64(n) + } + // info + buf = make([]byte, 4+4+8) + w = utils.FromBuffer(buf) + w.Put32(meta.Info) + w.Put32(8) + w.Put64(1) + if e := v.Write(ctx, fe.Inode, w.Bytes(), off, fh); e != 0 { + t.Fatalf("write info: %s", e) + } + off += uint64(len(buf)) + buf = make([]byte, 1024*10) + if n, e := v.Read(ctx, fe.Inode, buf, off, fh); e != 0 || n == 0 { + t.Fatalf("read result: %s", e) + } else if !strings.Contains(string(buf[:n]), "dirs:") { + t.Fatalf("info result: %s", string(buf[:n])) + } else { + off += uint64(n) + } + // fill + buf = make([]byte, 4+4+8+1+1+2+1) + w = utils.FromBuffer(buf) + w.Put32(meta.FillCache) + w.Put32(13) + w.Put64(1) + w.Put8(1) + w.Put([]byte("/")) + w.Put16(2) + w.Put8(0) + if e := v.Write(ctx, fe.Inode, w.Bytes()[:10], 0, fh); e != 0 { + t.Fatalf("write fill 1: %s", e) + } + if e := v.Write(ctx, fe.Inode, w.Bytes()[10:], 0, fh); e != 0 { + t.Fatalf("write fill 2: %s", e) + } + off += uint64(len(buf)) + resp = make([]byte, 1024*10) + if n, e = v.Read(ctx, fe.Inode, resp, off, fh); e != 0 || n != 1 { + t.Fatalf("read result: %s", e) + } else if resp[0] != 0 { + t.Fatalf("fill result: %s", string(buf[:n])) + } + off += uint64(n) + + // invalid msg + buf = make([]byte, 4+4+2) + w = utils.FromBuffer(buf) + w.Put32(meta.Rmr) + w.Put32(0) + if e := v.Write(ctx, fe.Inode, buf, off, 
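The test above exercises the ".control" file protocol: each request is framed as a 4-byte command, a 4-byte payload length, and the payload itself, and the single status byte of the reply is read back at the offset just past the request. A minimal standalone sketch of that framing follows; the big-endian byte order is an assumption matching the Put32/Put64 helpers, and cmdRmr is a placeholder for the real meta.Rmr constant.

package main

import (
	"encoding/binary"
	"fmt"
)

// cmdRmr is a placeholder for the real command constant (meta.Rmr above).
const cmdRmr uint32 = 1000

// packRmr frames an "rmr" request: 4-byte command, 4-byte payload length,
// then the payload (inode, name length, name), as in the test above.
func packRmr(inode uint64, name string) []byte {
	payload := make([]byte, 8+1+len(name))
	binary.BigEndian.PutUint64(payload[0:8], inode)
	payload[8] = byte(len(name))
	copy(payload[9:], name)

	msg := make([]byte, 8+len(payload))
	binary.BigEndian.PutUint32(msg[0:4], cmdRmr)
	binary.BigEndian.PutUint32(msg[4:8], uint32(len(payload)))
	copy(msg[8:], payload)
	return msg
}

func main() {
	fmt.Printf("%x\n", packRmr(1, "file")) // the reply (one status byte) is read back just past the request
}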
fh); e != 0 { + t.Fatalf("write info: %s", e) + } + off += uint64(len(buf)) + resp = make([]byte, 1024) + if n, e := v.Read(ctx, fe.Inode, resp, off, fh); e != 0 || n != 1 { + t.Fatalf("read result: %s %d", e, n) + } else if resp[0] != uint8(syscall.EIO) { + t.Fatalf("result: %s", string(resp[:n])) + } +} diff --git a/pkg/vfs/vfs_unix.go b/pkg/vfs/vfs_unix.go new file mode 100644 index 0000000..934fac5 --- /dev/null +++ b/pkg/vfs/vfs_unix.go @@ -0,0 +1,290 @@ +//go:build !windows +// +build !windows + +/* + * JuiceFS, Copyright 2020 Juicedata, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package vfs + +import ( + "fmt" + "strconv" + "strings" + "syscall" + + "github.com/juicedata/juicefs/pkg/meta" + + "golang.org/x/sys/unix" +) + +const O_ACCMODE = syscall.O_ACCMODE +const F_UNLCK = syscall.F_UNLCK + +type Statfs struct { + Total uint64 + Avail uint64 + Files uint64 + Favail uint64 +} + +func (v *VFS) StatFS(ctx Context, ino Ino) (st *Statfs, err syscall.Errno) { + var totalspace, availspace, iused, iavail uint64 + _ = v.Meta.StatFS(ctx, &totalspace, &availspace, &iused, &iavail) + st = new(Statfs) + st.Total = totalspace + st.Avail = availspace + st.Files = iused + iavail + st.Favail = iavail + logit(ctx, "statfs (%d): OK (%d,%d,%d,%d)", ino, totalspace-availspace, availspace, iused, iavail) + return +} + +func accessTest(attr *Attr, mmode uint16, uid uint32, gid uint32) syscall.Errno { + if uid == 0 { + return 0 + } + mode := attr.Mode + var effected uint16 + if uid == attr.Uid { + effected = (mode >> 6) & 7 + } else { + effected = mode & 7 + if gid == attr.Gid { + effected = (mode >> 3) & 7 + } + } + if mmode&effected != mmode { + return syscall.EACCES + } + return 0 +} + +func (v *VFS) Access(ctx Context, ino Ino, mask int) (err syscall.Errno) { + defer func() { logit(ctx, "access (%d,0x%X): %s", ino, mask, strerr(err)) }() + var mmask uint16 + if mask&unix.R_OK != 0 { + mmask |= MODE_MASK_R + } + if mask&unix.W_OK != 0 { + mmask |= MODE_MASK_W + } + if mask&unix.X_OK != 0 { + mmask |= MODE_MASK_X + } + if IsSpecialNode(ino) { + node := getInternalNode(ino) + if node != nil { + err = accessTest(node.attr, mmask, ctx.Uid(), ctx.Gid()) + return + } + } + + err = v.Meta.Access(ctx, ino, uint8(mmask), nil) + return +} + +func setattrStr(set int, mode, uid, gid uint32, atime, mtime int64, size uint64) string { + var sb strings.Builder + if set&meta.SetAttrMode != 0 { + sb.WriteString(fmt.Sprintf("mode=%s:0%04o,", smode(uint16(mode)), mode&07777)) + } + if set&meta.SetAttrUID != 0 { + sb.WriteString(fmt.Sprintf("uid=%d,", uid)) + } + if set&meta.SetAttrGID != 0 { + sb.WriteString(fmt.Sprintf("gid=%d,", gid)) + } + + var atimeStr string + if set&meta.SetAttrAtimeNow != 0 || (set&meta.SetAttrAtime) != 0 && atime < 0 { + atimeStr = "NOW" + } else if set&meta.SetAttrAtime != 0 { + atimeStr = strconv.FormatInt(atime, 10) + } + if atimeStr != "" { + sb.WriteString("atime=" + atimeStr + ",") + } + + var mtimeStr string + if set&meta.SetAttrMtimeNow != 0 || 
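accessTest above implements the classic Unix permission check: root always passes, otherwise the owner, group or "other" bits of the mode are selected and every requested bit must be present. A self-contained sketch of the same rule, matching the 0751 cases used in TestAccessMode:

package main

import "fmt"

const (
	maskR = 4
	maskW = 2
	maskX = 1
)

// allowed mirrors accessTest: root always passes, otherwise the owner, group
// or "other" permission bits are selected and every requested bit must be set.
func allowed(mode uint16, fileUID, fileGID, uid, gid uint32, want uint16) bool {
	if uid == 0 {
		return true
	}
	var perm uint16
	switch {
	case uid == fileUID:
		perm = (mode >> 6) & 7
	case gid == fileGID:
		perm = (mode >> 3) & 7
	default:
		perm = mode & 7
	}
	return want&perm == want
}

func main() {
	// mode 0751: owner rwx, group r-x, others --x (as in TestAccessMode above)
	fmt.Println(allowed(0751, 1, 2, 2, 2, maskR|maskX)) // true: group may read and execute
	fmt.Println(allowed(0751, 1, 2, 2, 2, maskW))       // false: group has no write bit
}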
(set&meta.SetAttrMtime) != 0 && mtime < 0 { + mtimeStr = "NOW" + } else if set&meta.SetAttrMtime != 0 { + mtimeStr = strconv.FormatInt(mtime, 10) + } + if mtimeStr != "" { + sb.WriteString("mtime=" + mtimeStr + ",") + } + + if set&meta.SetAttrSize != 0 { + sizeStr := strconv.FormatUint(size, 10) + sb.WriteString("size=" + sizeStr + ",") + } + r := sb.String() + if len(r) > 1 { + r = r[:len(r)-1] // drop last , + } + return r +} + +func (v *VFS) SetAttr(ctx Context, ino Ino, set int, opened uint8, mode, uid, gid uint32, atime, mtime int64, atimensec, mtimensec uint32, size uint64) (entry *meta.Entry, err syscall.Errno) { + str := setattrStr(set, mode, uid, gid, atime, mtime, size) + defer func() { + logit(ctx, "setattr (%d,0x%X,[%s]): %s%s", ino, set, str, strerr(err), (*Entry)(entry)) + }() + if IsSpecialNode(ino) { + n := getInternalNode(ino) + if n != nil { + entry = &meta.Entry{Inode: ino, Attr: n.attr} + } else { + err = syscall.EPERM + } + return + } + err = syscall.EINVAL + var attr = &Attr{} + if set&meta.SetAttrSize != 0 { + err = v.Truncate(ctx, ino, int64(size), opened, attr) + if err != 0 { + return + } + } + if set&meta.SetAttrMode != 0 { + attr.Mode = uint16(mode & 07777) + } + if set&meta.SetAttrUID != 0 { + attr.Uid = uid + } + if set&meta.SetAttrGID != 0 { + attr.Gid = gid + } + if set&meta.SetAttrAtime != 0 { + attr.Atime = atime + attr.Atimensec = atimensec + } + if set&meta.SetAttrMtime != 0 { + attr.Mtime = mtime + attr.Mtimensec = mtimensec + } + err = v.Meta.SetAttr(ctx, ino, uint16(set), 0, attr) + if err == 0 { + v.UpdateLength(ino, attr) + entry = &meta.Entry{Inode: ino, Attr: attr} + } + return +} + +type lockType uint32 + +func (l lockType) String() string { + switch l { + case syscall.F_UNLCK: + return "U" + case syscall.F_RDLCK: + return "R" + case syscall.F_WRLCK: + return "W" + default: + return "X" + } +} + +func (v *VFS) Getlk(ctx Context, ino Ino, fh uint64, owner uint64, start, len *uint64, typ *uint32, pid *uint32) (err syscall.Errno) { + logit(ctx, "getlk (%d,%016X): %s (%d,%d,%s,%d)", ino, owner, strerr(err), *start, *len, lockType(*typ), *pid) + if lockType(*typ).String() == "X" { + return syscall.EINVAL + } + if IsSpecialNode(ino) { + err = syscall.EPERM + return + } + if v.findHandle(ino, fh) == nil { + err = syscall.EBADF + return + } + err = v.Meta.Getlk(ctx, ino, owner, typ, start, len, pid) + return +} + +func (v *VFS) Setlk(ctx Context, ino Ino, fh uint64, owner uint64, start, end uint64, typ uint32, pid uint32, block bool) (err syscall.Errno) { + defer func() { + logit(ctx, "setlk (%d,%016X,%d,%d,%s,%t,%d): %s", ino, owner, start, end, lockType(typ), block, pid, strerr(err)) + }() + if lockType(typ).String() == "X" { + return syscall.EINVAL + } + if IsSpecialNode(ino) { + err = syscall.EPERM + return + } + h := v.findHandle(ino, fh) + if h == nil { + err = syscall.EBADF + return + } + h.addOp(ctx) + defer h.removeOp(ctx) + + err = v.Meta.Setlk(ctx, ino, owner, block, typ, start, end, pid) + if err == 0 { + h.Lock() + if typ != syscall.F_UNLCK { + h.locks |= 2 + } + h.Unlock() + } + return +} + +func (v *VFS) Flock(ctx Context, ino Ino, fh uint64, owner uint64, typ uint32, block bool) (err syscall.Errno) { + var name string + var reqid uint32 + defer func() { logit(ctx, "flock (%d,%d,%016X,%s,%t): %s", reqid, ino, owner, name, block, strerr(err)) }() + switch typ { + case syscall.F_RDLCK: + name = "LOCKSH" + case syscall.F_WRLCK: + name = "LOCKEX" + case syscall.F_UNLCK: + name = "UNLOCK" + default: + err = syscall.EINVAL + return + } + + if 
IsSpecialNode(ino) { + err = syscall.EPERM + return + } + h := v.findHandle(ino, fh) + if h == nil { + err = syscall.EBADF + return + } + h.addOp(ctx) + defer h.removeOp(ctx) + err = v.Meta.Flock(ctx, ino, owner, typ, block) + if err == 0 { + h.Lock() + if typ == syscall.F_UNLCK { + h.locks &= 2 + } else { + h.locks |= 1 + h.flockOwner = owner + } + h.Unlock() + } + return +} diff --git a/pkg/vfs/vfs_windows.go b/pkg/vfs/vfs_windows.go new file mode 100644 index 0000000..835b99f --- /dev/null +++ b/pkg/vfs/vfs_windows.go @@ -0,0 +1,20 @@ +/* + * JuiceFS, Copyright 2020 Juicedata, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package vfs + +const O_ACCMODE = 0xff +const F_UNLCK = 0x01 diff --git a/pkg/vfs/writer.go b/pkg/vfs/writer.go new file mode 100644 index 0000000..6773aa1 --- /dev/null +++ b/pkg/vfs/writer.go @@ -0,0 +1,521 @@ +/* + * JuiceFS, Copyright 2020 Juicedata, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package vfs + +import ( + "math/rand" + "runtime" + "sync" + "syscall" + "time" + + "github.com/juicedata/juicefs/pkg/chunk" + "github.com/juicedata/juicefs/pkg/meta" + "github.com/juicedata/juicefs/pkg/utils" +) + +const ( + flushDuration = time.Second * 5 +) + +type FileWriter interface { + Write(ctx meta.Context, offset uint64, data []byte) syscall.Errno + Flush(ctx meta.Context) syscall.Errno + Close(ctx meta.Context) syscall.Errno + GetLength() uint64 + Truncate(length uint64) +} + +type DataWriter interface { + Open(inode Ino, fleng uint64) FileWriter + Flush(ctx meta.Context, inode Ino) syscall.Errno + GetLength(inode Ino) uint64 + Truncate(inode Ino, length uint64) +} + +type sliceWriter struct { + id uint64 + chunk *chunkWriter + off uint32 + length uint32 + soff uint32 + slen uint32 + writer chunk.Writer + freezed bool + done bool + err syscall.Errno + notify *utils.Cond + started time.Time + lastMod time.Time +} + +func (s *sliceWriter) prepareID(ctx meta.Context, retry bool) { + f := s.chunk.file + f.Lock() + for s.id == 0 { + var id uint64 + f.Unlock() + st := f.w.m.NewChunk(ctx, &id) + f.Lock() + if st != 0 && st != syscall.EIO { + s.err = st + break + } + if !retry || st == 0 { + if s.id == 0 { + s.id = id + } + break + } + f.Unlock() + logger.Debugf("meta is not available: %s", st) + time.Sleep(time.Millisecond * 100) + f.Lock() + } + if s.writer != nil && s.writer.ID() == 0 { + s.writer.SetID(s.id) + } + f.Unlock() +} + +func (s *sliceWriter) markDone() { + f := s.chunk.file + f.Lock() + s.done = true + s.notify.Signal() + f.Unlock() +} + +// freezed, no more data +func (s *sliceWriter) flushData() { + defer s.markDone() + if s.slen == 0 { + return + } + s.prepareID(meta.Background, true) + if s.err != 0 { + logger.Infof("flush inode:%d chunk: %s", s.chunk.file.inode, s.err) + s.writer.Abort() + return + } + s.length = s.slen + if err := s.writer.Finish(int(s.length)); err != nil { + logger.Errorf("upload chunk %v (length: %v) fail: %s", s.id, s.length, err) + s.writer.Abort() + s.err = syscall.EIO + } + s.writer = nil +} + +// protected by s.chunk.file +func (s *sliceWriter) write(ctx meta.Context, off uint32, data []uint8) syscall.Errno { + f := s.chunk.file + _, err := s.writer.WriteAt(data, int64(off)) + if err != nil { + logger.Warnf("write: chunk: %d off: %d %s", s.id, off, err) + return syscall.EIO + } + if off+uint32(len(data)) > s.slen { + s.slen = off + uint32(len(data)) + } + s.lastMod = time.Now() + if s.slen == meta.ChunkSize { + s.freezed = true + go s.flushData() + } else if int(s.slen) >= f.w.blockSize { + if s.id > 0 { + err := s.writer.FlushTo(int(s.slen)) + if err != nil { + logger.Warnf("write: chunk: %d off: %d %s", s.id, off, err) + return syscall.EIO + } + } else if int(off) <= f.w.blockSize { + go s.prepareID(ctx, false) + } + } + return 0 +} + +type chunkWriter struct { + indx uint32 + file *fileWriter + slices []*sliceWriter +} + +// protected by file +func (c *chunkWriter) findWritableSlice(pos uint32, size uint32) *sliceWriter { + blockSize := uint32(c.file.w.blockSize) + for i := range c.slices { + s := c.slices[len(c.slices)-1-i] + if !s.freezed { + flushoff := s.slen / blockSize * blockSize + if pos >= s.off+flushoff && pos <= s.off+s.slen { + return s + } else if i > 3 { + s.freezed = true + go s.flushData() + } + } + if pos < s.off+s.slen && s.off < pos+size { + // overlaped + // TODO: write into multiple slices + return nil + } + } + return nil +} + +func (c *chunkWriter) commitThread() { + f := c.file + defer f.w.free(f) + 
f.Lock() + defer f.Unlock() + // the slices should be committed in the order that are created + for len(c.slices) > 0 { + s := c.slices[0] + for !s.done { + if s.notify.WaitWithTimeout(time.Millisecond*100) && !s.freezed && time.Since(s.started) > flushDuration*2 { + s.freezed = true + go s.flushData() + } + } + err := s.err + f.Unlock() + + if err == 0 { + var ss = meta.Slice{Chunkid: s.id, Size: s.length, Off: s.soff, Len: s.slen} + err = f.w.m.Write(meta.Background, f.inode, c.indx, s.off, ss) + f.w.reader.Invalidate(f.inode, uint64(c.indx)*meta.ChunkSize+uint64(s.off), uint64(ss.Len)) + } + + f.Lock() + if err != 0 { + if err != syscall.ENOENT && err != syscall.ENOSPC { + logger.Warnf("write inode:%d error: %s", f.inode, err) + err = syscall.EIO + } + f.err = err + logger.Errorf("write inode:%d indx:%d %s", f.inode, c.indx, err) + } + c.slices = c.slices[1:] + } + f.freeChunk(c) +} + +type fileWriter struct { + sync.Mutex + w *dataWriter + + inode Ino + length uint64 + err syscall.Errno + flushwaiting uint16 + writewaiting uint16 + refs uint16 + chunks map[uint32]*chunkWriter + + flushcond *utils.Cond // wait for chunks==nil (flush) + writecond *utils.Cond // wait for flushwaiting==0 (write) +} + +// protected by file +func (f *fileWriter) findChunk(i uint32) *chunkWriter { + c := f.chunks[i] + if c == nil { + c = &chunkWriter{indx: i, file: f} + f.chunks[i] = c + } + return c +} + +// protected by file +func (f *fileWriter) freeChunk(c *chunkWriter) { + delete(f.chunks, c.indx) + if len(f.chunks) == 0 && f.flushwaiting > 0 { + f.flushcond.Broadcast() + } +} + +// protected by file +func (f *fileWriter) writeChunk(ctx meta.Context, indx uint32, off uint32, data []byte) syscall.Errno { + c := f.findChunk(indx) + s := c.findWritableSlice(off, uint32(len(data))) + if s == nil { + s = &sliceWriter{ + chunk: c, + off: off, + writer: f.w.store.NewWriter(0), + notify: utils.NewCond(&f.Mutex), + started: time.Now(), + } + c.slices = append(c.slices, s) + if len(c.slices) == 1 { + f.w.Lock() + f.refs++ + f.w.Unlock() + go c.commitThread() + } + } + return s.write(ctx, off-s.off, data) +} + +func (f *fileWriter) totalSlices() int { + var cnt int + f.Lock() + for _, c := range f.chunks { + cnt += len(c.slices) + } + f.Unlock() + return cnt +} + +func (w *dataWriter) usedBufferSize() int64 { + return utils.AllocMemory() - w.store.UsedMemory() +} + +func (f *fileWriter) Write(ctx meta.Context, off uint64, data []byte) syscall.Errno { + for { + if f.totalSlices() < 1000 { + break + } + time.Sleep(time.Millisecond) + } + if f.w.usedBufferSize() > f.w.bufferSize { + // slow down + time.Sleep(time.Millisecond * 10) + for f.w.usedBufferSize() > f.w.bufferSize*2 { + time.Sleep(time.Millisecond * 100) + } + } + + s := time.Now() + f.Lock() + defer f.Unlock() + size := uint64(len(data)) + f.writewaiting++ + for f.flushwaiting > 0 { + if f.writecond.WaitWithTimeout(time.Second) && ctx.Canceled() { + f.writewaiting-- + logger.Warnf("write %d interrupted after %d", f.inode, time.Since(s)) + return syscall.EINTR + } + } + f.writewaiting-- + + indx := uint32(off / meta.ChunkSize) + pos := uint32(off % meta.ChunkSize) + for len(data) > 0 { + n := uint32(len(data)) + if pos+n > meta.ChunkSize { + n = meta.ChunkSize - pos + } + if st := f.writeChunk(ctx, indx, pos, data[:n]); st != 0 { + return st + } + data = data[n:] + indx++ + pos = (pos + n) % meta.ChunkSize + } + if off+size > f.length { + f.length = off + size + } + return f.err +} + +func (f *fileWriter) flush(ctx meta.Context, writeback bool) syscall.Errno 
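fileWriter.Write below splits each incoming write into pieces that never cross a chunk boundary. A sketch of that offset-to-chunk arithmetic; the 64 MiB chunk size is an assumption matching meta.ChunkSize used in the diff.

package main

import "fmt"

const chunkSize = 64 << 20 // assumed to match meta.ChunkSize (64 MiB)

type piece struct {
	chunkIndex uint32 // which chunk the bytes land in
	offInChunk uint32 // offset inside that chunk
	length     uint32
}

// splitWrite mirrors the loop in fileWriter.Write: each piece stops at the
// next chunk boundary, so a single write may touch several chunks.
func splitWrite(off uint64, size uint32) []piece {
	var pieces []piece
	indx := uint32(off / chunkSize)
	pos := uint32(off % chunkSize)
	for size > 0 {
		n := size
		if pos+n > chunkSize {
			n = chunkSize - pos
		}
		pieces = append(pieces, piece{indx, pos, n})
		size -= n
		indx++
		pos = 0 // later pieces always start at a chunk boundary
	}
	return pieces
}

func main() {
	// a 10 MiB write starting 5 MiB before a chunk boundary spans two chunks
	fmt.Println(splitWrite(uint64(chunkSize-5<<20), 10<<20))
}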
{ + s := time.Now() + f.Lock() + defer f.Unlock() + f.flushwaiting++ + + var err syscall.Errno + var wait = time.Second * time.Duration((f.w.maxRetries+1)*(f.w.maxRetries+1)/2) + if wait < time.Minute*5 { + wait = time.Minute * 5 + } + var deadline = time.Now().Add(wait) + for len(f.chunks) > 0 && err == 0 { + for _, c := range f.chunks { + for _, s := range c.slices { + if !s.freezed { + s.freezed = true + go s.flushData() + } + } + } + if f.flushcond.WaitWithTimeout(time.Second*3) && ctx.Canceled() { + logger.Warnf("flush %d interrupted after %d", f.inode, time.Since(s)) + err = syscall.EINTR + break + } + if time.Now().After(deadline) { + logger.Errorf("flush %d timeout after waited %s", f.inode, wait) + for _, c := range f.chunks { + for _, s := range c.slices { + logger.Errorf("pending slice %d-%d: %+v", f.inode, c.indx, *s) + } + } + buf := make([]byte, 1<<20) + n := runtime.Stack(buf, true) + logger.Warnf("All goroutines (%d):\n%s", runtime.NumGoroutine(), buf[:n]) + err = syscall.EIO + break + } + } + f.flushwaiting-- + if f.flushwaiting == 0 && f.writewaiting > 0 { + f.writecond.Broadcast() + } + if err == 0 { + err = f.err + } + return err +} + +func (f *fileWriter) Flush(ctx meta.Context) syscall.Errno { + return f.flush(ctx, false) +} + +func (f *fileWriter) Close(ctx meta.Context) syscall.Errno { + defer f.w.free(f) + return f.Flush(ctx) +} + +func (f *fileWriter) GetLength() uint64 { + f.Lock() + defer f.Unlock() + return f.length +} + +func (f *fileWriter) Truncate(length uint64) { + f.Lock() + defer f.Unlock() + // TODO: truncate write buffer if length < f.length + f.length = length +} + +type dataWriter struct { + sync.Mutex + m meta.Meta + store chunk.ChunkStore + reader DataReader + blockSize int + bufferSize int64 + files map[Ino]*fileWriter + maxRetries uint32 +} + +func NewDataWriter(conf *Config, m meta.Meta, store chunk.ChunkStore, reader DataReader) DataWriter { + w := &dataWriter{ + m: m, + store: store, + reader: reader, + blockSize: conf.Chunk.BlockSize, + bufferSize: int64(conf.Chunk.BufferSize), + files: make(map[Ino]*fileWriter), + maxRetries: uint32(conf.Meta.Retries), + } + go w.flushAll() + return w +} + +func (w *dataWriter) flushAll() { + for { + w.Lock() + now := time.Now() + for _, f := range w.files { + f.refs++ + w.Unlock() + tooMany := f.totalSlices() > 800 + f.Lock() + + lastBit := uint32(rand.Int() % 2) // choose half of chunks randomly + for i, c := range f.chunks { + hs := len(c.slices) / 2 + for j, s := range c.slices { + if !s.freezed && (now.Sub(s.started) > flushDuration || now.Sub(s.lastMod) > time.Second || + tooMany && i%2 == lastBit && j <= hs) { + s.freezed = true + go s.flushData() + } + } + } + f.Unlock() + w.free(f) + w.Lock() + } + w.Unlock() + time.Sleep(time.Millisecond * 100) + } +} + +func (w *dataWriter) Open(inode Ino, len uint64) FileWriter { + w.Lock() + defer w.Unlock() + f, ok := w.files[inode] + if !ok { + f = &fileWriter{ + w: w, + inode: inode, + length: len, + chunks: make(map[uint32]*chunkWriter), + } + f.flushcond = utils.NewCond(f) + f.writecond = utils.NewCond(f) + w.files[inode] = f + } + f.refs++ + return f +} + +func (w *dataWriter) find(inode Ino) *fileWriter { + w.Lock() + defer w.Unlock() + return w.files[inode] +} + +func (w *dataWriter) free(f *fileWriter) { + w.Lock() + defer w.Unlock() + f.refs-- + if f.refs == 0 { + delete(w.files, f.inode) + } +} + +func (w *dataWriter) Flush(ctx meta.Context, inode Ino) syscall.Errno { + f := w.find(inode) + if f != nil { + return f.Flush(ctx) + } + return 0 +} + +func 
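The background flushAll loop above freezes and flushes a slice once it has been open long enough, has been idle for a while, or when a file has accumulated too many pending slices. A simplified sketch of that decision, ignoring the random half-of-chunks selection used in the "too many" case:

package main

import (
	"fmt"
	"time"
)

const flushDuration = 5 * time.Second

// shouldFreeze mirrors the per-slice conditions checked in flushAll:
// old enough, idle for more than a second, or too many slices pending.
func shouldFreeze(started, lastMod, now time.Time, tooManySlices bool) bool {
	return now.Sub(started) > flushDuration ||
		now.Sub(lastMod) > time.Second ||
		tooManySlices
}

func main() {
	now := time.Now()
	fmt.Println(shouldFreeze(now.Add(-6*time.Second), now, now, false)) // true: slice started more than 5s ago
	fmt.Println(shouldFreeze(now, now.Add(-2*time.Second), now, false)) // true: no write for more than 1s
	fmt.Println(shouldFreeze(now, now, now, false))                     // false: still being written
}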
(w *dataWriter) GetLength(inode Ino) uint64 { + f := w.find(inode) + if f != nil { + return f.GetLength() + } + return 0 +} + +func (w *dataWriter) Truncate(inode Ino, len uint64) { + f := w.find(inode) + if f != nil { + f.Truncate(len) + } +} diff --git a/pkg/winfsp/trace.go b/pkg/winfsp/trace.go new file mode 100644 index 0000000..07be59d --- /dev/null +++ b/pkg/winfsp/trace.go @@ -0,0 +1,117 @@ +/* + * trace.go + * + * Copyright 2017-2018 Bill Zissimopoulos + */ +/* + * This file is part of Cgofuse. + * + * It is licensed under the MIT license. The full license text can be found + * in the License.txt file at the root of this project. + */ + +package winfsp + +import ( + "fmt" + "log" + "os" + "path/filepath" + "runtime" + "runtime/debug" +) + +var ( + TracePattern = os.Getenv("CGOFUSE_TRACE") +) + +func traceJoin(deref bool, vals []interface{}) string { + rslt := "" + for _, v := range vals { + if deref { + switch i := v.(type) { + case *bool: + rslt += fmt.Sprintf(", %#v", *i) + case *int: + rslt += fmt.Sprintf(", %#v", *i) + case *int8: + rslt += fmt.Sprintf(", %#v", *i) + case *int16: + rslt += fmt.Sprintf(", %#v", *i) + case *int32: + rslt += fmt.Sprintf(", %#v", *i) + case *int64: + rslt += fmt.Sprintf(", %#v", *i) + case *uint: + rslt += fmt.Sprintf(", %#v", *i) + case *uint8: + rslt += fmt.Sprintf(", %#v", *i) + case *uint16: + rslt += fmt.Sprintf(", %#v", *i) + case *uint32: + rslt += fmt.Sprintf(", %#v", *i) + case *uint64: + rslt += fmt.Sprintf(", %#v", *i) + case *uintptr: + rslt += fmt.Sprintf(", %#v", *i) + case *float32: + rslt += fmt.Sprintf(", %#v", *i) + case *float64: + rslt += fmt.Sprintf(", %#v", *i) + case *complex64: + rslt += fmt.Sprintf(", %#v", *i) + case *complex128: + rslt += fmt.Sprintf(", %#v", *i) + case *string: + rslt += fmt.Sprintf(", %#v", *i) + default: + rslt += fmt.Sprintf(", %#v", v) + } + } else { + rslt += fmt.Sprintf(", %#v", v) + } + } + if len(rslt) > 0 { + rslt = rslt[2:] + } + return rslt +} + +func Trace(skip int, prfx string, vals ...interface{}) func(vals ...interface{}) { + if "" == TracePattern { + return func(vals ...interface{}) { + } + } + pc, _, _, ok := runtime.Caller(skip + 1) + name := "" + if ok { + fn := runtime.FuncForPC(pc) + name = fn.Name() + if m, _ := filepath.Match(TracePattern, name); !m { + return func(vals ...interface{}) { + } + } + } + if "" != prfx { + prfx = prfx + ": " + } + args := traceJoin(false, vals) + return func(vals ...interface{}) { + form := "%v%v(%v) = %v" + rslt := "" + rcvr := recover() + if nil != rcvr { + debug.PrintStack() + rslt = fmt.Sprintf("!PANIC:%v", rcvr) + } else { + if len(vals) != 1 { + form = "%v%v(%v) = (%v)" + } + rslt = traceJoin(true, vals) + } + log.Printf(form, prfx, name, args, rslt) + if nil != rcvr { + panic(rcvr) + } + } +} diff --git a/pkg/winfsp/winfs.go b/pkg/winfsp/winfs.go new file mode 100644 index 0000000..14de1f0 --- /dev/null +++ b/pkg/winfsp/winfs.go @@ -0,0 +1,597 @@ +//go:build windows +// +build windows + +/* + * JuiceFS, Copyright 2020 Juicedata, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package winfsp + +import ( + "fmt" + "os" + "path" + "runtime" + "strings" + "sync" + "syscall" + "time" + + "github.com/billziss-gh/cgofuse/fuse" + + "github.com/juicedata/juicefs/pkg/fs" + "github.com/juicedata/juicefs/pkg/meta" + "github.com/juicedata/juicefs/pkg/utils" + "github.com/juicedata/juicefs/pkg/vfs" +) + +var logger = utils.GetLogger("juicefs") + +type Ino = meta.Ino + +func trace(vals ...interface{}) func(vals ...interface{}) { + uid, gid, pid := fuse.Getcontext() + return Trace(1, fmt.Sprintf("[uid=%v,gid=%v,pid=%d]", uid, gid, pid), vals...) +} + +type juice struct { + fuse.FileSystemBase + sync.Mutex + conf *vfs.Config + vfs *vfs.VFS + fs *fs.FileSystem + host *fuse.FileSystemHost + handlers map[uint64]meta.Ino + badfd map[uint64]uint64 + + asRoot bool + delayClose int +} + +// Init is called when the file system is created. +func (j *juice) Init() { + j.handlers = make(map[uint64]meta.Ino) + j.badfd = make(map[uint64]uint64) +} + +func (j *juice) newContext() vfs.LogContext { + if j.asRoot { + return vfs.NewLogContext(meta.Background) + } + uid, gid, pid := fuse.Getcontext() + if uid == 0xffffffff { + uid = 0 + } + if gid == 0xffffffff { + gid = 0 + } + if pid == -1 { + pid = 0 + } + ctx := meta.NewContext(uint32(pid), uid, []uint32{gid}) + return vfs.NewLogContext(ctx) +} + +// Statfs gets file system statistics. +func (j *juice) Statfs(path string, stat *fuse.Statfs_t) int { + ctx := j.newContext() + // defer trace(path)(stat) + var totalspace, availspace, iused, iavail uint64 + j.fs.Meta().StatFS(ctx, &totalspace, &availspace, &iused, &iavail) + var bsize uint64 = 4096 + blocks := totalspace / bsize + bavail := availspace / bsize + stat.Namemax = 255 + stat.Frsize = 4096 + stat.Bsize = bsize + stat.Blocks = blocks + stat.Bfree = bavail + stat.Bavail = bavail + stat.Files = iused + iavail + stat.Ffree = iavail + stat.Favail = iavail + return 0 +} + +func errorconv(err syscall.Errno) int { + return -int(err) +} + +// Mknod creates a file node. +func (j *juice) Mknod(p string, mode uint32, dev uint64) (e int) { + ctx := j.newContext() + defer trace(p, mode, dev)(&e) + parent, err := j.fs.Open(ctx, path.Dir(p), 0) + if err != 0 { + e = errorconv(err) + return + } + _, errno := j.vfs.Mknod(ctx, parent.Inode(), path.Base(p), uint16(mode), 0, uint32(dev)) + e = -int(errno) + return +} + +// Mkdir creates a directory. +func (j *juice) Mkdir(path string, mode uint32) (e int) { + if path == "/.UMOUNTIT" { + logger.Infof("Umount %s ...", j.conf.Mountpoint) + go j.host.Unmount() + return -fuse.ENOENT + } + ctx := j.newContext() + defer trace(path, mode)(&e) + e = errorconv(j.fs.Mkdir(ctx, path, uint16(mode))) + return +} + +// Unlink removes a file. +func (j *juice) Unlink(path string) (e int) { + ctx := j.newContext() + defer trace(path)(&e) + e = errorconv(j.fs.Delete(ctx, path)) + return +} + +// Rmdir removes a directory. 
+func (j *juice) Rmdir(path string) (e int) { + ctx := j.newContext() + defer trace(path)(&e) + e = errorconv(j.fs.Delete(ctx, path)) + return +} + +func (j *juice) Symlink(target string, newpath string) (e int) { + ctx := j.newContext() + defer trace(target, newpath)(&e) + parent, err := j.fs.Open(ctx, path.Dir(newpath), 0) + if err != 0 { + e = errorconv(err) + return + } + _, errno := j.vfs.Symlink(ctx, target, parent.Inode(), path.Base(newpath)) + e = -int(errno) + return +} + +func (j *juice) Readlink(path string) (e int, target string) { + ctx := j.newContext() + defer trace(path)(&e, &target) + fi, err := j.fs.Stat(ctx, path) + if err != 0 { + e = errorconv(err) + return + } + t, errno := j.vfs.Readlink(ctx, fi.Inode()) + e = -int(errno) + target = string(t) + return +} + +// Rename renames a file. +func (j *juice) Rename(oldpath string, newpath string) (e int) { + ctx := j.newContext() + defer trace(oldpath, newpath)(&e) + e = errorconv(j.fs.Rename(ctx, oldpath, newpath, 0)) + return +} + +// Chmod changes the permission bits of a file. +func (j *juice) Chmod(path string, mode uint32) (e int) { + ctx := j.newContext() + defer trace(path, mode)(&e) + f, err := j.fs.Open(ctx, path, 0) + if err != 0 { + e = errorconv(err) + return + } + e = errorconv(f.Chmod(ctx, uint16(mode))) + return +} + +// Chown changes the owner and group of a file. +func (j *juice) Chown(path string, uid uint32, gid uint32) (e int) { + ctx := j.newContext() + defer trace(path, uid, gid)(&e) + f, err := j.fs.Open(ctx, path, 0) + if err != 0 { + e = errorconv(err) + return + } + if runtime.GOOS == "windows" { + // FIXME: don't change ownership in windows + return 0 + } + info, _ := f.Stat() + if uid == 0xffffffff { + uid = uint32(info.(*fs.FileStat).Uid()) + } + if gid == 0xffffffff { + gid = uint32(info.(*fs.FileStat).Gid()) + } + e = errorconv(f.Chown(ctx, uid, gid)) + return +} + +// Utimens changes the access and modification times of a file. +func (j *juice) Utimens(path string, tmsp []fuse.Timespec) (e int) { + ctx := j.newContext() + defer trace(path, tmsp)(&e) + f, err := j.fs.Open(ctx, path, 0) + if err != 0 { + e = errorconv(err) + } else { + e = errorconv(f.Utime(ctx, tmsp[0].Sec*1000+tmsp[0].Nsec/1e6, tmsp[1].Sec*1000+tmsp[1].Nsec/1e6)) + } + return +} + +// Create creates and opens a file. +// The flags are a combination of the fuse.O_* constants. +func (j *juice) Create(p string, flags int, mode uint32) (e int, fh uint64) { + ctx := j.newContext() + defer trace(p, flags, mode)(&e, &fh) + parent, err := j.fs.Open(ctx, path.Dir(p), 0) + if err != 0 { + e = errorconv(err) + return + } + entry, fh, errno := j.vfs.Create(ctx, parent.Inode(), path.Base(p), uint16(mode), 0, uint32(flags)) + if errno == 0 { + j.Lock() + j.handlers[fh] = entry.Inode + j.Unlock() + } + e = -int(errno) + return +} + +// Open opens a file. +// The flags are a combination of the fuse.O_* constants. +func (j *juice) Open(path string, flags int) (e int, fh uint64) { + var fi fuse.FileInfo_t + fi.Flags = flags + e = j.OpenEx(path, &fi) + fh = fi.Fh + return +} + +// Open opens a file. +// The flags are a combination of the fuse.O_* constants. 
+func (j *juice) OpenEx(path string, fi *fuse.FileInfo_t) (e int) { + ctx := j.newContext() + defer trace(path, fi.Flags)(&e) + f, err := j.fs.Open(ctx, path, 0) + if err != 0 { + e = -fuse.ENOENT + return + } + entry, fh, errno := j.vfs.Open(ctx, f.Inode(), uint32(fi.Flags)) + if errno == 0 { + fi.Fh = fh + if vfs.IsSpecialNode(f.Inode()) { + fi.DirectIo = true + } else { + fi.KeepCache = entry.Attr.KeepCache + } + j.Lock() + j.handlers[fh] = f.Inode() + j.Unlock() + } + e = -int(errno) + return +} + +func attrToStat(inode Ino, attr *meta.Attr, stat *fuse.Stat_t) { + stat.Ino = uint64(inode) + stat.Mode = attr.SMode() + stat.Uid = attr.Uid + if stat.Uid == 0 { + stat.Uid = 18 // System + } + stat.Gid = attr.Gid + if stat.Gid == 0 { + stat.Gid = 18 // System + } + stat.Birthtim.Sec = attr.Atime + stat.Birthtim.Nsec = int64(attr.Atimensec) + stat.Atim.Sec = attr.Atime + stat.Atim.Nsec = int64(attr.Atimensec) + stat.Mtim.Sec = attr.Mtime + stat.Mtim.Nsec = int64(attr.Mtimensec) + stat.Ctim.Sec = attr.Ctime + stat.Ctim.Nsec = int64(attr.Ctimensec) + stat.Nlink = attr.Nlink + var rdev uint32 + var size, blocks uint64 + switch attr.Typ { + case meta.TypeDirectory: + fallthrough + case meta.TypeSymlink: + fallthrough + case meta.TypeFile: + size = attr.Length + blocks = (size + 0xffff) / 0x10000 + stat.Blksize = 0x10000 + case meta.TypeBlockDev: + fallthrough + case meta.TypeCharDev: + rdev = attr.Rdev + } + stat.Size = int64(size) + stat.Blocks = int64(blocks) + stat.Rdev = uint64(rdev) +} + +func (j *juice) h2i(fh *uint64) meta.Ino { + defer j.Unlock() + j.Lock() + ino := j.handlers[*fh] + if ino == 0 { + newfh := j.badfd[*fh] + if newfh != 0 { + ino = j.handlers[newfh] + if ino > 0 { + *fh = newfh + } + } + } + return ino +} + +func (j *juice) reopen(p string, fh *uint64) meta.Ino { + e, newfh := j.Open(p, os.O_RDWR) + if e != 0 { + return 0 + } + j.Lock() + defer j.Unlock() + j.badfd[*fh] = newfh + *fh = newfh + return j.handlers[newfh] +} + +// Getattr gets file attributes. +func (j *juice) Getattr(p string, stat *fuse.Stat_t, fh uint64) (e int) { + ctx := j.newContext() + defer trace(p, fh)(stat, &e) + ino := j.h2i(&fh) + if ino == 0 { + fi, err := j.fs.Stat(ctx, p) + if err != 0 { + e = -fuse.ENOENT + return + } + ino = fi.Inode() + } + entry, errrno := j.vfs.GetAttr(ctx, ino, 0) + if errrno != 0 { + e = -int(errrno) + return + } + attrToStat(entry.Inode, entry.Attr, stat) + return +} + +// Truncate changes the size of a file. +func (j *juice) Truncate(path string, size int64, fh uint64) (e int) { + ctx := j.newContext() + defer trace(path, size, fh)(&e) + ino := j.h2i(&fh) + if ino == 0 { + e = -fuse.EBADF + return + } + e = -int(j.vfs.Truncate(ctx, ino, size, 1, nil)) + return +} + +// Read reads data from a file. +func (j *juice) Read(path string, buf []byte, off int64, fh uint64) (e int) { + ctx := j.newContext() + defer trace(path, len(buf), off, fh)(&e) + ino := j.h2i(&fh) + if ino == 0 { + logger.Warnf("read from released fd %d for %s, re-open it", fh, path) + ino = j.reopen(path, &fh) + } + if ino == 0 { + e = -fuse.EBADF + return + } + n, err := j.vfs.Read(ctx, ino, buf, uint64(off), fh) + if err != 0 { + e = -int(err) + return + } + return n +} + +// Write writes data to a file. 
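h2i and reopen above deal with operations that arrive on a file handle that has already been released: the file is reopened and the stale handle is remembered as an alias for the new one. A minimal sketch of that remapping; the type and field names here are illustrative, not the actual ones.

package main

import "fmt"

// table stands in for the handlers/badfd maps of the juice struct.
type table struct {
	handlers map[uint64]uint64 // file handle -> inode
	badfd    map[uint64]uint64 // released handle -> replacement handle
}

// resolve mirrors h2i: unknown handles are looked up in badfd and, if an alias
// exists, the caller's handle is rewritten to the replacement.
func (t *table) resolve(fh *uint64) uint64 {
	ino := t.handlers[*fh]
	if ino == 0 {
		if newfh, ok := t.badfd[*fh]; ok {
			if ino = t.handlers[newfh]; ino > 0 {
				*fh = newfh
			}
		}
	}
	return ino
}

func main() {
	t := &table{handlers: map[uint64]uint64{7: 42}, badfd: map[uint64]uint64{3: 7}}
	fh := uint64(3)                 // a handle that was already released, aliased to 7
	fmt.Println(t.resolve(&fh), fh) // 42 7
}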
+func (j *juice) Write(path string, buff []byte, off int64, fh uint64) (e int) { + ctx := j.newContext() + defer trace(path, len(buff), off, fh)(&e) + ino := j.h2i(&fh) + if ino == 0 { + logger.Warnf("write to released fd %d for %s, re-open it", fh, path) + ino = j.reopen(path, &fh) + } + if ino == 0 { + e = -fuse.EBADF + return + } + errno := j.vfs.Write(ctx, ino, buff, uint64(off), fh) + if errno != 0 { + e = -int(errno) + } else { + e = len(buff) + } + return +} + +// Flush flushes cached file data. +func (j *juice) Flush(path string, fh uint64) (e int) { + ctx := j.newContext() + defer trace(path, fh)(&e) + ino := j.h2i(&fh) + if ino == 0 { + e = -fuse.EBADF + return + } + e = -int(j.vfs.Flush(ctx, ino, fh, 0)) + return +} + +// Release closes an open file. +func (j *juice) Release(path string, fh uint64) int { + defer trace(path, fh)() + orig := fh + ino := j.h2i(&fh) + if ino == 0 { + logger.Warnf("release invalid fd %d for %s", fh, path) + return -fuse.EBADF + } + go func() { + time.Sleep(time.Second * time.Duration(j.delayClose)) + j.Lock() + delete(j.handlers, fh) + if orig != fh { + delete(j.badfd, orig) + } + j.Unlock() + j.vfs.Release(j.newContext(), ino, fh) + }() + return 0 +} + +// Fsync synchronizes file contents. +func (j *juice) Fsync(path string, datasync bool, fh uint64) (e int) { + ctx := j.newContext() + defer trace(path, datasync, fh)(&e) + ino := j.h2i(&fh) + if ino == 0 { + e = -fuse.EBADF + } else { + e = -int(j.vfs.Fsync(ctx, ino, 1, fh)) + } + return +} + +// Opendir opens a directory. +func (j *juice) Opendir(path string) (e int, fh uint64) { + ctx := j.newContext() + defer trace(path)(&e, &fh) + f, err := j.fs.Open(ctx, path, 0) + if err != 0 { + e = -fuse.ENOENT + return + } + fh, errno := j.vfs.Opendir(ctx, f.Inode()) + if errno == 0 { + j.Lock() + j.handlers[fh] = f.Inode() + j.Unlock() + } + e = -int(errno) + return +} + +// Readdir reads a directory. +func (j *juice) Readdir(path string, + fill func(name string, stat *fuse.Stat_t, ofst int64) bool, + ofst int64, fh uint64) (e int) { + defer trace(path, ofst, fh)(&e) + ino := j.h2i(&fh) + if ino == 0 { + e = -fuse.EBADF + return + } + ctx := j.newContext() + entries, err := j.vfs.Readdir(ctx, ino, 100000, int(ofst), fh, true) + if err != 0 { + e = -int(err) + return + } + var st fuse.Stat_t + var ok bool + var full = true + // all the entries should have same format + for _, e := range entries { + if !e.Attr.Full { + full = false + break + } + } + for _, e := range entries { + name := string(e.Name) + if full { + j.vfs.UpdateLength(e.Inode, e.Attr) + attrToStat(e.Inode, e.Attr, &st) + ok = fill(name, &st, 0) + } else { + ok = fill(name, nil, 0) + } + if !ok { + break + } + } + return +} + +// Releasedir closes an open directory. 
+func (j *juice) Releasedir(path string, fh uint64) (e int) { + defer trace(path, fh)(&e) + ino := j.h2i(&fh) + if ino == 0 { + e = -fuse.EBADF + return + } + j.Lock() + delete(j.handlers, fh) + j.Unlock() + e = -int(j.vfs.Releasedir(j.newContext(), ino, fh)) + return +} + +func Serve(v *vfs.VFS, fuseOpt string, fileCacheTo float64, asRoot bool, delayClose int) error { + var jfs juice + conf := v.Conf + jfs.conf = conf + jfs.vfs = v + var err error + jfs.fs, err = fs.NewFileSystem(conf, v.Meta, v.Store) + if err != nil { + logger.Fatalf("Initialize FileSystem failed: %s", err) + } + jfs.asRoot = asRoot + jfs.delayClose = delayClose + host := fuse.NewFileSystemHost(&jfs) + jfs.host = host + var options = "volname=" + conf.Format.Name + options += ",ExactFileSystemName=JuiceFS,create_umask=022,ThreadCount=16" + options += ",DirInfoTimeout=1000,VolumeInfoTimeout=1000,KeepFileCache" + options += fmt.Sprintf(",FileInfoTimeout=%d", int(fileCacheTo*1000)) + options += ",VolumePrefix=/juicefs/" + conf.Format.Name + if asRoot { + options += ",uid=-1,gid=-1" + } + if fuseOpt != "" { + options += "," + fuseOpt + } + host.SetCapCaseInsensitive(strings.HasSuffix(conf.Mountpoint, ":")) + host.SetCapReaddirPlus(true) + logger.Debugf("mount point: %s, options: %s", conf.Mountpoint, options) + _ = host.Mount(conf.Mountpoint, []string{"-o", options}) + return nil +} diff --git a/sdk/java/.gitignore b/sdk/java/.gitignore new file mode 100644 index 0000000..8b8c81d --- /dev/null +++ b/sdk/java/.gitignore @@ -0,0 +1,2 @@ +target/ +dependency-reduced-pom.xml diff --git a/sdk/java/Makefile b/sdk/java/Makefile new file mode 100644 index 0000000..605cd74 --- /dev/null +++ b/sdk/java/Makefile @@ -0,0 +1,24 @@ +GOROOT=$(shell go env GOROOT) + +all: package + +libjfs/target/libjfs.so.gz: ../../pkg/*/*.go libjfs/*.go + make -C libjfs + +compile: + mvn compile -B --quiet +test: libjfs + mvn test -B --quiet +package: libjfs/target/libjfs.so.gz + mvn package -B -Dmaven.test.skip=true +docker-package: + docker run --rm --mount "type=bind,src=$(GOROOT),target=/go" \ + --mount "type=bind,src=$(HOME)/go,target=/root/go" \ + --mount "type=bind,src=$(shell pwd)/../../,target=/workspace/juicefs" \ + --workdir /workspace/juicefs juicedata/sdk-builder \ + /bin/bash -c 'cd sdk/java && make' + +win: win-package package + +win-package: ../../pkg/*/*.go libjfs/*.go + make -C libjfs win diff --git a/sdk/java/conf/contract/juicefs.xml b/sdk/java/conf/contract/juicefs.xml new file mode 100644 index 0000000..e8c7caa --- /dev/null +++ b/sdk/java/conf/contract/juicefs.xml @@ -0,0 +1,86 @@ + + + fs.contract.test.fs.jfs + jfs:/// + + + fs.jfs.impl + io.juicefs.JuiceFileSystem + + + juicefs.no-usage-report + true + + + juicefs.names + a.local,b.local,c.local,d.local,e.local + + + juicefs.hosts + 127.0.0.2,127.0.0.3,127.0.0.4,127.0.0.5,127.0.0.6 + + + fs.contract.test.root-tests-enabled + true + + + fs.contract.is-case-sensitive + true + + + fs.contract.supports-append + true + + + fs.contract.supports-atomic-directory-delete + true + + + fs.contract.supports-block-locality + true + + + fs.contract.supports-atomic-rename + true + + + fs.contract.supports-settimes + true + + + fs.contract.supports-getfilestatus + true + + + fs.contract.supports-concat + true + + + fs.contract.supports-seek + true + + + fs.contract.rejects-seek-past-eof + true + + + fs.contract.supports-strict-exceptions + true + + + fs.contract.supports-unix-permissions + true + + + fs.contract.rename-returns-false-if-dest-exists + true + + + fs.contract.supports-file-reference 
+ true + + + fs.contract.rename-returns-false-if-source-missing + true + + diff --git a/sdk/java/conf/core-site.xml b/sdk/java/conf/core-site.xml new file mode 100644 index 0000000..6c44852 --- /dev/null +++ b/sdk/java/conf/core-site.xml @@ -0,0 +1,36 @@ + + + + + fs.defaultFS + jfs://dev/ + + + fs.jfs.impl + io.juicefs.JuiceFileSystem + + + juicefs.no-usage-report + true + + + juicefs.file.checksum + true + + + juicefs.access-log + /tmp/juicefs-access.log + + + juicefs.dev.meta + 127.0.0.1 + + + juicefs.names + a.local,b.local,c.local,d.local,e.local + + + juicefs.hosts + 127.0.0.2,127.0.0.3,127.0.0.4,127.0.0.5,127.0.0.6 + + diff --git a/sdk/java/conf/log4j.properties b/sdk/java/conf/log4j.properties new file mode 100644 index 0000000..ac18fda --- /dev/null +++ b/sdk/java/conf/log4j.properties @@ -0,0 +1,23 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +# Set everything to be logged to the console +log4j.rootCategory=INFO, console +log4j.appender.console=org.apache.log4j.ConsoleAppender +log4j.appender.console.target=System.err +log4j.appender.console.layout=org.apache.log4j.PatternLayout +log4j.appender.console.layout.ConversionPattern=%d{yy/MM/dd HH:mm:ss} %p %c{1}: %m%n \ No newline at end of file diff --git a/sdk/java/libjfs/Makefile b/sdk/java/libjfs/Makefile new file mode 100644 index 0000000..c0d47af --- /dev/null +++ b/sdk/java/libjfs/Makefile @@ -0,0 +1,26 @@ +export GO111MODULE=on +LDFLAGS = -s -w + +REVISION := $(shell git rev-parse --short HEAD 2>/dev/null) +REVISIONDATE := $(shell git log -1 --pretty=format:'%ad' --date short 2>/dev/null) +PKG := github.com/juicedata/juicefs/pkg/version +LDFLAGS = -s -w +LDFLAGS += -X $(PKG).revision=$(REVISION) \ + -X $(PKG).revisionDate=$(REVISIONDATE) + +all: target/libjfs.so.gz + +win: libjfs.dll + mkdir -p target + gzip -c libjfs.dll > target/libjfs.so.gz + +target/libjfs.so.gz: libjfs.so + mkdir -p target + gzip -c libjfs.so > target/libjfs.so.gz + +libjfs.so: *.go ../../../pkg/*/*.go + go build -buildmode=c-shared -ldflags="$(LDFLAGS)" -o libjfs.so . + +libjfs.dll: *.go ../../../pkg/*/*.go + GOOS=windows CGO_ENABLED=1 CC=x86_64-w64-mingw32-gcc go build -o libjfs.dll \ + -buildmode=c-shared -ldflags="$(LDFLAGS)" diff --git a/sdk/java/libjfs/guid.go b/sdk/java/libjfs/guid.go new file mode 100644 index 0000000..a827ff3 --- /dev/null +++ b/sdk/java/libjfs/guid.go @@ -0,0 +1,145 @@ +/* + * JuiceFS, Copyright 2020 Juicedata, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package main + +import ( + "crypto/md5" + "encoding/binary" + "os/user" + "strconv" + "sync" +) + +type pwent struct { + id int + name string +} + +type mapping struct { + sync.Mutex + salt string + usernames map[string]int + userIDs map[int]string + groups map[string]int + groupIDs map[int]string +} + +func newMapping(salt string) *mapping { + m := &mapping{ + salt: salt, + usernames: make(map[string]int), + userIDs: make(map[int]string), + groups: make(map[string]int), + groupIDs: make(map[int]string), + } + m.update(genAllUids(), genAllGids()) + return m +} + +func (m *mapping) genGuid(name string) int { + digest := md5.Sum([]byte(m.salt + name + m.salt)) + a := binary.LittleEndian.Uint64(digest[0:8]) + b := binary.LittleEndian.Uint64(digest[8:16]) + return int(uint32(a ^ b)) +} + +func (m *mapping) lookupUser(name string) int { + m.Lock() + defer m.Unlock() + var id int + if id, ok := m.usernames[name]; ok { + return id + } + u, _ := user.Lookup(name) + if u != nil { + id, _ = strconv.Atoi(u.Uid) + } else { + id = m.genGuid(name) + } + m.usernames[name] = id + m.userIDs[id] = name + return id +} + +func (m *mapping) lookupGroup(name string) int { + m.Lock() + defer m.Unlock() + var id int + if id, ok := m.groups[name]; ok { + return id + } + g, _ := user.LookupGroup(name) + if g == nil { + id = m.genGuid(name) + } else { + id, _ = strconv.Atoi(g.Gid) + } + m.groups[name] = id + m.groupIDs[id] = name + return 0 +} + +func (m *mapping) lookupUserID(id int) string { + m.Lock() + defer m.Unlock() + if name, ok := m.userIDs[id]; ok { + return name + } + u, _ := user.LookupId(strconv.Itoa(id)) + if u == nil { + u = &user.User{Username: strconv.Itoa(id)} + } + name := u.Username + if len(name) > 49 { + name = name[:49] + } + m.usernames[name] = id + m.userIDs[id] = name + return name +} + +func (m *mapping) lookupGroupID(id int) string { + m.Lock() + defer m.Unlock() + if name, ok := m.groupIDs[id]; ok { + return name + } + g, _ := user.LookupGroupId(strconv.Itoa(id)) + if g == nil { + g = &user.Group{Name: strconv.Itoa(id)} + } + name := g.Name + if len(name) > 49 { + name = name[:49] + } + m.groups[name] = id + m.groupIDs[id] = name + return name +} + +func (m *mapping) update(uids []pwent, gids []pwent) { + m.Lock() + defer m.Unlock() + for _, u := range uids { + m.usernames[u.name] = u.id + m.userIDs[u.id] = u.name + } + for _, g := range gids { + m.groups[g.name] = g.id + m.groupIDs[g.id] = g.name + } +} diff --git a/sdk/java/libjfs/guid_unix.go b/sdk/java/libjfs/guid_unix.go new file mode 100644 index 0000000..e42f48f --- /dev/null +++ b/sdk/java/libjfs/guid_unix.go @@ -0,0 +1,68 @@ +//go:build !windows +// +build !windows + +/* + * JuiceFS, Copyright 2020 Juicedata, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
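When a user or group name has no local uid/gid to map to, mapping.genGuid above derives a stable synthetic id from a salted MD5 digest, with the volume name acting as the salt. The same derivation as a standalone program:

package main

import (
	"crypto/md5"
	"encoding/binary"
	"fmt"
)

// genGuid reproduces mapping.genGuid above: the salt (the volume name) and the
// account name are hashed and folded into a 32-bit id.
func genGuid(salt, name string) int {
	digest := md5.Sum([]byte(salt + name + salt))
	a := binary.LittleEndian.Uint64(digest[0:8])
	b := binary.LittleEndian.Uint64(digest[8:16])
	return int(uint32(a ^ b))
}

func main() {
	// the same (salt, name) pair always yields the same synthetic id
	fmt.Println(genGuid("myvolume", "alice"))
	fmt.Println(genGuid("myvolume", "alice"))
}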
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package main + +// #include +// #include +import "C" +import ( + "sync" +) + +// protect getpwent and getgrent +var cgoMutex sync.Mutex + +func genAllUids() []pwent { + cgoMutex.Lock() + defer cgoMutex.Unlock() + C.setpwent() + defer C.endpwent() + var uids []pwent + for { + p := C.getpwent() + if p == nil { + break + } + name := C.GoString(p.pw_name) + if name != "root" { + uids = append(uids, pwent{int(p.pw_uid), name}) + } + } + return uids +} + +func genAllGids() []pwent { + cgoMutex.Lock() + defer cgoMutex.Unlock() + C.setgrent() + defer C.endgrent() + var gids []pwent + for { + p := C.getgrent() + if p == nil { + break + } + name := C.GoString(p.gr_name) + if name != "root" { + gids = append(gids, pwent{int(p.gr_gid), name}) + } + } + return gids +} diff --git a/sdk/java/libjfs/guid_windows.go b/sdk/java/libjfs/guid_windows.go new file mode 100644 index 0000000..52cc411 --- /dev/null +++ b/sdk/java/libjfs/guid_windows.go @@ -0,0 +1,139 @@ +/* + * JuiceFS, Copyright 2021 Juicedata, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package main + +import ( + "os/exec" + "strconv" + "strings" +) + +func genAllUids() []pwent { + out, err := exec.Command("wmic", "useraccount", "list", "brief").Output() + if err != nil { + logger.Errorf("cmd : %s", err) + return nil + } + lines := strings.Split(string(out), "\r\n") + if len(lines) < 2 { + logger.Errorf("no uids: %s", string(out)) + return nil + } + var uids []pwent + for _, line := range lines[1 : len(lines)-1] { + fields := strings.Fields(line) + if len(fields) < 5 { + continue + } + name := fields[len(fields)-2] + sid := fields[len(fields)-1] + ps := strings.Split(sid, "-") + auth, _ := strconv.Atoi(ps[2]) + count := len(ps) - 3 + var subAuth int + if count > 0 { + subAuth, _ = strconv.Atoi(ps[3]) + } + rid, _ := strconv.Atoi(ps[len(ps)-1]) + var uid int + if auth == 5 { + if count == 1 { + // "SYSTEM" S-1-5-18 <=> uid/gid: 18 + uid = rid + } else if count == 2 && subAuth == 32 { + // "Users" S-1-5-32-545 <=> uid/gid: 545 + uid = rid + } else if count >= 2 && subAuth == 5 { + // not supported + } else if count >= 5 && subAuth == 21 { + // S-1-5-21-X-Y-Z-RID <=> uid/gid: 0x30000 + RID + // S-1-5-21-X-Y-Z-RID <=> uid/gid: 0x100000 + RID + uid = 0x30000 + rid + } else if count == 2 { + // S-1-5-X-RID <=> uid/gid: 0x1000 * X + RID + uid = 0x1000*subAuth + rid + } + } else if auth == 16 { + // S-1-16-RID <=> uid/gid: 0x60000 + RID + uid = 0x60000*subAuth + rid + } + if uid > 0 { + uids = append(uids, pwent{uid, name}) + logger.Tracef("found account %s -> %d (%s)", name, uid, sid) + } + } + return uids +} + +func genAllGids() []pwent { + out, err := exec.Command("wmic", "group", "list", "brief").Output() + if err != nil { + logger.Errorf("cmd : %s", err) + return nil + } + lines := strings.Split(string(out), "\r\n") + if len(lines) < 2 { + logger.Errorf("no gids: %s", string(out)) + return nil + } + title := lines[0] + nameIndex := strings.Index(title, "Name") + sidIndex := strings.Index(title, "SID") + var gids []pwent + for _, line := range lines[1 : len(lines)-1] { + if len(line) < sidIndex { + continue + } + name := strings.TrimSpace(line[nameIndex : sidIndex-1]) + sid := strings.TrimSpace(line[sidIndex:]) + ps := strings.Split(sid, "-") + auth, _ := strconv.Atoi(ps[2]) + count := len(ps) - 3 + var subAuth int + if count > 0 { + subAuth, _ = strconv.Atoi(ps[3]) + } + rid, _ := strconv.Atoi(ps[len(ps)-1]) + var gid int + if auth == 5 { + if count == 1 { + // "SYSTEM" S-1-5-18 <=> uid/gid: 18 + gid = rid + } else if count == 2 && subAuth == 32 { + // "Users" S-1-5-32-545 <=> uid/gid: 545 + gid = rid + } else if count >= 2 && subAuth == 5 { + // not supported + } else if count >= 5 && subAuth == 21 { + // S-1-5-21-X-Y-Z-RID <=> uid/gid: 0x30000 + RID + // S-1-5-21-X-Y-Z-RID <=> uid/gid: 0x100000 + RID + gid = 0x30000 + rid + } else if count == 2 { + // S-1-5-X-RID <=> uid/gid: 0x1000 * X + RID + gid = 0x1000*subAuth + rid + } + } else if auth == 16 { + // S-1-16-RID <=> uid/gid: 0x60000 + RID + gid = 0x60000*subAuth + rid + } + if gid > 0 { + gids = append(gids, pwent{gid, name}) + logger.Tracef("found group %s -> %d (%s)", name, gid, sid) + } + } + return gids +} diff --git a/sdk/java/libjfs/main.go b/sdk/java/libjfs/main.go new file mode 100644 index 0000000..ecb28c4 --- /dev/null +++ b/sdk/java/libjfs/main.go @@ -0,0 +1,1119 @@ +/* + * JuiceFS, Copyright 2020 Juicedata, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
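genAllUids and genAllGids above translate Windows SIDs into numeric ids following a few fixed rules. A simplified sketch covering only the well-known cases; the real code also handles the S-1-5-X-RID and S-1-16-RID forms.

package main

import (
	"fmt"
	"strconv"
	"strings"
)

// sidToID applies the same rules as genAllUids/genAllGids for the common SID
// shapes: well-known accounts and built-in groups keep their RID, while
// local/domain accounts are offset by 0x30000.
func sidToID(sid string) int {
	ps := strings.Split(sid, "-")
	if len(ps) < 4 {
		return 0
	}
	auth, _ := strconv.Atoi(ps[2])
	rid, _ := strconv.Atoi(ps[len(ps)-1])
	count := len(ps) - 3
	var subAuth int
	if count > 0 {
		subAuth, _ = strconv.Atoi(ps[3])
	}
	if auth != 5 {
		return 0 // other authorities are left out of this sketch
	}
	switch {
	case count == 1: // well-known accounts, e.g. SYSTEM S-1-5-18 -> 18
		return rid
	case count == 2 && subAuth == 32: // built-in groups, e.g. Users S-1-5-32-545 -> 545
		return rid
	case count >= 5 && subAuth == 21: // local/domain accounts: S-1-5-21-X-Y-Z-RID -> 0x30000+RID
		return 0x30000 + rid
	}
	return 0
}

func main() {
	fmt.Println(sidToID("S-1-5-18"))                                      // 18
	fmt.Println(sidToID("S-1-5-32-545"))                                  // 545
	fmt.Println(sidToID("S-1-5-21-1004336348-1177238915-682003330-1001")) // 0x30000 + 1001
}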
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package main + +// #cgo linux LDFLAGS: -ldl +// #cgo linux CFLAGS: -Wno-discarded-qualifiers -D_GNU_SOURCE +// #include +// #include +// #include +// #include +// #include +// #include +import "C" +import ( + "bytes" + "encoding/json" + "fmt" + "io" + "net/http" + _ "net/http/pprof" + "os" + "path/filepath" + "runtime/debug" + "strconv" + "strings" + "sync" + "syscall" + "time" + "unsafe" + + "github.com/juicedata/juicefs/pkg/chunk" + "github.com/juicedata/juicefs/pkg/fs" + "github.com/juicedata/juicefs/pkg/meta" + "github.com/juicedata/juicefs/pkg/metric" + "github.com/juicedata/juicefs/pkg/object" + "github.com/juicedata/juicefs/pkg/usage" + "github.com/juicedata/juicefs/pkg/utils" + "github.com/juicedata/juicefs/pkg/version" + "github.com/juicedata/juicefs/pkg/vfs" + "github.com/prometheus/client_golang/prometheus" + "github.com/prometheus/client_golang/prometheus/push" + + "github.com/sirupsen/logrus" +) + +var ( + filesLock sync.Mutex + openFiles = make(map[int]*fwrapper) + minFreeHandle = 1 + + fslock sync.Mutex + handlers = make(map[uintptr]*wrapper) + activefs = make(map[string][]*wrapper) + logger = utils.GetLogger("juicefs") + pusher *push.Pusher +) + +const ( + EPERM = -0x01 + ENOENT = -0x02 + EINTR = -0x04 + EIO = -0x05 + EACCES = -0x0d + EEXIST = -0x11 + ENOTDIR = -0x14 + EINVAL = -0x16 + ENOSPC = -0x1c + EROFS = -0x1e + ENOTEMPTY = -0x27 + ENODATA = -0x3d + ENOTSUP = -0x5f +) + +func errno(err error) int { + if err == nil { + return 0 + } + eno, ok := err.(syscall.Errno) + if !ok { + return EIO + } + if eno == 0 { + return 0 + } + // Use the errno in Linux for all the OS + switch eno { + case syscall.EPERM: + return EPERM + case syscall.ENOENT: + return ENOENT + case syscall.EINTR: + return EINTR + case syscall.EIO: + return EIO + case syscall.EACCES: + return EACCES + case syscall.EEXIST: + return EEXIST + case syscall.ENOTDIR: + return ENOTDIR + case syscall.EINVAL: + return EINVAL + case syscall.ENOSPC: + return ENOSPC + case syscall.EROFS: + return EROFS + case syscall.ENOTEMPTY: + return ENOTEMPTY + case syscall.ENODATA: + return ENODATA + case syscall.ENOTSUP: + return ENOTSUP + default: + logger.Warnf("unknown errno %d: %s", eno, err) + return -int(eno) + } +} + +type wrapper struct { + *fs.FileSystem + ctx meta.Context + m *mapping + user string + superuser string + supergroup string +} + +func (w *wrapper) withPid(pid int) meta.Context { + // mapping Java Thread ID to global one + ctx := meta.NewContext(w.ctx.Pid()*1000+uint32(pid), w.ctx.Uid(), w.ctx.Gids()) + ctx.WithValue(meta.CtxKey("behavior"), "Hadoop") + return ctx +} + +func (w *wrapper) isSuperuser(name string, groups []string) bool { + if name == w.superuser { + return true + } + for _, g := range groups { + if g == w.supergroup { + return true + } + } + return false +} + +func (w *wrapper) lookupUid(name string) uint32 { + if name == w.superuser { + return 0 + } + return uint32(w.m.lookupUser(name)) +} + +func (w *wrapper) lookupGid(group string) uint32 { + if group == w.supergroup { + return 0 + } + return uint32(w.m.lookupGroup(group)) +} + +func (w *wrapper) lookupGids(groups 
string) []uint32 { + var gids []uint32 + for _, g := range strings.Split(groups, ",") { + gids = append(gids, w.lookupGid(g)) + } + return gids +} + +func (w *wrapper) uid2name(uid uint32) string { + name := w.superuser + if uid > 0 { + name = w.m.lookupUserID(int(uid)) + } + return name +} + +func (w *wrapper) gid2name(gid uint32) string { + group := w.supergroup + if gid > 0 { + group = w.m.lookupGroupID(int(gid)) + } + return group +} + +type fwrapper struct { + *fs.File + w *wrapper +} + +func nextFileHandle(f *fs.File, w *wrapper) int { + filesLock.Lock() + defer filesLock.Unlock() + for i := minFreeHandle; ; i++ { + if _, ok := openFiles[i]; !ok { + openFiles[i] = &fwrapper{f, w} + minFreeHandle = i + 1 + return i + } + } +} + +func freeHandle(fd int) { + filesLock.Lock() + defer filesLock.Unlock() + f := openFiles[fd] + if f != nil { + delete(openFiles, fd) + if fd < minFreeHandle { + minFreeHandle = fd + } + } +} + +type javaConf struct { + MetaURL string `json:"meta"` + Bucket string `json:"bucket"` + ReadOnly bool `json:"readOnly"` + OpenCache float64 `json:"openCache"` + BackupMeta int64 `json:"backupMeta"` + CacheDir string `json:"cacheDir"` + CacheSize int64 `json:"cacheSize"` + FreeSpace string `json:"freeSpace"` + AutoCreate bool `json:"autoCreate"` + CacheFullBlock bool `json:"cacheFullBlock"` + Writeback bool `json:"writeback"` + MemorySize int `json:"memorySize"` + Prefetch int `json:"prefetch"` + Readahead int `json:"readahead"` + UploadLimit int `json:"uploadLimit"` + DownloadLimit int `json:"downloadLimit"` + MaxUploads int `json:"maxUploads"` + MaxDeletes int `json:"maxDeletes"` + GetTimeout int `json:"getTimeout"` + PutTimeout int `json:"putTimeout"` + FastResolve bool `json:"fastResolve"` + AttrTimeout float64 `json:"attrTimeout"` + EntryTimeout float64 `json:"entryTimeout"` + DirEntryTimeout float64 `json:"dirEntryTimeout"` + Debug bool `json:"debug"` + NoUsageReport bool `json:"noUsageReport"` + AccessLog string `json:"accessLog"` + PushGateway string `json:"pushGateway"` + PushInterval int `json:"pushInterval"` + PushAuth string `json:"pushAuth"` +} + +func getOrCreate(name, user, group, superuser, supergroup string, f func() *fs.FileSystem) uintptr { + fslock.Lock() + defer fslock.Unlock() + ws := activefs[name] + var jfs *fs.FileSystem + var m *mapping + if len(ws) > 0 { + jfs = ws[0].FileSystem + m = ws[0].m + } else { + m = newMapping(name) + jfs = f() + if jfs == nil { + return 0 + } + logger.Infof("JuiceFileSystem created for user:%s group:%s", user, group) + } + w := &wrapper{jfs, nil, m, user, superuser, supergroup} + if w.isSuperuser(user, strings.Split(group, ",")) { + w.ctx = meta.NewContext(uint32(os.Getpid()), 0, []uint32{0}) + } else { + w.ctx = meta.NewContext(uint32(os.Getpid()), w.lookupUid(user), w.lookupGids(group)) + } + activefs[name] = append(ws, w) + h := uintptr(unsafe.Pointer(w)) & 0x7fffffff // low 32bits + handlers[h] = w + return h +} + +func createStorage(format *meta.Format) (object.ObjectStorage, error) { + var blob object.ObjectStorage + var err error + if format.Shards > 1 { + blob, err = object.NewSharded(strings.ToLower(format.Storage), format.Bucket, format.AccessKey, format.SecretKey, format.Shards) + } else { + blob, err = object.CreateStorage(strings.ToLower(format.Storage), format.Bucket, format.AccessKey, format.SecretKey) + } + if err != nil { + return nil, err + } + return object.WithPrefix(blob, format.Name+"/"), nil +} + +//export jfs_init +func jfs_init(cname, jsonConf, user, group, superuser, supergroup *C.char) 
uintptr { + name := C.GoString(cname) + debug.SetGCPercent(50) + object.UserAgent = "JuiceFS-SDK " + version.Version() + return getOrCreate(name, C.GoString(user), C.GoString(group), C.GoString(superuser), C.GoString(supergroup), func() *fs.FileSystem { + var jConf javaConf + err := json.Unmarshal([]byte(C.GoString(jsonConf)), &jConf) + if err != nil { + logger.Fatalf("invalid json: %s", C.GoString(jsonConf)) + } + if jConf.Debug || os.Getenv("JUICEFS_DEBUG") != "" { + utils.SetLogLevel(logrus.DebugLevel) + go func() { + for port := 6060; port < 6100; port++ { + logger.Debugf("listen at 127.0.0.1:%d", port) + _ = http.ListenAndServe(fmt.Sprintf("127.0.0.1:%d", port), nil) + } + }() + } else if os.Getenv("JUICEFS_LOGLEVEL") != "" { + level, err := logrus.ParseLevel(os.Getenv("JUICEFS_LOGLEVEL")) + if err == nil { + utils.SetLogLevel(level) + } else { + utils.SetLogLevel(logrus.WarnLevel) + logger.Errorf("JUICEFS_LOGLEVEL: %s", err) + } + } else { + utils.SetLogLevel(logrus.WarnLevel) + } + + addr := jConf.MetaURL + m := meta.NewClient(addr, &meta.Config{ + Retries: 10, + Strict: true, + ReadOnly: jConf.ReadOnly, + OpenCache: time.Duration(jConf.OpenCache * 1e9), + MaxDeletes: jConf.MaxDeletes, + }) + format, err := m.Load() + if err != nil { + logger.Fatalf("load setting: %s", err) + } + + if jConf.PushGateway != "" && pusher == nil { + registry := prometheus.NewRegistry() // replace default so only JuiceFS metrics are exposed + prometheus.DefaultGatherer = registry + prometheus.DefaultRegisterer = prometheus.WrapRegistererWithPrefix("juicefs_", registry) + prometheus.MustRegister(prometheus.NewProcessCollector(prometheus.ProcessCollectorOpts{})) + prometheus.MustRegister(prometheus.NewGoCollector()) + // TODO: support multiple volumes + pusher = push.New(jConf.PushGateway, "juicefs").Gatherer(prometheus.DefaultGatherer) + pusher = pusher.Grouping("vol_name", format.Name).Grouping("mp", "sdk-"+strconv.Itoa(os.Getpid())) + if h, err := os.Hostname(); err == nil { + pusher = pusher.Grouping("instance", h) + } else { + logger.Warnf("cannot get hostname: %s", err) + } + if strings.Contains(jConf.PushAuth, ":") { + parts := strings.Split(jConf.PushAuth, ":") + pusher = pusher.BasicAuth(parts[0], parts[1]) + } + interval := time.Second * 10 + if jConf.PushInterval > 0 { + interval = time.Second * time.Duration(jConf.PushInterval) + } + go func() { + for { + time.Sleep(interval) + if err := pusher.Push(); err != nil { + logger.Warnf("push metrics to %s: %s", jConf.PushGateway, err) + } + } + }() + meta.InitMetrics() + vfs.InitMetrics() + go metric.UpdateMetrics(m) + } + + if jConf.Bucket != "" { + format.Bucket = jConf.Bucket + } + blob, err := createStorage(format) + if err != nil { + logger.Fatalf("object storage: %s", err) + } + logger.Infof("Data use %s", blob) + + var freeSpaceRatio = 0.1 + if jConf.FreeSpace != "" { + freeSpaceRatio, _ = strconv.ParseFloat(jConf.FreeSpace, 64) + } + chunkConf := chunk.Config{ + BlockSize: format.BlockSize * 1024, + Compress: format.Compression, + CacheDir: jConf.CacheDir, + CacheMode: 0644, // all user can read cache + CacheSize: jConf.CacheSize, + FreeSpace: float32(freeSpaceRatio), + AutoCreate: jConf.AutoCreate, + CacheFullBlock: jConf.CacheFullBlock, + MaxUpload: jConf.MaxUploads, + UploadLimit: int64(jConf.UploadLimit) * 1e6 / 8, + DownloadLimit: int64(jConf.DownloadLimit) * 1e6 / 8, + Prefetch: jConf.Prefetch, + Writeback: jConf.Writeback, + Partitions: format.Partitions, + GetTimeout: time.Second * time.Duration(jConf.GetTimeout), + PutTimeout: 
time.Second * time.Duration(jConf.PutTimeout), + BufferSize: jConf.MemorySize << 20, + Readahead: jConf.Readahead << 20, + } + if chunkConf.CacheDir != "memory" { + ds := utils.SplitDir(chunkConf.CacheDir) + for i := range ds { + ds[i] = filepath.Join(ds[i], format.UUID) + } + chunkConf.CacheDir = strings.Join(ds, string(os.PathListSeparator)) + } + store := chunk.NewCachedStore(blob, chunkConf) + m.OnMsg(meta.DeleteChunk, func(args ...interface{}) error { + chunkid := args[0].(uint64) + length := args[1].(uint32) + return store.Remove(chunkid, int(length)) + }) + m.OnMsg(meta.CompactChunk, func(args ...interface{}) error { + slices := args[0].([]meta.Slice) + chunkid := args[1].(uint64) + return vfs.Compact(chunkConf, store, slices, chunkid) + }) + err = m.NewSession() + if err != nil { + logger.Fatalf("new session: %s", err) + } + + conf := &vfs.Config{ + Meta: &meta.Config{ + Retries: 10, + }, + Format: format, + Chunk: &chunkConf, + AttrTimeout: time.Millisecond * time.Duration(jConf.AttrTimeout*1000), + EntryTimeout: time.Millisecond * time.Duration(jConf.EntryTimeout*1000), + DirEntryTimeout: time.Millisecond * time.Duration(jConf.DirEntryTimeout*1000), + AccessLog: jConf.AccessLog, + FastResolve: jConf.FastResolve, + } + if d := jConf.BackupMeta; d > 0 { + go vfs.Backup(m, blob, time.Duration(d*1e9)) + } + if !jConf.NoUsageReport { + go usage.ReportUsage(m, "java-sdk "+version.Version()) + } + jfs, err := fs.NewFileSystem(conf, m, store) + if err != nil { + logger.Errorf("Initialize failed: %s", err) + return nil + } + return jfs + }) +} + +func F(p uintptr) *wrapper { + fslock.Lock() + defer fslock.Unlock() + return handlers[p] +} + +//export jfs_update_uid_grouping +func jfs_update_uid_grouping(h uintptr, uidstr *C.char, grouping *C.char) { + w := F(h) + if w == nil { + return + } + var uids []pwent + if uidstr != nil { + for _, line := range strings.Split(C.GoString(uidstr), "\n") { + fields := strings.Split(line, ":") + if len(fields) < 2 { + continue + } + username := strings.TrimSpace(fields[0]) + uid, _ := strconv.Atoi(strings.TrimSpace(fields[1])) + uids = append(uids, pwent{uid, username}) + } + + var buffer bytes.Buffer + for _, u := range uids { + buffer.WriteString(fmt.Sprintf("\t%v:%v\n", u.name, u.id)) + } + logger.Debugf("Update uids mapping\n %s", buffer.String()) + } + + var gids []pwent + var groups []string + if grouping != nil { + for _, line := range strings.Split(C.GoString(grouping), "\n") { + fields := strings.Split(line, ":") + if len(fields) < 2 { + continue + } + gname := strings.TrimSpace(fields[0]) + gid, _ := strconv.Atoi(strings.TrimSpace(fields[1])) + gids = append(gids, pwent{gid, gname}) + if len(fields) > 2 { + for _, user := range strings.Split(fields[len(fields)-1], ",") { + if strings.TrimSpace(user) == w.user { + groups = append(groups, gname) + } + } + } + } + logger.Debugf("Update groups of %s to %s", w.user, strings.Join(groups, ",")) + } + w.m.update(uids, gids) + + if w.isSuperuser(w.user, groups) { + w.ctx = meta.NewContext(uint32(os.Getpid()), 0, []uint32{0}) + } else if len(groups) > 0 { + w.ctx = meta.NewContext(uint32(os.Getpid()), w.lookupUid(w.user), w.lookupGids(strings.Join(groups, ","))) + } +} + +//export jfs_term +func jfs_term(pid int, h uintptr) int { + w := F(h) + if w == nil { + return 0 + } + ctx := w.withPid(pid) + // sync all open files + filesLock.Lock() + var m sync.WaitGroup + var toClose []int + for fd, f := range openFiles { + if f.w == w { + m.Add(1) + go func(f *fs.File) { + defer m.Done() + _ = f.Close(ctx) + 
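
Editor's note, illustrative only: the `javaConf` struct above defines the JSON contract that the Hadoop SDK (see `JuiceFileSystemImpl.initialize` later in this diff) serializes and passes to `jfs_init`. A minimal sketch of that round trip, using only a subset of the fields and made-up sample values:

```go
// Trimmed-down sketch of unmarshalling the Java-side JSON into a javaConf-like struct.
package main

import (
	"encoding/json"
	"fmt"
)

type javaConfSubset struct {
	MetaURL   string  `json:"meta"`
	CacheDir  string  `json:"cacheDir"`
	CacheSize int64   `json:"cacheSize"`
	OpenCache float64 `json:"openCache"`
	ReadOnly  bool    `json:"readOnly"`
	Debug     bool    `json:"debug"`
}

func main() {
	// Roughly the shape of what the SDK builds from juicefs.* configuration keys.
	sample := `{
	  "meta": "redis://127.0.0.1:6379/1",
	  "cacheDir": "memory",
	  "cacheSize": 100,
	  "openCache": 0.0,
	  "readOnly": false,
	  "debug": false
	}`
	var c javaConfSubset
	if err := json.Unmarshal([]byte(sample), &c); err != nil {
		panic(err)
	}
	fmt.Printf("%+v\n", c)
}
```
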
}(f.File) + toClose = append(toClose, fd) + } + } + for _, fd := range toClose { + delete(openFiles, fd) + } + filesLock.Unlock() + m.Wait() + + fslock.Lock() + defer fslock.Unlock() + delete(handlers, h) + for name, ws := range activefs { + for i := range ws { + if ws[i] == w { + if len(ws) > 1 { + ws[i] = ws[len(ws)-1] + activefs[name] = ws[:len(ws)-1] + } else { + _ = w.Flush() + // don't close the filesystem, so it can be re-used later + // w.Close() + // delete(activefs, name) + } + } + } + } + if pusher != nil { + if err := pusher.Push(); err != nil { + logger.Warnf("push metrics: %s", err) + } + } + return 0 +} + +//export jfs_open +func jfs_open(pid int, h uintptr, cpath *C.char, flags int) int { + w := F(h) + if w == nil { + return EINVAL + } + path := C.GoString(cpath) + f, err := w.Open(w.withPid(pid), path, uint32(flags)) + if err != 0 { + return errno(err) + } + st, _ := f.Stat() + if st.IsDir() { + return ENOENT + } + return nextFileHandle(f, w) +} + +//export jfs_access +func jfs_access(pid int, h uintptr, cpath *C.char, flags int) int { + w := F(h) + if w == nil { + return EINVAL + } + return errno(w.Access(w.withPid(pid), C.GoString(cpath), flags)) +} + +//export jfs_create +func jfs_create(pid int, h uintptr, cpath *C.char, mode uint16) int { + w := F(h) + if w == nil { + return EINVAL + } + path := C.GoString(cpath) + f, err := w.Create(w.withPid(pid), path, mode) + if err != 0 { + return errno(err) + } + if w.ctx.Uid() == 0 && w.user != w.superuser { + // belongs to supergroup + _ = setOwner(w, w.withPid(pid), C.GoString(cpath), w.user, "") + } + return nextFileHandle(f, w) +} + +//export jfs_mkdir +func jfs_mkdir(pid int, h uintptr, cpath *C.char, mode C.mode_t) int { + w := F(h) + if w == nil { + return EINVAL + } + err := errno(w.Mkdir(w.withPid(pid), C.GoString(cpath), uint16(mode))) + if err == 0 && w.ctx.Uid() == 0 && w.user != w.superuser { + // belongs to supergroup + _ = setOwner(w, w.withPid(pid), C.GoString(cpath), w.user, "") + } + return err +} + +//export jfs_delete +func jfs_delete(pid int, h uintptr, cpath *C.char) int { + w := F(h) + if w == nil { + return EINVAL + } + return errno(w.Delete(w.withPid(pid), C.GoString(cpath))) +} + +//export jfs_rmr +func jfs_rmr(pid int, h uintptr, cpath *C.char) int { + w := F(h) + if w == nil { + return EINVAL + } + return errno(w.Rmr(w.withPid(pid), C.GoString(cpath))) +} + +//export jfs_rename +func jfs_rename(pid int, h uintptr, oldpath *C.char, newpath *C.char) int { + w := F(h) + if w == nil { + return EINVAL + } + return errno(w.Rename(w.withPid(pid), C.GoString(oldpath), C.GoString(newpath), meta.RenameNoReplace)) +} + +//export jfs_truncate +func jfs_truncate(pid int, h uintptr, path *C.char, length uint64) int { + w := F(h) + if w == nil { + return EINVAL + } + return errno(w.Truncate(w.withPid(pid), C.GoString(path), length)) +} + +//export jfs_setXattr +func jfs_setXattr(pid int, h uintptr, path *C.char, name *C.char, value uintptr, vlen int, mode int) int { + w := F(h) + if w == nil { + return EINVAL + } + var flags uint32 + switch mode { + case 1: + flags = meta.XattrCreate + case 2: + flags = meta.XattrReplace + } + return errno(w.SetXattr(w.withPid(pid), C.GoString(path), C.GoString(name), toBuf(value, vlen), flags)) +} + +//export jfs_getXattr +func jfs_getXattr(pid int, h uintptr, path *C.char, name *C.char, buf uintptr, bufsize int) int { + w := F(h) + if w == nil { + return EINVAL + } + buff, err := w.GetXattr(w.withPid(pid), C.GoString(path), C.GoString(name)) + if err != 0 { + return errno(err) + 
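
Editor's note, illustrative only: `jfs_open` and `jfs_create` above return plain integer handles allocated by `nextFileHandle`, so the Java side can refer to open files across the C boundary. The sketch below (hypothetical names, simplified value type) shows just that allocation pattern in isolation:

```go
// A mutex-guarded map that hands out the smallest free integer handle.
package main

import (
	"fmt"
	"sync"
)

type handleTable struct {
	mu      sync.Mutex
	entries map[int]string // stand-in for *fwrapper
	minFree int
}

func newHandleTable() *handleTable {
	return &handleTable{entries: make(map[int]string), minFree: 1}
}

func (t *handleTable) alloc(v string) int {
	t.mu.Lock()
	defer t.mu.Unlock()
	for i := t.minFree; ; i++ {
		if _, used := t.entries[i]; !used {
			t.entries[i] = v
			t.minFree = i + 1
			return i
		}
	}
}

func (t *handleTable) free(fd int) {
	t.mu.Lock()
	defer t.mu.Unlock()
	if _, ok := t.entries[fd]; ok {
		delete(t.entries, fd)
		if fd < t.minFree {
			t.minFree = fd
		}
	}
}

func main() {
	t := newHandleTable()
	a, b := t.alloc("/a"), t.alloc("/b")
	t.free(a)
	fmt.Println(a, b, t.alloc("/c")) // the freed handle 1 is reused
}
```
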
} + if len(buff) >= bufsize { + return bufsize + } + copy(toBuf(buf, bufsize), buff) + return len(buff) +} + +//export jfs_listXattr +func jfs_listXattr(pid int, h uintptr, path *C.char, buf uintptr, bufsize int) int { + w := F(h) + if w == nil { + return EINVAL + } + buff, err := w.ListXattr(w.withPid(pid), C.GoString(path)) + if err != 0 { + return errno(err) + } + if len(buff) >= bufsize { + return bufsize + } + copy(toBuf(buf, bufsize), buff) + return len(buff) +} + +//export jfs_removeXattr +func jfs_removeXattr(pid int, h uintptr, path *C.char, name *C.char) int { + w := F(h) + if w == nil { + return EINVAL + } + return errno(w.RemoveXattr(w.withPid(pid), C.GoString(path), C.GoString(name))) +} + +//export jfs_symlink +func jfs_symlink(pid int, h uintptr, target *C.char, link *C.char) int { + w := F(h) + if w == nil { + return EINVAL + } + return errno(w.Symlink(w.withPid(pid), C.GoString(target), C.GoString(link))) +} + +//export jfs_readlink +func jfs_readlink(pid int, h uintptr, link *C.char, buf uintptr, bufsize int) int { + w := F(h) + if w == nil { + return EINVAL + } + target, err := w.Readlink(w.withPid(pid), C.GoString(link)) + if err != 0 { + return errno(err) + } + if len(target)+1 >= bufsize { + target = target[:bufsize-1] + } + wb := utils.NewNativeBuffer(toBuf(buf, bufsize)) + wb.Put(target) + wb.Put8(0) + return len(target) +} + +// mode:4 length:8 mtime:8 atime:8 user:50 group:50 +func fill_stat(w *wrapper, wb *utils.Buffer, st *fs.FileStat) int { + wb.Put32(uint32(st.Mode())) + wb.Put64(uint64(st.Size())) + wb.Put64(uint64(st.Mtime())) + wb.Put64(uint64(st.Atime())) + user := w.uid2name(uint32(st.Uid())) + wb.Put([]byte(user)) + wb.Put8(0) + group := w.gid2name(uint32(st.Gid())) + wb.Put([]byte(group)) + wb.Put8(0) + return 30 + len(user) + len(group) +} + +//export jfs_stat1 +func jfs_stat1(pid int, h uintptr, cpath *C.char, buf uintptr) int { + w := F(h) + if w == nil { + return EINVAL + } + info, err := w.Stat(w.withPid(pid), C.GoString(cpath)) + if err != 0 { + return errno(err) + } + return fill_stat(w, utils.NewNativeBuffer(toBuf(buf, 130)), info) +} + +//export jfs_lstat1 +func jfs_lstat1(pid int, h uintptr, cpath *C.char, buf uintptr) int { + w := F(h) + if w == nil { + return EINVAL + } + fi, err := w.Stat(w.withPid(pid), C.GoString(cpath)) + if err != 0 { + return errno(err) + } + return fill_stat(w, utils.NewNativeBuffer(toBuf(buf, 130)), fi) +} + +//export jfs_summary +func jfs_summary(pid int, h uintptr, cpath *C.char, buf uintptr) int { + w := F(h) + if w == nil { + return EINVAL + } + ctx := w.withPid(pid) + f, err := w.Open(ctx, C.GoString(cpath), 0) + if err != 0 { + return errno(err) + } + defer f.Close(ctx) + summary, err := f.Summary(ctx) + if err != 0 { + return errno(err) + } + wb := utils.NewNativeBuffer(toBuf(buf, 24)) + wb.Put64(summary.Length) + wb.Put64(summary.Files) + wb.Put64(summary.Dirs) + return 24 +} + +//export jfs_statvfs +func jfs_statvfs(pid int, h uintptr, buf uintptr) int { + w := F(h) + if w == nil { + return EINVAL + } + total, avail := w.StatFS(w.withPid(pid)) + wb := utils.NewNativeBuffer(toBuf(buf, 16)) + wb.Put64(total) + wb.Put64(avail) + return 0 +} + +//export jfs_chmod +func jfs_chmod(pid int, h uintptr, cpath *C.char, mode C.mode_t) int { + w := F(h) + if w == nil { + return EINVAL + } + f, err := w.Open(w.withPid(pid), C.GoString(cpath), 0) + if err != 0 { + return errno(err) + } + defer f.Close(w.withPid(pid)) + return errno(f.Chmod(w.withPid(pid), uint16(mode))) +} + +//export jfs_utime +func jfs_utime(pid int, h 
uintptr, cpath *C.char, mtime, atime int64) int { + w := F(h) + if w == nil { + return EINVAL + } + f, err := w.Open(w.withPid(pid), C.GoString(cpath), 0) + if err != 0 { + return errno(err) + } + defer f.Close(w.withPid(pid)) + return errno(f.Utime(w.withPid(pid), atime, mtime)) +} + +//export jfs_setOwner +func jfs_setOwner(pid int, h uintptr, cpath *C.char, owner *C.char, group *C.char) int { + w := F(h) + if w == nil { + return EINVAL + } + return setOwner(w, w.withPid(pid), C.GoString(cpath), C.GoString(owner), C.GoString(group)) +} + +func setOwner(w *wrapper, ctx meta.Context, path string, owner, group string) int { + f, err := w.Open(ctx, path, 0) + if err != 0 { + return errno(err) + } + defer f.Close(ctx) + st, _ := f.Stat() + uid := uint32(st.(*fs.FileStat).Uid()) + gid := uint32(st.(*fs.FileStat).Gid()) + if owner != "" { + uid = w.lookupUid(owner) + } + if group != "" { + gid = w.lookupGid(group) + } + return errno(f.Chown(ctx, uid, gid)) +} + +//export jfs_listdir +func jfs_listdir(pid int, h uintptr, cpath *C.char, offset int, buf uintptr, bufsize int) int { + var ctx meta.Context + var f *fs.File + var w *wrapper + if offset > 0 { + filesLock.Lock() + fw := openFiles[int(h)] + filesLock.Unlock() + if fw == nil { + return EINVAL + } + freeHandle(int(h)) + w = fw.w + f = fw.File + ctx = w.withPid(pid) + } else { + w = F(h) + if w == nil { + return EINVAL + } + var err syscall.Errno + ctx = w.withPid(pid) + f, err = w.Open(ctx, C.GoString(cpath), 0) + if err != 0 { + return errno(err) + } + st, _ := f.Stat() + if !st.IsDir() { + return ENOTDIR + } + } + + es, err := f.ReaddirPlus(ctx, offset) + if err != 0 { + return errno(err) + } + + wb := utils.NewNativeBuffer(toBuf(buf, bufsize)) + for i, d := range es { + if wb.Left() < 1+len(d.Name)+1+130+8 { + wb.Put32(uint32(len(es) - i)) + wb.Put32(uint32(nextFileHandle(f, w))) + return bufsize - wb.Left() - 8 + } + wb.Put8(byte(len(d.Name))) + wb.Put(d.Name) + header := wb.Get(1) + header[0] = uint8(fill_stat(w, wb, fs.AttrToFileInfo(d.Inode, d.Attr))) + } + wb.Put32(0) + return bufsize - wb.Left() - 4 +} + +func toBuf(s uintptr, sz int) []byte { + return (*[1 << 30]byte)(unsafe.Pointer(s))[:sz:sz] +} + +//export jfs_concat +func jfs_concat(pid int, h uintptr, _dst *C.char, buf uintptr, bufsize int) int { + w := F(h) + if w == nil { + return EINVAL + } + dst := C.GoString(_dst) + ctx := w.withPid(pid) + df, err := w.Open(ctx, dst, vfs.MODE_MASK_W) + if err != 0 { + return errno(err) + } + defer df.Close(ctx) + srcs := strings.Split(string(toBuf(buf, bufsize-1)), "\000") + var tmp string + if len(srcs) > 1 { + tmp = filepath.Join(filepath.Dir(dst), "."+filepath.Base(dst)+".merging") + fi, err := w.Create(ctx, tmp, 0644) + if err != 0 { + return errno(err) + } + defer func() { _ = w.Delete(ctx, tmp) }() + defer fi.Close(ctx) + var off uint64 + for _, src := range srcs { + copied, err := w.CopyFileRange(ctx, src, 0, tmp, off, 1<<63) + if err != 0 { + return errno(err) + } + off += copied + } + } else { + tmp = srcs[0] + } + + dfi, _ := df.Stat() + _, err = w.CopyFileRange(ctx, tmp, 0, dst, uint64(dfi.Size()), 1<<63) + r := errno(err) + if r == 0 { + var wg sync.WaitGroup + var limit = make(chan bool, 100) + for _, src := range srcs { + limit <- true + wg.Add(1) + go func(p string) { + defer func() { <-limit }() + defer wg.Done() + if r := w.Delete(ctx, p); r != 0 { + logger.Errorf("delete source %s: %s", p, r) + } + }(src) + } + wg.Wait() + } + return r +} + +//export jfs_lseek +func jfs_lseek(pid, fd int, offset int64, whence int) 
int64 { + filesLock.Lock() + f, ok := openFiles[fd] + if ok { + filesLock.Unlock() + off, _ := f.Seek(f.w.withPid(pid), offset, whence) + return off + } + filesLock.Unlock() + return int64(EINVAL) +} + +//export jfs_read +func jfs_read(pid, fd int, cbuf uintptr, count int) int { + filesLock.Lock() + f, ok := openFiles[fd] + if !ok { + filesLock.Unlock() + return EINVAL + } + filesLock.Unlock() + + n, err := f.Read(f.w.withPid(pid), toBuf(cbuf, count)) + if err != nil && err != io.EOF { + logger.Errorf("read %s: %s", f.Name(), err) + return errno(err) + } + return n +} + +//export jfs_pread +func jfs_pread(pid, fd int, cbuf uintptr, count C.size_t, offset C.off_t) int { + filesLock.Lock() + f, ok := openFiles[fd] + if !ok { + filesLock.Unlock() + return EINVAL + } + filesLock.Unlock() + + if count > (1 << 30) { + count = 1 << 30 + } + n, err := f.Pread(f.w.withPid(pid), toBuf(cbuf, int(count)), int64(offset)) + if err != nil && err != io.EOF { + logger.Errorf("read %s: %s", f.Name(), err) + return errno(err) + } + return n +} + +//export jfs_write +func jfs_write(pid, fd int, cbuf uintptr, count C.size_t) int { + filesLock.Lock() + f, ok := openFiles[fd] + if !ok { + filesLock.Unlock() + return EINVAL + } + filesLock.Unlock() + + buf := toBuf(cbuf, int(count)) + n, err := f.Write(f.w.withPid(pid), buf) + if err != 0 { + logger.Errorf("write %s: %s", f.Name(), err) + return errno(err) + } + return n +} + +//export jfs_flush +func jfs_flush(pid, fd int) int { + filesLock.Lock() + f, ok := openFiles[fd] + if !ok { + filesLock.Unlock() + return EINVAL + } + filesLock.Unlock() + + return errno(f.Flush(f.w.withPid(pid))) +} + +//export jfs_fsync +func jfs_fsync(pid, fd int) int { + filesLock.Lock() + f, ok := openFiles[fd] + if !ok { + filesLock.Unlock() + return EINVAL + } + filesLock.Unlock() + + return errno(f.Fsync(f.w.withPid(pid))) +} + +//export jfs_close +func jfs_close(pid, fd int) int { + filesLock.Lock() + f, ok := openFiles[fd] + filesLock.Unlock() + if !ok { + return 0 + } + freeHandle(fd) + return errno(f.Close(f.w.withPid(pid))) +} + +func main() { +} diff --git a/sdk/java/pom.xml b/sdk/java/pom.xml new file mode 100644 index 0000000..e058d7e --- /dev/null +++ b/sdk/java/pom.xml @@ -0,0 +1,200 @@ + + 4.0.0 + io.juicefs + juicefs-hadoop + Hadoop FileSystem implementation for JuiceFS + juicefs-hadoop + 1.0-dev + jar + + 3.0.0 + -Djdk.net.URLClassPath.disableClassPathURLCheck=true -Djava.library.path=${project.basedir}/../mount/libjfs:${java.library.path} -Djdk.attach.allowAttachSelf=true + + + + + maven-surefire-plugin + 2.19.1 + + ${argLine} + false + + + + org.apache.maven.plugins + maven-shade-plugin + + + package + + shade + + + + + ${artifactId}-${version} + + + org.objectweb.asm + juice.org.objectweb.asm + + + com.beust + juice.com.beust + + + + + + org.apache.maven.plugins + maven-compiler-plugin + + 8 + 8 + + + + org.apache.maven.plugins + maven-jar-plugin + + + + true + io.juicefs.Main + + + + + + org.jacoco + jacoco-maven-plugin + 0.8.7 + + + + prepare-agent + + + + report + test + + report + + + + + + + + libjfs/target + + + src/main/resources + + + + + conf + + + + + + com.github.jnr + jnr-ffi + 2.1.11 + + + org.apache.hadoop + hadoop-common + ${hadoop.version} + provided + + + org.apache.hadoop + hadoop-common + ${hadoop.version} + test + test-jar + + + org.apache.hadoop + hadoop-client + ${hadoop.version} + provided + + + junit + junit + 4.13.1 + test + + + org.json + json + 20180813 + + + org.javassist + javassist + 3.25.0-GA + + + com.beust + jcommander + 1.81 + + + 
org.apache.hive + hive-metastore + + + org.apache.hadoop + hadoop-annotations + + + + provided + + 1.2.1 + + + org.apache.flink + flink-hadoop-fs + 1.10.3 + provided + + + org.apache.flink + flink-core + 1.10.3 + provided + + + org.apache.hadoop + hadoop-minicluster + ${hadoop.version} + test + + + org.apache.flink + flink-streaming-java_2.12 + 1.10.0 + test + + + org.kitesdk + kite-data-core + 1.1.0 + provided + + + * + * + + + + + diff --git a/sdk/java/src/main/java/io/juicefs/FlinkFileSystemFactory.java b/sdk/java/src/main/java/io/juicefs/FlinkFileSystemFactory.java new file mode 100644 index 0000000..46f6136 --- /dev/null +++ b/sdk/java/src/main/java/io/juicefs/FlinkFileSystemFactory.java @@ -0,0 +1,68 @@ +/* + * JuiceFS, Copyright 2020 Juicedata, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package io.juicefs; + +import org.apache.flink.configuration.Configuration; +import org.apache.flink.core.fs.FileSystem; +import org.apache.flink.runtime.fs.hdfs.HadoopFileSystem; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.io.IOException; +import java.net.URI; + +public class FlinkFileSystemFactory implements org.apache.flink.core.fs.FileSystemFactory { + private static final Logger LOG = LoggerFactory.getLogger(FlinkFileSystemFactory.class); + private org.apache.hadoop.conf.Configuration conf; + + private static final String[] FLINK_CONFIG_PREFIXES = {"fs.", "juicefs."}; + private String scheme; + + @Override + public void configure(Configuration config) { + conf = new org.apache.hadoop.conf.Configuration(); + if (config != null) { + for (String key : config.keySet()) { + for (String prefix : FLINK_CONFIG_PREFIXES) { + if (key.startsWith(prefix)) { + String value = config.getString(key, null); + if (value != null) { + if ("io.juicefs.JuiceFileSystem".equals(value.trim())) { + this.scheme = key.split("\\.")[1]; + } + conf.set(key, value); + } + } + } + } + } + } + + @Override + public String getScheme() { + if (scheme == null) { + return "jfs"; + } + return scheme; + } + + @Override + public FileSystem create(URI fsUri) throws IOException { + JuiceFileSystem fs = new JuiceFileSystem(); + fs.initialize(fsUri, conf); + return new HadoopFileSystem(fs); + } +} diff --git a/sdk/java/src/main/java/io/juicefs/JuiceFS.java b/sdk/java/src/main/java/io/juicefs/JuiceFS.java new file mode 100644 index 0000000..ca8ef46 --- /dev/null +++ b/sdk/java/src/main/java/io/juicefs/JuiceFS.java @@ -0,0 +1,35 @@ +/* + * JuiceFS, Copyright 2020 Juicedata, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
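
Editor's note, illustrative only: `FlinkFileSystemFactory.configure` above forwards any Flink key starting with `fs.` or `juicefs.` into the Hadoop configuration and infers the scheme from the key whose value is `io.juicefs.JuiceFileSystem`. A minimal sketch of that rule, reduced to plain maps (names and sample keys are made up):

```go
// Prefix-based config forwarding and scheme detection, as done by the Flink factory.
package main

import (
	"fmt"
	"strings"
)

func forward(flinkConf map[string]string) (hadoopConf map[string]string, scheme string) {
	hadoopConf = make(map[string]string)
	scheme = "jfs" // default when no explicit mapping is found
	for key, value := range flinkConf {
		for _, prefix := range []string{"fs.", "juicefs."} {
			if strings.HasPrefix(key, prefix) {
				if strings.TrimSpace(value) == "io.juicefs.JuiceFileSystem" {
					scheme = strings.Split(key, ".")[1]
				}
				hadoopConf[key] = value
			}
		}
	}
	return hadoopConf, scheme
}

func main() {
	conf, scheme := forward(map[string]string{
		"fs.jfs.impl":  "io.juicefs.JuiceFileSystem",
		"juicefs.meta": "redis://127.0.0.1:6379/1",
		"other.key":    "ignored",
	})
	fmt.Println(scheme, conf)
}
```
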
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ +package io.juicefs; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.DelegateToFileSystem; +import org.apache.hadoop.fs.FileSystem; + +import java.io.IOException; +import java.net.URI; +import java.net.URISyntaxException; + +public class JuiceFS extends DelegateToFileSystem { + JuiceFS(final URI uri, final Configuration conf) throws IOException, URISyntaxException { + super(uri, FileSystem.get(uri, conf), conf, uri.getScheme(), false); + } + + @Override + public int getUriDefaultPort() { + return -1; + } +} diff --git a/sdk/java/src/main/java/io/juicefs/JuiceFileSystem.java b/sdk/java/src/main/java/io/juicefs/JuiceFileSystem.java new file mode 100644 index 0000000..ac12818 --- /dev/null +++ b/sdk/java/src/main/java/io/juicefs/JuiceFileSystem.java @@ -0,0 +1,140 @@ +/* + * JuiceFS, Copyright 2020 Juicedata, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package io.juicefs; + +import io.juicefs.utils.PatchUtil; +import org.apache.hadoop.classification.InterfaceAudience; +import org.apache.hadoop.classification.InterfaceStability; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.*; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.io.IOException; +import java.net.URI; +import java.util.concurrent.Executors; +import java.util.concurrent.ScheduledExecutorService; +import java.util.concurrent.TimeUnit; + +/**************************************************************** + * Implement the FileSystem API for JuiceFS + *****************************************************************/ +@InterfaceAudience.Public +@InterfaceStability.Stable +public class JuiceFileSystem extends FilterFileSystem { + private static final Logger LOG = LoggerFactory.getLogger(JuiceFileSystem.class); + + private static boolean fileChecksumEnabled = false; + private static boolean distcpPatched = false; + + private ScheduledExecutorService emptier; + + static { + PatchUtil.patchBefore("org.apache.flink.runtime.fs.hdfs.HadoopRecoverableFsDataOutputStream", + "waitUntilLeaseIsRevoked", + new String[]{"org.apache.hadoop.fs.FileSystem", "org.apache.hadoop.fs.Path"}, + "if (fs instanceof io.juicefs.JuiceFileSystem) {\n" + + " return ((io.juicefs.JuiceFileSystem)fs).isFileClosed(path);\n" + + " }"); + } + + private synchronized static void patchDistCpChecksum() { + if (distcpPatched) + return; + PatchUtil.patchBefore("org.apache.hadoop.tools.mapred.RetriableFileCopyCommand", + "compareCheckSums", + null, + "if (sourceFS.getFileStatus(source).getBlockSize() != targetFS.getFileStatus(target).getBlockSize()) {return ;}"); + distcpPatched = true; + } + + @Override + public void initialize(URI uri, Configuration conf) throws IOException { + super.initialize(uri, conf); + fileChecksumEnabled = Boolean.parseBoolean(getConf(conf, "file.checksum", "false")); + startTrashEmptier(conf); + } + + private void startTrashEmptier(final 
Configuration conf) throws IOException { + + emptier = Executors.newScheduledThreadPool(1, r -> { + Thread t = new Thread(r, "Trash Emptier"); + t.setDaemon(true); + return t; + }); + + emptier.schedule(new Trash(this, conf).getEmptier(), 10, TimeUnit.MINUTES); + } + + private String getConf(Configuration conf, String key, String value) { + String name = fs.getUri().getHost(); + String v = conf.get("juicefs." + key, value); + if (name != null && !name.equals("")) { + v = conf.get("juicefs." + name + "." + key, v); + } + if (v != null) + v = v.trim(); + return v; + } + + public JuiceFileSystem() { + super(new JuiceFileSystemImpl()); + } + + @Override + public String getScheme() { + StackTraceElement[] elements = Thread.currentThread().getStackTrace(); + if (elements[2].getClassName().equals("org.apache.flink.runtime.fs.hdfs.HadoopRecoverableWriter") && + elements[2].getMethodName().equals("")) { + return "hdfs"; + } + return fs.getScheme(); + } + + @Override + public ContentSummary getContentSummary(Path f) throws IOException { + return fs.getContentSummary(f); + } + + public boolean isFileClosed(final Path src) throws IOException { + FileStatus st = fs.getFileStatus(src); + return st.getLen() > 0; + } + + @Override + public FileChecksum getFileChecksum(Path f, long length) throws IOException { + if (!fileChecksumEnabled) + return null; + patchDistCpChecksum(); + return super.getFileChecksum(f, length); + } + + @Override + public FileChecksum getFileChecksum(Path f) throws IOException { + if (!fileChecksumEnabled) + return null; + patchDistCpChecksum(); + return super.getFileChecksum(f); + } + + @Override + public void close() throws IOException { + if (this.emptier != null) { + emptier.shutdownNow(); + } + super.close(); + } +} diff --git a/sdk/java/src/main/java/io/juicefs/JuiceFileSystemImpl.java b/sdk/java/src/main/java/io/juicefs/JuiceFileSystemImpl.java new file mode 100644 index 0000000..90a0936 --- /dev/null +++ b/sdk/java/src/main/java/io/juicefs/JuiceFileSystemImpl.java @@ -0,0 +1,1590 @@ +/* + * JuiceFS, Copyright 2020 Juicedata, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
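
Editor's note, illustrative only: `getConf` above resolves a setting in two steps — the global `juicefs.<key>` value can be overridden per volume by `juicefs.<name>.<key>`, where `<name>` is the authority of the `jfs://` URI. A minimal sketch of that lookup order with made-up keys:

```go
// Per-volume override resolution, mirroring JuiceFileSystem.getConf.
package main

import "fmt"

func getConf(conf map[string]string, name, key, def string) string {
	v, ok := conf["juicefs."+key]
	if !ok {
		v = def
	}
	if name != "" {
		if pv, ok := conf["juicefs."+name+"."+key]; ok {
			v = pv
		}
	}
	return v
}

func main() {
	conf := map[string]string{
		"juicefs.cache-size":      "100",
		"juicefs.vol1.cache-size": "4096",
	}
	fmt.Println(getConf(conf, "vol1", "cache-size", "0")) // 4096 (volume override)
	fmt.Println(getConf(conf, "vol2", "cache-size", "0")) // 100 (global value)
}
```
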
+ */ +package io.juicefs; + +import com.kenai.jffi.internal.StubLoader; +import io.juicefs.metrics.JuiceFSInstrumentation; +import io.juicefs.utils.ConsistentHash; +import io.juicefs.utils.NodesFetcher; +import io.juicefs.utils.NodesFetcherBuilder; +import jnr.ffi.LibraryLoader; +import jnr.ffi.Memory; +import jnr.ffi.Pointer; +import jnr.ffi.Runtime; +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.hadoop.HadoopIllegalArgumentException; +import org.apache.hadoop.classification.InterfaceAudience; +import org.apache.hadoop.classification.InterfaceStability; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.*; +import org.apache.hadoop.fs.permission.FsAction; +import org.apache.hadoop.fs.permission.FsPermission; +import org.apache.hadoop.io.DataOutputBuffer; +import org.apache.hadoop.io.MD5Hash; +import org.apache.hadoop.security.AccessControlException; +import org.apache.hadoop.security.UserGroupInformation; +import org.apache.hadoop.util.DataChecksum; +import org.apache.hadoop.util.DirectBufferPool; +import org.apache.hadoop.util.Progressable; +import org.apache.hadoop.util.VersionInfo; +import org.json.JSONObject; +import sun.nio.ch.DirectBuffer; + +import java.io.*; +import java.lang.reflect.Constructor; +import java.lang.reflect.Field; +import java.lang.reflect.InvocationTargetException; +import java.lang.reflect.Method; +import java.net.*; +import java.nio.ByteBuffer; +import java.nio.file.Paths; +import java.util.*; +import java.util.concurrent.Executors; +import java.util.concurrent.ScheduledExecutorService; +import java.util.concurrent.TimeUnit; +import java.util.jar.JarFile; +import java.util.stream.Collectors; +import java.util.zip.GZIPInputStream; + +/**************************************************************** + * Implement the FileSystem API for JuiceFS + *****************************************************************/ +@InterfaceAudience.Public +@InterfaceStability.Stable +public class JuiceFileSystemImpl extends FileSystem { + + public static final Log LOG = LogFactory.getLog(JuiceFileSystemImpl.class); + + private Path workingDir; + private String name; + private URI uri; + private long blocksize; + private int minBufferSize; + private int cacheReplica; + private boolean fileChecksumEnabled; + private Libjfs lib; + private long handle; + private UserGroupInformation ugi; + private String homeDirPrefix = "/user"; + private Map cachedHosts = new HashMap<>(); // (ip, hostname) + private ConsistentHash hash = new ConsistentHash<>(1, Collections.singletonList("localhost")); + private FsPermission uMask; + private String hflushMethod; + private ScheduledExecutorService nodesFetcherThread; + private ScheduledExecutorService refreshUidThread; + private Map lastFileStatus = new HashMap<>(); + private static final DirectBufferPool bufferPool = new DirectBufferPool(); + private boolean metricsEnable = false; + + /* + * hadoop compability + */ + private boolean withStreamCapability; + // constructor for BufferedFSOutputStreamWithStreamCapabilities + private Constructor constructor; + private Method setStorageIds; + private String[] storageIds; + private Random random = new Random(); + + public static interface Libjfs { + long jfs_init(String name, String jsonConf, String user, String group, String superuser, String supergroup); + + void jfs_update_uid_grouping(long h, String uidstr, String grouping); + + int jfs_term(long pid, long h); + + int jfs_open(long 
pid, long h, String path, int flags); + + int jfs_access(long pid, long h, String path, int flags); + + long jfs_lseek(long pid, int fd, long pos, int whence); + + int jfs_pread(long pid, int fd, Pointer b, int len, long offset); + + int jfs_write(long pid, int fd, Pointer b, int len); + + int jfs_flush(long pid, int fd); + + int jfs_fsync(long pid, int fd); + + int jfs_close(long pid, int fd); + + int jfs_create(long pid, long h, String path, short mode); + + int jfs_truncate(long pid, long h, String path, long length); + + int jfs_delete(long pid, long h, String path); + + int jfs_rmr(long pid, long h, String path); + + int jfs_mkdir(long pid, long h, String path, short mode); + + int jfs_rename(long pid, long h, String src, String dst); + + int jfs_stat1(long pid, long h, String path, Pointer buf); + + int jfs_lstat1(long pid, long h, String path, Pointer buf); + + int jfs_summary(long pid, long h, String path, Pointer buf); + + int jfs_statvfs(long pid, long h, Pointer buf); + + int jfs_chmod(long pid, long h, String path, int mode); + + int jfs_setOwner(long pid, long h, String path, String user, String group); + + int jfs_utime(long pid, long h, String path, long mtime, long atime); + + int jfs_listdir(long pid, long h, String path, int offset, Pointer buf, int size); + + int jfs_concat(long pid, long h, String path, Pointer buf, int bufsize); + + int jfs_setXattr(long pid, long h, String path, String name, Pointer value, int vlen, int mode); + + int jfs_getXattr(long pid, long h, String path, String name, Pointer buf, int size); + + int jfs_listXattr(long pid, long h, String path, Pointer buf, int size); + + int jfs_removeXattr(long pid, long h, String path, String name); + } + + static int EPERM = -0x01; + static int ENOENT = -0x02; + static int EINTR = -0x04; + static int EIO = -0x05; + static int EACCESS = -0xd; + static int EEXIST = -0x11; + static int ENOTDIR = -0x14; + static int EINVAL = -0x16; + static int ENOSPACE = -0x1c; + static int EROFS = -0x1e; + static int ENOTEMPTY = -0x27; + static int ENODATA = -0x3d; + static int ENOATTR = -0x5d; + static int ENOTSUP = -0x5f; + + static int MODE_MASK_R = 4; + static int MODE_MASK_W = 2; + static int MODE_MASK_X = 1; + + private IOException error(int errno, Path p) { + if (errno == EPERM) { + return new PathPermissionException(p.toString()); + } else if (errno == ENOTDIR) { + return new ParentNotDirectoryException(); + } else if (errno == ENOENT) { + return new FileNotFoundException(p.toString() + ": not found"); + } else if (errno == EACCESS) { + try { + String user = ugi.getShortUserName(); + FileStatus stat = getFileStatusInternalNoException(p); + if (stat != null) { + FsPermission perm = stat.getPermission(); + return new AccessControlException(String.format("Permission denied: user=%s, path=\"%s\":%s:%s:%s%s", user, p, + stat.getOwner(), stat.getGroup(), stat.isDirectory() ? 
"d" : "-", perm)); + } + } catch (Exception e) { + LOG.warn("fail to generate better error message", e); + } + return new AccessControlException("Permission denied: " + p.toString()); + } else if (errno == EEXIST) { + return new FileAlreadyExistsException(); + } else if (errno == EINVAL) { + return new InvalidRequestException("Invalid parameter"); + } else if (errno == ENOTEMPTY) { + return new PathIsNotEmptyDirectoryException(p.toString()); + } else if (errno == EINTR) { + return new InterruptedIOException(); + } else if (errno == ENOTSUP) { + return new PathOperationException(p.toString()); + } else if (errno == ENOSPACE) { + return new IOException("No space"); + } else if (errno == EROFS) { + return new IOException("Read-only Filesystem"); + } else if (errno == EIO) { + return new IOException(p.toString()); + } else { + return new IOException("errno: " + errno + " " + p.toString()); + } + } + + public JuiceFileSystemImpl() { + } + + @Override + public long getDefaultBlockSize() { + return blocksize; + } + + private String normalizePath(Path path) { + return makeQualified(path).toUri().getPath(); + } + + public String getScheme() { + return uri.getScheme(); + } + + @Override + public String toString() { + return uri.toString(); + } + + @Override + public URI getUri() { + return uri; + } + + private String getConf(Configuration conf, String key, String value) { + String v = conf.get("juicefs." + key, value); + if (name != null && !name.equals("")) { + v = conf.get("juicefs." + name + "." + key, v); + } + if (v != null) + v = v.trim(); + return v; + } + + @Override + public void initialize(URI uri, Configuration conf) throws IOException { + super.initialize(uri, conf); + setConf(conf); + + this.uri = uri; + name = conf.get("juicefs.name", uri.getHost()); + if (null == name) { + throw new IOException("name is required"); + } + + blocksize = conf.getLong("juicefs.block.size", conf.getLong("dfs.blocksize", 128 << 20)); + minBufferSize = conf.getInt("juicefs.min-buffer-size", 128 << 10); + cacheReplica = Integer.parseInt(getConf(conf, "cache-replica", "1")); + fileChecksumEnabled = Boolean.parseBoolean(getConf(conf, "file.checksum", "false")); + + this.ugi = UserGroupInformation.getCurrentUser(); + String user = ugi.getShortUserName(); + String group = "nogroup"; + String groupingFile = getConf(conf, "groups", null); + if (isEmpty(groupingFile) && ugi.getGroupNames().length > 0) { + group = String.join(",", ugi.getGroupNames()); + } + String superuser = getConf(conf, "superuser", "hdfs"); + String supergroup = getConf(conf, "supergroup", conf.get("dfs.permissions.superusergroup", "supergroup")); + String mountpoint = getConf(conf, "mountpoint", ""); + + initCache(conf); + refreshCache(conf); + + lib = loadLibrary(); + JSONObject obj = new JSONObject(); + String[] keys = new String[]{"meta",}; + for (String key : keys) { + obj.put(key, getConf(conf, key, "")); + } + String[] bkeys = new String[]{"debug", "writeback"}; + for (String key : bkeys) { + obj.put(key, Boolean.valueOf(getConf(conf, key, "false"))); + } + obj.put("bucket", getConf(conf, "bucket", "")); + obj.put("readOnly", Boolean.valueOf(getConf(conf, "read-only", "false"))); + obj.put("cacheDir", getConf(conf, "cache-dir", "memory")); + obj.put("cacheSize", Integer.valueOf(getConf(conf, "cache-size", "100"))); + obj.put("openCache", Float.valueOf(getConf(conf, "open-cache", "0.0"))); + obj.put("backupMeta", Integer.valueOf(getConf(conf, "backup-meta", "3600"))); + obj.put("attrTimeout", Float.valueOf(getConf(conf, "attr-cache", 
"0.0"))); + obj.put("entryTimeout", Float.valueOf(getConf(conf, "entry-cache", "0.0"))); + obj.put("dirEntryTimeout", Float.valueOf(getConf(conf, "dir-entry-cache", "0.0"))); + obj.put("cacheFullBlock", Boolean.valueOf(getConf(conf, "cache-full-block", "true"))); + obj.put("metacache", Boolean.valueOf(getConf(conf, "metacache", "true"))); + obj.put("autoCreate", Boolean.valueOf(getConf(conf, "auto-create-cache-dir", "true"))); + obj.put("maxUploads", Integer.valueOf(getConf(conf, "max-uploads", "20"))); + obj.put("maxDeletes", Integer.valueOf(getConf(conf, "max-deletes", "2"))); + obj.put("uploadLimit", Integer.valueOf(getConf(conf, "upload-limit", "0"))); + obj.put("downloadLimit", Integer.valueOf(getConf(conf, "download-limit", "0"))); + obj.put("getTimeout", Integer.valueOf(getConf(conf, "get-timeout", getConf(conf, "object-timeout", "5")))); + obj.put("putTimeout", Integer.valueOf(getConf(conf, "put-timeout", getConf(conf, "object-timeout", "60")))); + obj.put("memorySize", Integer.valueOf(getConf(conf, "memory-size", "300"))); + obj.put("prefetch", Integer.valueOf(getConf(conf, "prefetch", "1"))); + obj.put("readahead", Integer.valueOf(getConf(conf, "max-readahead", "0"))); + obj.put("pushGateway", getConf(conf, "push-gateway", "")); + obj.put("pushInterval", Integer.valueOf(getConf(conf, "push-interval", "10"))); + obj.put("pushAuth", getConf(conf, "push-auth", "")); + obj.put("fastResolve", Boolean.valueOf(getConf(conf, "fast-resolve", "true"))); + obj.put("noUsageReport", Boolean.valueOf(getConf(conf, "no-usage-report", "false"))); + obj.put("freeSpace", getConf(conf, "free-space", "0.1")); + obj.put("accessLog", getConf(conf, "access-log", "")); + String jsonConf = obj.toString(2); + handle = lib.jfs_init(name, jsonConf, user, group, superuser, supergroup); + if (handle <= 0) { + throw new IOException("JuiceFS initialized failed for jfs://" + name); + } + homeDirPrefix = conf.get("dfs.user.home.dir.prefix", "/user"); + this.workingDir = getHomeDirectory(); + + // hadoop29 and above check + try { + Class.forName("org.apache.hadoop.fs.StreamCapabilities"); + withStreamCapability = true; + } catch (ClassNotFoundException e) { + withStreamCapability = false; + } + if (withStreamCapability) { + try { + constructor = Class.forName("io.juicefs.JuiceFileSystemImpl$BufferedFSOutputStreamWithStreamCapabilities") + .getConstructor(OutputStream.class, Integer.TYPE, String.class); + } catch (ClassNotFoundException | NoSuchMethodException e) { + throw new RuntimeException(e); + } + } + + uMask = FsPermission.getUMask(conf); + String umaskStr = getConf(conf, "umask", null); + if (!isEmpty(umaskStr)) { + uMask = new FsPermission(umaskStr); + } + + hflushMethod = getConf(conf, "hflush", "writeback"); + initializeStorageIds(conf); + + if ("true".equalsIgnoreCase(getConf(conf, "enable-metrics", "false"))) { + metricsEnable = true; + JuiceFSInstrumentation.init(this, statistics); + } + + String uidFile = getConf(conf, "users", null); + if (!isEmpty(uidFile) || !isEmpty(groupingFile)) { + updateUidAndGrouping(uidFile, groupingFile); + refreshUidAndGrouping(uidFile, groupingFile); + } + } + + private boolean isEmpty(String str) { + return str == null || str.trim().isEmpty(); + } + + private String readFile(String file) { + Path path = new Path(file); + URI uri = path.toUri(); + FileSystem fs; + try { + URI defaultUri = getDefaultUri(getConf()); + if (uri.getScheme() == null) { + uri = defaultUri; + } else { + if (uri.getAuthority() == null && (uri.getScheme().equals(defaultUri.getScheme()))) { + uri = 
defaultUri; + } + } + if (getScheme().equals(uri.getScheme()) && + (name != null && name.equals(uri.getAuthority()))) { + fs = this; + } else { + fs = path.getFileSystem(getConf()); + } + + FileStatus lastStatus = lastFileStatus.get(file); + FileStatus status = fs.getFileStatus(path); + if (lastStatus != null && status.getModificationTime() == lastStatus.getModificationTime() + && status.getLen() == lastStatus.getLen()) { + return null; + } + FSDataInputStream in = fs.open(path); + String res = new BufferedReader(new InputStreamReader(in)).lines().collect(Collectors.joining("\n")); + in.close(); + lastFileStatus.put(file, status); + return res; + } catch (IOException e) { + LOG.warn(String.format("read %s failed", file), e); + return null; + } + } + + private void updateUidAndGrouping(String uidFile, String groupFile) { + String uidstr = null; + if (uidFile != null && !"".equals(uidFile.trim())) { + uidstr = readFile(uidFile); + } + String grouping = null; + if (groupFile != null && !"".equals(groupFile.trim())) { + grouping = readFile(groupFile); + } + + lib.jfs_update_uid_grouping(handle, uidstr, grouping); + } + + private void refreshUidAndGrouping(String uidFile, String groupFile) { + refreshUidThread = Executors.newScheduledThreadPool(1, r -> { + Thread thread = new Thread(r, "Uid and group refresher"); + thread.setDaemon(true); + return thread; + }); + refreshUidThread.scheduleAtFixedRate(() -> { + updateUidAndGrouping(uidFile, groupFile); + }, 1, 1, TimeUnit.MINUTES); + } + + private void initializeStorageIds(Configuration conf) throws IOException { + try { + Class clazz = Class.forName("org.apache.hadoop.fs.BlockLocation"); + setStorageIds = clazz.getMethod("setStorageIds", String[].class); + } catch (ClassNotFoundException e) { + throw new IllegalStateException( + "Hadoop version was incompatible, current hadoop version is:\t" + VersionInfo.getVersion()); + } catch (NoSuchMethodException e) { + setStorageIds = null; + } + int vdiskPerCpu = Integer.parseInt(getConf(conf, "vdisk-per-cpu", "4")); + storageIds = new String[java.lang.Runtime.getRuntime().availableProcessors() * vdiskPerCpu]; + for (int i = 0; i < storageIds.length; i++) { + storageIds[i] = "vd" + i; + } + } + + @Override + public Path getHomeDirectory() { + return makeQualified(new Path(homeDirPrefix + "/" + ugi.getShortUserName())); + } + + private static void initStubLoader() { + int loadMaxTime = 30; + long start = System.currentTimeMillis(); + Class clazz = null; + // first try + try { + clazz = Class.forName("com.kenai.jffi.internal.StubLoader"); + } catch (ClassNotFoundException e) { + } + + // try try try ... 
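
Editor's note, illustrative only: `readFile` above re-reads the uid/grouping file only when its modification time or length differs from the last cached `FileStatus`, which keeps the one-minute `refreshUidAndGrouping` loop cheap when nothing has changed. A minimal sketch of that change check with local files standing in for the Hadoop filesystem (`readIfChanged` is a hypothetical helper):

```go
// Re-read a file only when its mtime or size changed since the last snapshot.
package main

import (
	"fmt"
	"os"
)

type fileSnapshot struct {
	modTime int64
	size    int64
}

var lastSeen = map[string]fileSnapshot{}

// readIfChanged returns "" when the file is unchanged since the previous call
// (mirroring readFile returning null in that case).
func readIfChanged(path string) (string, error) {
	st, err := os.Stat(path)
	if err != nil {
		return "", err
	}
	snap := fileSnapshot{st.ModTime().UnixNano(), st.Size()}
	if prev, ok := lastSeen[path]; ok && prev == snap {
		return "", nil
	}
	data, err := os.ReadFile(path)
	if err != nil {
		return "", err
	}
	lastSeen[path] = snap
	return string(data), nil
}

func main() {
	content, err := readIfChanged("/etc/hosts")
	if err != nil {
		fmt.Println("error:", err)
		return
	}
	fmt.Println(len(content), "bytes (empty means unchanged)")
}
```
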
+ while (StubLoader.getFailureCause() != null && (System.currentTimeMillis() - start) < loadMaxTime * 1000) { + LOG.warn("StubLoader load failed, it'll be retried!"); + try { + Thread.interrupted(); + Method load = clazz.getDeclaredMethod("load"); + load.setAccessible(true); + load.invoke(null); + + Field loaded = clazz.getDeclaredField("loaded"); + loaded.setAccessible(true); + loaded.set(null, true); + + Field failureCause = clazz.getDeclaredField("failureCause"); + failureCause.setAccessible(true); + failureCause.set(null, null); + } catch (Throwable e) { + } + } + + if (StubLoader.getFailureCause() != null) { + throw new RuntimeException("StubLoader load failed", StubLoader.getFailureCause()); + } + } + + public static Libjfs loadLibrary() throws IOException { + initStubLoader(); + + LibraryLoader libjfsLibraryLoader = LibraryLoader.create(Libjfs.class); + libjfsLibraryLoader.failImmediately(); + String name = "libjfs.4.so"; + File dir = new File("/tmp"); + String os = System.getProperty("os.name"); + if (os.toLowerCase().contains("windows")) { + name = "libjfs3.dll"; + dir = new File(System.getProperty("java.io.tmpdir")); + } + File libFile = new File(dir, name); + URL res = JuiceFileSystemImpl.class.getResource("/libjfs.so.gz"); + if (res == null) { + // jar may changed + return libjfsLibraryLoader.load(libFile.getAbsolutePath()); + } + URLConnection conn; + try { + conn = res.openConnection(); + } catch (FileNotFoundException e) { + // jar may changed + return libjfsLibraryLoader.load(libFile.getAbsolutePath()); + } + + long soTime = conn.getLastModified(); + if (res.getProtocol().equalsIgnoreCase("jar")) { + String jarPath = Paths.get(URI.create(res.getFile())) + .getParent().toUri().getPath().trim(); + jarPath = jarPath.substring(0, jarPath.length() - 1); + soTime = new JarFile(jarPath).getJarEntry("libjfs.so.gz") + .getLastModifiedTime() + .toMillis(); + } + + InputStream ins = conn.getInputStream(); + synchronized (JuiceFileSystemImpl.class) { + if (!libFile.exists() || libFile.lastModified() < soTime) { + // try the name for current user + libFile = new File(dir, System.getProperty("user.name") + "-" + name); + if (!libFile.exists() || libFile.lastModified() < soTime) { + InputStream reader = new GZIPInputStream(ins); + File tmp = File.createTempFile(name, null, dir); + FileOutputStream writer = new FileOutputStream(tmp); + byte[] buffer = new byte[128 << 10]; + int bytesRead = 0; + while ((bytesRead = reader.read(buffer)) != -1) { + writer.write(buffer, 0, bytesRead); + } + writer.close(); + reader.close(); + tmp.setLastModified(soTime); + tmp.setReadable(true, false); + new File(dir, name).delete(); + if (tmp.renameTo(new File(dir, name))) { + // updated libjfs.so + libFile = new File(dir, name); + } else { + libFile.delete(); + if (!tmp.renameTo(libFile)) { + throw new IOException("Can't update " + libFile); + } + } + } + } + } + ins.close(); + return libjfsLibraryLoader.load(libFile.getAbsolutePath()); + } + + private void initCache(Configuration conf) { + try { + List nodes = Arrays.asList(getConf(conf, "nodes", "localhost").split(",")); + if (nodes.size() == 1 && "localhost".equals(nodes.get(0))) { + String urls = getConf(conf, "discover-nodes-url", null); + if (urls != null) { + List newNodes = discoverNodes(urls); + Map newCachedHosts = new HashMap<>(); + for (String newNode : newNodes) { + try { + newCachedHosts.put(InetAddress.getByName(newNode).getHostAddress(), newNode); + } catch (UnknownHostException e) { + LOG.warn("unknown host: " + newNode); + } + } + + // if 
newCachedHosts are not changed, skip + if (!newCachedHosts.equals(cachedHosts)) { + List ips = new ArrayList<>(newCachedHosts.keySet()); + LOG.debug("update nodes to: " + String.join(",", ips)); + this.hash = new ConsistentHash<>(100, ips); + this.cachedHosts = newCachedHosts; + } + } + } + } catch (Throwable e) { + LOG.warn(e); + } + } + + private void refreshCache(Configuration conf) { + nodesFetcherThread = Executors.newScheduledThreadPool(1, r -> { + Thread thread = new Thread(r, "Node fetcher"); + thread.setDaemon(true); + return thread; + }); + nodesFetcherThread.scheduleAtFixedRate(() -> { + initCache(conf); + }, 10, 10, TimeUnit.MINUTES); + } + + private List discoverNodes(String urls) { + NodesFetcher fetcher = NodesFetcherBuilder.buildFetcher(urls, name); + List fetched = fetcher.fetchNodes(urls); + if (fetched == null || fetched.isEmpty()) { + return Collections.singletonList("localhost"); + } else { + return fetched; + } + } + + private BlockLocation makeLocation(long code, long start, long len) { + long index = (start + len / 2) / blocksize / 4; + BlockLocation blockLocation; + String[] ns = new String[cacheReplica]; + String[] hs = new String[cacheReplica]; + String host = cachedHosts.getOrDefault(hash.get(code + "-" + index), "localhost"); + ns[0] = host + ":50010"; + hs[0] = host; + for (int i = 1; i < cacheReplica; i++) { + String h = hash.get(code + "-" + (index + i)); + ns[i] = h + ":50010"; + hs[i] = h; + } + blockLocation = new BlockLocation(ns, hs, null, null, start, len, false); + if (setStorageIds != null) { + try { + setStorageIds.invoke(blockLocation, (Object) getStorageIds()); + } catch (IllegalAccessException | InvocationTargetException e) { + throw new RuntimeException(e); + } + } + return blockLocation; + } + + private String[] getStorageIds() { + String[] res = new String[cacheReplica]; + for (int i = 0; i < cacheReplica; i++) { + res[i] = storageIds[random.nextInt(storageIds.length)]; + } + return res; + } + + public BlockLocation[] getFileBlockLocations(FileStatus file, long start, long len) throws IOException { + if (file == null) { + return null; + } + if (start < 0 || len < 0) { + throw new IllegalArgumentException("Invalid start or len parameter"); + } + if (file.getLen() <= start) { + return new BlockLocation[0]; + } + if (cacheReplica <= 0) { + String[] name = new String[]{"localhost:50010"}; + String[] host = new String[]{"localhost"}; + return new BlockLocation[]{new BlockLocation(name, host, 0L, file.getLen())}; + } + if (file.getLen() <= start + len) { + len = file.getLen() - start; + } + long code = normalizePath(file.getPath()).hashCode(); + BlockLocation[] locs = new BlockLocation[(int) (len / blocksize) + 2]; + int indx = 0; + while (len > 0) { + long blen = len < blocksize ? len : blocksize - start % blocksize; + locs[indx] = makeLocation(code, start, blen); + start += blen; + len -= blen; + indx++; + } + // merge the last block + if (indx > 1 && locs[indx - 1].getLength() < blocksize / 10) { + locs[indx - 2].setLength(locs[indx - 2].getLength() + locs[indx - 1].getLength()); + indx--; + } + // merge the first block + if (indx > 1 && locs[0].getLength() < blocksize / 10) { + locs[1].setOffset(locs[0].getOffset()); + locs[1].setLength(locs[0].getLength() + locs[1].getLength()); + locs = Arrays.copyOfRange(locs, 1, indx); + indx--; + } + return Arrays.copyOfRange(locs, 0, indx); + } + + /******************************************************* + * For open()'s FSInputStream. 
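
Editor's note, illustrative only: `makeLocation`/`getFileBlockLocations` above pick a preferred cache node by hashing a `"<pathHash>-<blockGroupIndex>"` key onto a consistent-hash ring of discovered node IPs. The tiny ring below is a stand-in for `io.juicefs.utils.ConsistentHash` and is not the project's implementation; only the key construction mirrors the code above.

```go
// Consistent-hash lookup of a preferred cache node for a block range.
package main

import (
	"fmt"
	"hash/crc32"
	"sort"
)

type ring struct {
	keys  []uint32
	nodes map[uint32]string
}

func newRing(replicas int, nodes []string) *ring {
	r := &ring{nodes: make(map[uint32]string)}
	for _, n := range nodes {
		for i := 0; i < replicas; i++ {
			k := crc32.ChecksumIEEE([]byte(fmt.Sprintf("%s#%d", n, i)))
			r.keys = append(r.keys, k)
			r.nodes[k] = n
		}
	}
	sort.Slice(r.keys, func(i, j int) bool { return r.keys[i] < r.keys[j] })
	return r
}

func (r *ring) get(key string) string {
	h := crc32.ChecksumIEEE([]byte(key))
	i := sort.Search(len(r.keys), func(i int) bool { return r.keys[i] >= h })
	if i == len(r.keys) {
		i = 0 // wrap around the ring
	}
	return r.nodes[r.keys[i]]
}

func main() {
	const blockSize = 128 << 20
	r := newRing(100, []string{"10.0.0.1", "10.0.0.2", "10.0.0.3"})
	pathHash := int64(12345) // stand-in for normalizePath(path).hashCode()
	start, length := int64(0), int64(64<<20)
	index := (start + length/2) / blockSize / 4 // same block grouping as makeLocation
	fmt.Println(r.get(fmt.Sprintf("%d-%d", pathHash, index)))
}
```
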
+ *******************************************************/ + class FileInputStream extends FSInputStream implements ByteBufferReadable { + private int fd; + private final Path path; + + private ByteBuffer buf; + private long position; + + public FileInputStream(Path f, int fd, int size) throws IOException { + path = f; + this.fd = fd; + buf = bufferPool.getBuffer(size); + buf.limit(0); + position = 0; + } + + @Override + public synchronized long getPos() throws IOException { + if (buf == null) + throw new IOException("stream was closed"); + return position - buf.remaining(); + } + + @Override + public boolean seekToNewSource(long targetPos) throws IOException { + return false; + } + + @Override + public synchronized int available() throws IOException { + if (buf == null) + throw new IOException("stream was closed"); + return buf.remaining(); + } + + @Override + public boolean markSupported() { + return false; + } + + @Override + public void reset() throws IOException { + throw new IOException("Mark/reset not supported"); + } + + public synchronized int read() throws IOException { + if (buf == null) + throw new IOException("stream was closed"); + if (!buf.hasRemaining() && !refill()) + return -1; // EOF + assert buf.hasRemaining(); + statistics.incrementBytesRead(1); + return buf.get() & 0xFF; + } + + public synchronized int read(byte[] b, int off, int len) throws IOException { + if (off < 0 || len < 0 || b.length - off < len) + throw new IndexOutOfBoundsException(); + if (len == 0) + return 0; + if (buf == null) + throw new IOException("stream was closed"); + if (!buf.hasRemaining() && len <= buf.capacity() && !refill()) + return -1; // No bytes were read before EOF. + + int read = Math.min(buf.remaining(), len); + if (read > 0) { + buf.get(b, off, read); + statistics.incrementBytesRead(read); + off += read; + len -= read; + } + if (len == 0) + return read; + int more = read(position, b, off, len); + if (more <= 0) { + if (read > 0) { + return read; + } else { + return -1; + } + } + position += more; + buf.position(0); + buf.limit(0); + return read + more; + } + + private boolean refill() throws IOException { + buf.clear(); + int read = read(position, buf); + if (read <= 0) { + buf.limit(0); + return false; // EOF + } + statistics.incrementBytesRead(-read); + buf.position(0); + buf.limit(read); + position += read; + return true; + } + + @Override + public synchronized int read(long pos, byte[] b, int off, int len) throws IOException { + if (len == 0) + return 0; + if (buf == null) + throw new IOException("stream was closed"); + if (pos < 0) + throw new EOFException("position is negative"); + if (b == null || off < 0 || len < 0 || b.length - off < len) { + throw new IllegalArgumentException("arguments: " + off + " " + len); + } + if (len > 128 << 20) { + len = 128 << 20; + } + Pointer tmp = Memory.allocate(Runtime.getRuntime(lib), len); + int got = lib.jfs_pread(Thread.currentThread().getId(), fd, tmp, len, pos); + if (got == 0) + return -1; + if (got == EINVAL) + throw new IOException("stream was closed"); + if (got < 0) + throw error(got, path); + tmp.get(0, b, off, got); + statistics.incrementBytesRead(got); + return got; + } + + @Override + public synchronized int read(ByteBuffer b) throws IOException { + if (!b.hasRemaining()) + return 0; + if (buf == null) + throw new IOException("stream was closed"); + if (!buf.hasRemaining() && b.remaining() <= buf.capacity() && !refill()) { + return -1; + } + int got = 0; + while (b.hasRemaining() && buf.hasRemaining()) { + b.put(buf.get()); + 
got++; + } + statistics.incrementBytesRead(got); + if (!b.hasRemaining()) + return got; + int more = read(position, b); + if (more <= 0) + return got > 0 ? got : -1; + position += more; + buf.position(0); + buf.limit(0); + return got + more; + } + + public synchronized int read(long pos, ByteBuffer b) throws IOException { + if (!b.hasRemaining()) + return 0; + int got; + if (b.hasArray()) { + got = read(pos, b.array(), b.position(), b.remaining()); + if (got <= 0) + return got; + } else { + assert b.isDirect(); + long address = ((DirectBuffer) b).address() + b.position(); + Pointer destPtr = Runtime.getRuntime(lib).getMemoryManager().newPointer(address); + got = lib.jfs_pread(Thread.currentThread().getId(), fd, destPtr, b.remaining(), pos); + if (got == EINVAL) + throw new IOException("stream was closed"); + if (got < 0) + throw error(got, path); + if (got == 0) + return -1; + statistics.incrementBytesRead(got); + } + b.position(b.position() + got); + return got; + } + + @Override + public synchronized void seek(long p) throws IOException { + if (p < 0) { + throw new EOFException(FSExceptionMessages.NEGATIVE_SEEK); + } + if (buf == null) + throw new IOException("stream was closed"); + if (p < position && p >= position - buf.limit()) { + buf.position((int) (p - (position - buf.limit()))); + } else { + buf.position(0); + buf.limit(0); + position = p; + } + } + + @Override + public synchronized long skip(long n) throws IOException { + if (n < 0) + return -1; + if (buf == null) + throw new IOException("stream was closed"); + if (n < buf.remaining()) { + buf.position(buf.position() + (int) n); + } else { + position += n - buf.remaining(); + buf.position(0); + buf.limit(0); + } + return n; + } + + @Override + public synchronized void close() throws IOException { + if (buf == null) { + return; // already closed + } + bufferPool.returnBuffer(buf); + buf = null; + int r = lib.jfs_close(Thread.currentThread().getId(), fd); + fd = 0; + if (r < 0) + throw error(r, path); + } + } + + @Override + public FSDataInputStream open(Path f, int bufferSize) throws IOException { + statistics.incrementReadOps(1); + int fd = lib.jfs_open(Thread.currentThread().getId(), handle, normalizePath(f), MODE_MASK_R); + if (fd < 0) { + throw error(fd, f); + } + return new FSDataInputStream(new FileInputStream(f, fd, checkBufferSize(bufferSize))); + } + + @Override + public void access(Path path, FsAction mode) throws IOException { + int r = lib.jfs_access(Thread.currentThread().getId(), handle, normalizePath(path), mode.ordinal()); + if (r < 0) + throw error(r, path); + } + + /********************************************************* + * For create()'s FSOutputStream. 
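// --- Illustrative usage sketch, not part of the patch; the file system name and path are hypothetical ---
// The stream returned by open() above supports the plain InputStream contract as well as Hadoop's
// positioned reads and ByteBuffer reads, all ultimately served by jfs_pread:
//
//     Configuration conf = new Configuration();
//     FileSystem fs = FileSystem.get(URI.create("jfs://myjfs/"), conf);
//     try (FSDataInputStream in = fs.open(new Path("/data/part-00000"))) {
//         byte[] header = new byte[64];
//         in.readFully(0, header);                       // positioned read (PositionedReadable)
//         ByteBuffer bb = ByteBuffer.allocateDirect(1 << 20);
//         in.read(bb);                                    // ByteBufferReadable; direct buffers avoid an extra copy
//     }
// --- End of sketch ---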
+ *********************************************************/ + class FSOutputStream extends OutputStream { + private int fd; + private Path path; + + private FSOutputStream(int fd, Path p) throws IOException { + this.fd = fd; + this.path = p; + } + + @Override + public void close() throws IOException { + int r = lib.jfs_close(Thread.currentThread().getId(), fd); + if (r < 0) + throw error(r, path); + } + + @Override + public void flush() throws IOException { + } + + public void hflush() throws IOException { + int r = lib.jfs_flush(Thread.currentThread().getId(), fd); + if (r == EINVAL) + throw new IOException("stream was closed"); + if (r < 0) + throw error(r, path); + } + + public void fsync() throws IOException { + int r = lib.jfs_fsync(Thread.currentThread().getId(), fd); + if (r == EINVAL) + throw new IOException("stream was closed"); + if (r < 0) + throw error(r, path); + } + + @Override + public void write(byte[] b, int off, int len) throws IOException { + if (b.length - off < len) { + throw new IndexOutOfBoundsException(); + } + Pointer buf = Memory.allocate(Runtime.getRuntime(lib), len); + buf.put(0, b, off, len); + int done = lib.jfs_write(Thread.currentThread().getId(), fd, buf, len); + if (done == EINVAL) + throw new IOException("stream was closed"); + if (done < 0) + throw error(done, path); + if (done < len) { + throw new IOException("write"); + } + } + + @Override + public void write(int b) throws IOException { + Pointer buf = Memory.allocate(Runtime.getRuntime(lib), 1); + buf.putByte(0, (byte) b); + int done = lib.jfs_write(Thread.currentThread().getId(), fd, buf, 1); + if (done < 0) + throw error(done, path); + if (done < 1) + throw new IOException("write"); + } + } + + static class BufferedFSOutputStream extends BufferedOutputStream implements Syncable { + private String hflushMethod; + + public BufferedFSOutputStream(OutputStream out) { + super(out); + hflushMethod = "writeback"; + } + + public BufferedFSOutputStream(OutputStream out, int size, String hflushMethod) { + super(out, size); + this.hflushMethod = hflushMethod; + } + + public void sync() throws IOException { + hflush(); + } + + @Override + public void hflush() throws IOException { + flush(); + if (hflushMethod.equals("writeback")) { + ((FSOutputStream) out).hflush(); + } else if (hflushMethod.equals("sync") || hflushMethod.equals("fsync")) { + ((FSOutputStream) out).fsync(); + } else { + // nothing + } + } + + @Override + public void hsync() throws IOException { + flush(); + ((FSOutputStream) out).fsync(); + } + } + + static class BufferedFSOutputStreamWithStreamCapabilities extends BufferedFSOutputStream + implements StreamCapabilities { + public BufferedFSOutputStreamWithStreamCapabilities(OutputStream out) { + super(out); + } + + public BufferedFSOutputStreamWithStreamCapabilities(OutputStream out, int size, String hflushMethod) { + super(out, size, hflushMethod); + } + + @Override + public boolean hasCapability(String capability) { + return capability.equalsIgnoreCase("hsync") || capability.equalsIgnoreCase(("hflush")); + } + } + + @Override + public FSDataOutputStream append(Path f, int bufferSize, Progressable progress) throws IOException { + statistics.incrementWriteOps(1); + int fd = lib.jfs_open(Thread.currentThread().getId(), handle, normalizePath(f), MODE_MASK_W); + if (fd < 0) + throw error(fd, f); + long r = lib.jfs_lseek(Thread.currentThread().getId(), fd, 0, 2); + if (r < 0) + throw error((int) r, f); + return createFsDataOutputStream(f, bufferSize, fd, r); + } + + @Override + public 
FSDataOutputStream create(Path f, FsPermission permission, boolean overwrite, int bufferSize, + short replication, long blockSize, Progressable progress) throws IOException { + statistics.incrementWriteOps(1); + permission = permission.applyUMask(uMask); + while (true) { + int fd = lib.jfs_create(Thread.currentThread().getId(), handle, normalizePath(f), permission.toShort()); + if (fd == ENOENT) { + Path parent = f.getParent(); + FsPermission perm = FsPermission.getDirDefault().applyUMask(FsPermission.getUMask(getConf())); + try { + mkdirs(parent, perm); + } catch (FileAlreadyExistsException e) { + } + continue; + } + if (fd == EEXIST) { + if (!overwrite || isDirectory(f)) { + throw new FileAlreadyExistsException("Path already exists: " + f); + } + delete(f, false); + continue; + } + if (fd < 0) { + throw error(fd, f.getParent()); + } + return createFsDataOutputStream(f, bufferSize, fd, 0L); + } + } + + private int checkBufferSize(int size) { + if (size < minBufferSize) { + size = minBufferSize; + } + return size; + } + + @Override + public FSDataOutputStream createNonRecursive(Path f, FsPermission permission, EnumSet flag, + int bufferSize, short replication, long blockSize, Progressable progress) throws IOException { + statistics.incrementWriteOps(1); + permission = permission.applyUMask(uMask); + int fd = lib.jfs_create(Thread.currentThread().getId(), handle, normalizePath(f), permission.toShort()); + while (fd == EEXIST) { + if (!flag.contains(CreateFlag.OVERWRITE) || isDirectory(f)) { + throw new FileAlreadyExistsException("File already exists: " + f); + } + delete(f, false); + fd = lib.jfs_create(Thread.currentThread().getId(), handle, normalizePath(f), permission.toShort()); + } + if (fd < 0) { + throw error(fd, f.getParent()); + } + return createFsDataOutputStream(f, bufferSize, fd, 0L); + } + + private FSDataOutputStream createFsDataOutputStream(Path f, int bufferSize, int fd, long startPosition) throws IOException { + FSOutputStream out = new FSOutputStream(fd, f); + if (withStreamCapability) { + try { + return new FSDataOutputStream( + (OutputStream) constructor.newInstance(out, checkBufferSize(bufferSize), hflushMethod), statistics, startPosition); + } catch (InstantiationException | IllegalAccessException | InvocationTargetException e) { + throw new RuntimeException(e); + } + } else { + return new FSDataOutputStream(new BufferedFSOutputStream(out, checkBufferSize(bufferSize), hflushMethod), + statistics, startPosition); + } + } + + @Override + public FileChecksum getFileChecksum(Path f, long length) throws IOException { + statistics.incrementReadOps(1); + if (!fileChecksumEnabled) + return null; + String combineMode = getConf().get("dfs.checksum.combine.mode", "MD5MD5CRC"); + if (!combineMode.equals("MD5MD5CRC")) + return null; + DataChecksum.Type ctype = DataChecksum.Type.valueOf(getConf().get("dfs.checksum.type", "CRC32C")); + if (ctype.size != 4) + return null; + + int bytesPerCrc = getConf().getInt("io.bytes.per.checksum", 512); + DataChecksum summer = DataChecksum.newDataChecksum(ctype, bytesPerCrc); + + long crcPerBlock = 0; + DataOutputBuffer checksumBuf = new DataOutputBuffer(); + DataOutputBuffer crcBuf = new DataOutputBuffer(); + byte[] buf = new byte[bytesPerCrc]; + FSDataInputStream in = open(f, 1 << 20); + while (length > 0) { + for (int i = 0; i < blocksize / bytesPerCrc && length > 0; i++) { + int n; + if (length < bytesPerCrc) { + n = in.read(buf, 0, (int) length); + } else { + n = in.read(buf); + } + if (n <= 0) { + length = 0; // EOF + } else { + 
summer.update(buf, 0, n); + summer.writeValue(crcBuf, true); + length -= n; + } + } + if (crcBuf.getLength() > 0) { + MD5Hash blockMd5 = MD5Hash.digest(crcBuf.getData(), 0, crcBuf.getLength()); + blockMd5.write(checksumBuf); + crcBuf.reset(); + if (length > 0) { // more than one block + crcPerBlock = blocksize / bytesPerCrc; + } + } + } + in.close(); + if (checksumBuf.getLength() == 0) { // empty file + return new MD5MD5CRC32GzipFileChecksum(0, 0, MD5Hash.digest(new byte[32])); + } + MD5Hash md5 = MD5Hash.digest(checksumBuf.getData()); + if (ctype == DataChecksum.Type.CRC32C) { + return new MD5MD5CRC32CastagnoliFileChecksum(bytesPerCrc, crcPerBlock, md5); + } else { + return new MD5MD5CRC32GzipFileChecksum(bytesPerCrc, crcPerBlock, md5); + } + } + + @Override + public void concat(final Path dst, final Path[] srcs) throws IOException { + statistics.incrementWriteOps(1); + if (getFileStatus(dst).getLen() == 0) { + throw new IOException(dst + "is empty"); + } + Path dp = dst.getParent(); + for (Path src : srcs) { + if (!src.getParent().equals(dp)) { + throw new HadoopIllegalArgumentException("Source file " + src + + " is not in the same directory with the target " + + dst); + } + } + if (srcs.length == 0) { return; } + byte[][] srcbytes = new byte[srcs.length][]; + int bufsize = 0; + for (int i = 0; i < srcs.length; i++) { + srcbytes[i] = normalizePath(srcs[i]).getBytes("UTF-8"); + bufsize += srcbytes[i].length + 1; + } + Pointer buf = Memory.allocate(Runtime.getRuntime(lib), bufsize); + long offset = 0; + for (int i = 0; i < srcs.length; i++) { + buf.put(offset, srcbytes[i], 0, srcbytes[i].length); + buf.putByte(offset + srcbytes[i].length, (byte) 0); + offset += srcbytes[i].length + 1; + } + int r = lib.jfs_concat(Thread.currentThread().getId(), handle, normalizePath(dst), buf, bufsize); + if (r < 0) { + // TODO: show correct path (one of srcs) + throw error(r, dst); + } + } + + @Override + public boolean rename(Path src, Path dst) throws IOException { + statistics.incrementWriteOps(1); + String srcStr = makeQualified(src).toUri().getPath(); + String dstStr = makeQualified(dst).toUri().getPath(); + if (src.equals(dst)) { + FileStatus st = getFileStatus(src); + return st.isFile(); + } + if (dstStr.startsWith(srcStr) && (dstStr.charAt(srcStr.length()) == Path.SEPARATOR_CHAR)) { + return false; + } + int r = lib.jfs_rename(Thread.currentThread().getId(), handle, normalizePath(src), normalizePath(dst)); + if (r == EEXIST) { + try { + FileStatus st = getFileStatus(dst); + if (st.isDirectory()) { + dst = new Path(dst, src.getName()); + r = lib.jfs_rename(Thread.currentThread().getId(), handle, normalizePath(src), normalizePath(dst)); + } else { + return false; + } + } catch (FileNotFoundException ignored) { + } + } + if (r == ENOENT || r == EEXIST) + return false; + if (r < 0) + throw error(r, src); + return true; + } + + @Override + public boolean truncate(Path f, long newLength) throws IOException { + int r = lib.jfs_truncate(Thread.currentThread().getId(), handle, normalizePath(f), newLength); + if (r < 0) + throw error(r, f); + return true; + } + + private boolean rmr(Path p) throws IOException { + int r = lib.jfs_rmr(Thread.currentThread().getId(), handle, normalizePath(p)); + if (r == ENOENT) { + return false; + } + if (r < 0) { + throw error(r, p); + } + return true; + } + + @Override + public boolean delete(Path p, boolean recursive) throws IOException { + statistics.incrementWriteOps(1); + if (recursive) + return rmr(p); + int r = lib.jfs_delete(Thread.currentThread().getId(), handle, 
normalizePath(p)); + if (r == ENOENT) { + return false; + } + if (r < 0) { + throw error(r, p); + } + return true; + } + + @Override + public ContentSummary getContentSummary(Path f) throws IOException { + statistics.incrementReadOps(1); + String path = normalizePath(f); + Pointer buf = Memory.allocate(Runtime.getRuntime(lib), 24); + int r = lib.jfs_summary(Thread.currentThread().getId(), handle, path, buf); + if (r < 0) { + throw error(r, f); + } + long size = buf.getLongLong(0); + long files = buf.getLongLong(8); + long dirs = buf.getLongLong(16); + return new ContentSummary(size, files, dirs); + } + + private FileStatus newFileStatus(Path p, Pointer buf, int size, boolean readlink) throws IOException { + int mode = buf.getInt(0); + boolean isdir = ((mode >>> 31) & 1) == 1; // Go + int stickybit = (mode >>> 20) & 1; + FsPermission perm = new FsPermission((short) ((mode & 0777) | (stickybit << 9))); + long length = buf.getLongLong(4); + long mtime = buf.getLongLong(12); + long atime = buf.getLongLong(20); + String user = buf.getString(28); + String group = buf.getString(28 + user.length() + 1); + assert (30 + user.length() + group.length() == size); + return new FileStatus(length, isdir, 1, blocksize, mtime, atime, perm, user, group, p); + } + + @Override + public FileStatus[] listStatus(Path f) throws FileNotFoundException, IOException { + statistics.incrementReadOps(1); + int bufsize = 32 << 10; + Pointer buf = Memory.allocate(Runtime.getRuntime(lib), bufsize); // TODO: smaller buff + String path = normalizePath(f); + int r = lib.jfs_listdir(Thread.currentThread().getId(), handle, path, 0, buf, bufsize); + if (r == ENOENT) { + throw new FileNotFoundException(f.toString()); + } + if (r == ENOTDIR) { + return new FileStatus[]{getFileStatus(f)}; + } + + FileStatus[] results; + results = new FileStatus[1024]; + int j = 0; + while (r > 0) { + long offset = 0; + while (offset < r) { + int len = buf.getByte(offset) & 0xff; + byte[] name = new byte[len]; + buf.get(offset + 1, name, 0, len); + offset += 1 + len; + int size = buf.getByte(offset) & 0xff; + if (j == results.length) { + FileStatus[] rs = new FileStatus[results.length * 2]; + System.arraycopy(results, 0, rs, 0, j); + results = rs; + } + Path p = makeQualified(new Path(f, new String(name))); + FileStatus st = newFileStatus(p, buf.slice(offset + 1), size, false); + results[j] = st; + offset += 1 + size; + j++; + } + int left = buf.getInt(offset); + if (left == 0) + break; + int fd = buf.getInt(offset + 4); + r = lib.jfs_listdir(Thread.currentThread().getId(), fd, path, j, buf, bufsize); + } + if (r < 0) { + throw error(r, f); + } + statistics.incrementReadOps(j); + + FileStatus[] sorted = Arrays.copyOf(results, j); + Arrays.sort(sorted, (p1, p2) -> p1.getPath().compareTo(p2.getPath())); + return sorted; + } + + @Override + public void setWorkingDirectory(Path newDir) { + workingDir = fixRelativePart(newDir); + checkPath(workingDir); + } + + @Override + public Path getWorkingDirectory() { + return workingDir; + } + + @Override + public boolean mkdirs(Path f, FsPermission permission) throws IOException { + statistics.incrementWriteOps(1); + if (f == null) { + throw new IllegalArgumentException("mkdirs path arg is null"); + } + String path = normalizePath(f); + if ("/".equals(path)) + return true; + int r = lib.jfs_mkdir(Thread.currentThread().getId(), handle, path, permission.applyUMask(uMask).toShort()); + if (r == 0 || r == EEXIST && !isFile(f)) { + return true; + } else if (r == ENOENT) { + Path parent = f.getParent(); + if (parent != 
null) { + return mkdirs(parent, permission) && mkdirs(f, permission); + } + } + throw error(r, f.getParent()); + } + + @Override + public FileStatus getFileStatus(Path f) throws IOException { + statistics.incrementReadOps(1); + try { + return getFileStatusInternal(f, true); + } catch (ParentNotDirectoryException e) { + throw new FileNotFoundException(f.toString()); + } + } + + private FileStatus getFileStatusInternal(final Path f, boolean dereference) throws IOException { + String path = normalizePath(f); + Pointer buf = Memory.allocate(Runtime.getRuntime(lib), 130); + int r; + if (dereference) { + r = lib.jfs_stat1(Thread.currentThread().getId(), handle, path, buf); + } else { + r = lib.jfs_lstat1(Thread.currentThread().getId(), handle, path, buf); + } + if (r < 0) { + throw error(r, f); + } + return newFileStatus(makeQualified(f), buf, r, !dereference); + } + + private FileStatus getFileStatusInternalNoException(final Path f) throws IOException { + String path = normalizePath(f); + Pointer buf = Memory.allocate(Runtime.getRuntime(lib), 130); + int r = lib.jfs_lstat1(Thread.currentThread().getId(), handle, path, buf); + if (r < 0) { + return null; + } + return newFileStatus(makeQualified(f), buf, r, false); + } + + @Override + public boolean supportsSymlinks() { + return false; + } + + @Override + public String getCanonicalServiceName() { + return null; // Does not support Token + } + + @Override + public FsStatus getStatus(Path p) throws IOException { + statistics.incrementReadOps(1); + Pointer buf = Memory.allocate(Runtime.getRuntime(lib), 16); + int r = lib.jfs_statvfs(Thread.currentThread().getId(), handle, buf); + if (r != 0) + throw error(r, p); + long capacity = buf.getLongLong(0); + long remaining = buf.getLongLong(8); + return new FsStatus(capacity, capacity - remaining, remaining); + } + + @Override + public void setPermission(Path p, FsPermission permission) throws IOException { + statistics.incrementWriteOps(1); + int r = lib.jfs_chmod(Thread.currentThread().getId(), handle, normalizePath(p), permission.toShort()); + if (r != 0) + throw error(r, p); + } + + @Override + public void setOwner(Path p, String username, String groupname) throws IOException { + statistics.incrementWriteOps(1); + int r = lib.jfs_setOwner(Thread.currentThread().getId(), handle, normalizePath(p), username, groupname); + if (r != 0) + throw error(r, p); + } + + @Override + public void setTimes(Path p, long mtime, long atime) throws IOException { + statistics.incrementWriteOps(1); + int r = lib.jfs_utime(Thread.currentThread().getId(), handle, normalizePath(p), mtime >= 0 ? mtime : -1, + atime >= 0 ? 
atime : -1); + if (r != 0) + throw error(r, p); + } + + @Override + public void close() throws IOException { + super.close(); + if (refreshUidThread != null) { + refreshUidThread.shutdownNow(); + } + lib.jfs_term(Thread.currentThread().getId(), handle); + if (nodesFetcherThread != null) { + nodesFetcherThread.shutdownNow(); + } + if (metricsEnable) { + JuiceFSInstrumentation.close(); + } + } + + public void setXAttr(Path path, String name, byte[] value, EnumSet flag) throws IOException { + Pointer buf = Memory.allocate(Runtime.getRuntime(lib), value.length); + buf.put(0, value, 0, value.length); + int mode = 0; // create or replace + if (flag.contains(XAttrSetFlag.CREATE) && flag.contains(XAttrSetFlag.REPLACE)) { + mode = 0; + } else if (flag.contains(XAttrSetFlag.CREATE)) { + mode = 1; + } else if (flag.contains(XAttrSetFlag.REPLACE)) { + mode = 2; + } + int r = lib.jfs_setXattr(Thread.currentThread().getId(), handle, normalizePath(path), name, buf, value.length, + mode); + if (r < 0) + throw error(r, path); + } + + public byte[] getXAttr(Path path, String name) throws IOException { + Pointer buf; + int bufsize = 16 << 10; + int r; + do { + bufsize *= 2; + buf = Memory.allocate(Runtime.getRuntime(lib), bufsize); + r = lib.jfs_getXattr(Thread.currentThread().getId(), handle, normalizePath(path), name, buf, bufsize); + } while (r == bufsize); + if (r == ENOATTR || r == ENODATA) + return null; // attr not found + if (r < 0) + throw error(r, path); + byte[] value = new byte[r]; + buf.get(0, value, 0, r); + return value; + } + + public Map getXAttrs(Path path) throws IOException { + return getXAttrs(path, listXAttrs(path)); + } + + public Map getXAttrs(Path path, List names) throws IOException { + Map result = new HashMap(); + for (String n : names) { + byte[] value = getXAttr(path, n); + if (value != null) { + result.put(n, value); + } + } + return result; + } + + public List listXAttrs(Path path) throws IOException { + Pointer buf; + int bufsize = 1024; + int r; + do { + bufsize *= 2; + buf = Memory.allocate(Runtime.getRuntime(lib), bufsize); + r = lib.jfs_listXattr(Thread.currentThread().getId(), handle, normalizePath(path), buf, bufsize); + } while (r == bufsize); + if (r < 0) + throw error(r, path); + List result = new ArrayList(); + int off = 0, last = 0; + while (off < r) { + if (buf.getByte(off) == 0) { + byte[] arr = new byte[off - last]; + buf.get(last, arr, 0, arr.length); + result.add(new String(arr)); + last = off + 1; + } + off++; + } + return result; + } + + public void removeXAttr(Path path, String name) throws IOException { + int r = lib.jfs_removeXattr(Thread.currentThread().getId(), handle, normalizePath(path), name); + if (r < 0) + throw error(r, path); + } +} diff --git a/sdk/java/src/main/java/io/juicefs/KiteDataLoader.java b/sdk/java/src/main/java/io/juicefs/KiteDataLoader.java new file mode 100644 index 0000000..e4e8212 --- /dev/null +++ b/sdk/java/src/main/java/io/juicefs/KiteDataLoader.java @@ -0,0 +1,78 @@ +/* + * JuiceFS, Copyright 2021 Juicedata, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
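// --- Illustrative usage sketch, not part of the patch; attribute names and values are hypothetical ---
// The setXAttr/getXAttr/listXAttrs/removeXAttr overrides of JuiceFileSystemImpl earlier in this patch
// plug into the standard Hadoop FileSystem extended-attribute API:
//
//     fs.setXAttr(new Path("/data/file"), "user.origin", "s3://bucket/key".getBytes(StandardCharsets.UTF_8));
//     byte[] origin = fs.getXAttr(new Path("/data/file"), "user.origin");
//     List<String> names = fs.listXAttrs(new Path("/data/file"));
//     fs.removeXAttr(new Path("/data/file"), "user.origin");
//
// XAttrSetFlag.CREATE and XAttrSetFlag.REPLACE map to mode 1 and 2 of jfs_setXattr; passing both
// (or neither) selects mode 0, i.e. create-or-replace.
// --- End of sketch ---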
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ +package io.juicefs; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.Path; +import org.kitesdk.data.DatasetIOException; +import org.kitesdk.data.DatasetOperationException; +import org.kitesdk.data.spi.*; +import org.kitesdk.data.spi.filesystem.FileSystemDatasetRepository; + +import java.io.IOException; +import java.net.URI; +import java.net.URISyntaxException; +import java.util.Map; + +public class KiteDataLoader implements Loadable { + private static class URIBuilder implements OptionBuilder { + + @Override + public DatasetRepository getFromOptions(Map match) { + String path = match.get("path"); + final Path root = (path == null || path.isEmpty()) ? + new Path("/") : new Path("/", path); + + Configuration conf = DefaultConfiguration.get(); + FileSystem fs; + try { + fs = FileSystem.get(fileSystemURI(match), conf); + } catch (IOException e) { + throw new DatasetIOException("Could not get a FileSystem", e); + } + return new FileSystemDatasetRepository.Builder() + .configuration(new Configuration(conf)) // make a modifiable copy + .rootDirectory(fs.makeQualified(root)) + .build(); + } + } + + @Override + public void load() { + try { + // load hdfs-site.xml by loading HdfsConfiguration + FileSystem.getLocal(DefaultConfiguration.get()); + } catch (IOException e) { + throw new DatasetIOException("Cannot load default config", e); + } + + OptionBuilder builder = new URIBuilder(); + Registration.register( + new URIPattern("jfs:/*path"), + new URIPattern("jfs:/*path/:namespace/:dataset"), + builder); + } + + private static URI fileSystemURI(Map match) { + try { + return new URI(match.get(URIPattern.SCHEME), null, + match.get(URIPattern.HOST), -1, "/", null, null); + } catch (URISyntaxException ex) { + throw new DatasetOperationException("[BUG] Could not build FS URI", ex); + } + } +} diff --git a/sdk/java/src/main/java/io/juicefs/Main.java b/sdk/java/src/main/java/io/juicefs/Main.java new file mode 100644 index 0000000..2a5dbee --- /dev/null +++ b/sdk/java/src/main/java/io/juicefs/Main.java @@ -0,0 +1,330 @@ +/* + * JuiceFS, Copyright 2020 Juicedata, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package io.juicefs; + +import com.beust.jcommander.JCommander; +import com.beust.jcommander.Parameter; +import com.beust.jcommander.Parameters; +import com.sun.management.OperatingSystemMXBean; +import io.juicefs.bench.NNBench; +import io.juicefs.bench.TestDFSIO; +import org.apache.commons.cli.ParseException; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.util.Shell; +import org.apache.hadoop.util.VersionInfo; + +import java.io.Closeable; +import java.io.IOException; +import java.lang.management.ManagementFactory; +import java.nio.file.*; +import java.text.DecimalFormat; +import java.util.*; +import java.util.stream.Stream; + +public class Main { + private static final Map COMMAND = new HashMap<>(); + + @Parameter(names = {"--help", "-h", "-help"}, help = true) + private boolean help = false; + + public abstract static class Command implements Closeable { + @Parameter(names = {"--help", "-h", "-help"}, help = true) + public boolean help; + + public Command() { + COMMAND.put(getCommand(), this); + } + + public abstract void init() throws IOException; + + public abstract void run() throws IOException; + + public abstract String getCommand(); + + } + + @Parameters(commandDescription = "Show JuiceFS Information") + private static class CommandShowInfo extends Command { + @Override + public void close() throws IOException { + } + + static class CacheDisk { + String name; + List cacheDirs; + String type; + long diskSize; + long jfsUsedSize; + long freeSize; + + public CacheDisk(String name, List cacheDirs) { + this.name = name; + this.cacheDirs = cacheDirs; + this.type = findDiskType(name); + this.jfsUsedSize = cacheDirs.stream().mapToLong(d -> getDirectorySize(Paths.get(d))).sum(); + try { + this.diskSize = Files.getFileStore(Paths.get(cacheDirs.get(0))).getTotalSpace(); + this.freeSize = Files.getFileStore(Paths.get(cacheDirs.get(0))).getUsableSpace(); + } catch (IOException e) { + throw new RuntimeException(e); + } + } + + private String findDiskType(String deviceName) { + if (deviceName.equals("RAM")) { + return "MEM"; + } + String s; + try { + s = Shell.execCommand("sh", "-c", "cat /sys/block/" + deviceName + "/queue/rotational").trim(); + } catch (IOException e) { + throw new RuntimeException(e); + } + if (s.equals("1")) { + return "HDD"; + } else if (s.equals("0")) { + return "SSD"; + } else { + throw new RuntimeException("unknown disk type"); + } + } + + private long getDirectorySize(Path path) { + long size; + try (Stream walk = Files.walk(path)) { + size = walk + .filter(Files::isRegularFile) + .mapToLong(p -> { + try { + return Files.size(p); + } catch (IOException e) { + System.err.printf("Failed to get size of %s%n%s", p, e); + return 0L; + } + }) + .sum(); + } catch (IOException e) { + throw new RuntimeException(e); + } + return size; + } + + private static String parseSize(long size) { + int GB = 1 << 30; + int MB = 1 << 20; + int KB = 1 << 10; + DecimalFormat df = new DecimalFormat("0.0"); + String resultSize; + if (size / GB >= 1) { + resultSize = df.format(size / (float) GB) + "GiB"; + } else if (size / MB >= 1) { + resultSize = df.format(size / (float) MB) + "MiB"; + } else if (size / KB >= 1) { + resultSize = df.format(size / (float) KB) + "KiB"; + } else { + resultSize = size + "B"; + } + return resultSize; + } + + @Override + public String toString() { + DecimalFormat df = new DecimalFormat("0.00"); + float freeRatio = (float) freeSize / diskSize; + final StringJoiner sj = new StringJoiner("\n"); + sj.add(" " + name + ":"); + if 
(cacheDirs.size() == 1) { + sj.add("\tcacheDir=" + cacheDirs.get(0)); + } else { + sj.add("\tcacheDirs=" + cacheDirs); + } + sj.add("\ttype=" + type) + .add("\tdiskSize=" + parseSize(diskSize)) + .add("\tjfsUsedSize=" + parseSize(jfsUsedSize)) + .add("\tfreeRatio=" + df.format(freeRatio)); + return sj.add("\n").toString(); + } + } + + private final Configuration conf; + + public CommandShowInfo() { + conf = new Configuration(); + } + + @Override + public void init() throws IOException { + + } + + private void showJFSConf() { + System.out.println("JUICEFS CONF:"); + Map jfsConf = conf.getValByRegex("juicefs*"); + StringBuilder sb = new StringBuilder(); + for (Map.Entry entry : jfsConf.entrySet()) { + sb.append("\t").append(entry.getKey()).append("=").append(entry.getValue()).append("\n"); + } + System.out.println(sb); + } + + private void showCacheInfo() { + System.out.println("CACHE INFO:"); + final Map cacheDir = conf.getValByRegex("juicefs.*cache-dir"); + final Map cacheSize = conf.getValByRegex("juicefs.*cache-size"); + + for (Map.Entry entry : cacheSize.entrySet()) { + String jfsName = entry.getKey().split("\\.").length == 3 ? entry.getKey().split("\\.")[1] : ""; + if (!jfsName.equals("")) { + System.out.println("- " + jfsName); + } + System.out.println("\tcacheSize=" + cacheSize.getOrDefault("juicefs." + jfsName + ".cache-size", + cacheSize.getOrDefault("juicefs.cache-size", "100")) + "MiB"); + } + + for (Map.Entry entry : cacheDir.entrySet()) { + String jfsName = entry.getKey().split("\\.").length == 3 ? entry.getKey().split("\\.")[1] : ""; + if (!jfsName.equals("")) { + System.out.println("- " + jfsName); + } + Map> disk2Dirs = new HashMap<>(); + List expandDirs = new ArrayList<>(); + String[] patterns = entry.getValue().split(":"); + for (String pattern : patterns) { + expandDirs.addAll(expandDir(pattern)); + } + for (String dir : expandDirs) { + String disk = findDisk(dir); + disk2Dirs.computeIfAbsent(disk, s -> new ArrayList<>()).add(dir); + } + for (Map.Entry> disk2Dir : disk2Dirs.entrySet()) { + System.out.println(new CacheDisk(disk2Dir.getKey(), disk2Dir.getValue())); + } + } + } + + private String findDisk(String dir) { + if (dir.trim().startsWith("/dev/shm")) { + return "RAM"; + } + try { + String pname = Shell.execCommand("sh", "-c", "df -P " + dir + " | tail -1 | cut -d' ' -f 1 | rev | cut -d '/' -f 1 | rev").trim(); + return Shell.execCommand("sh", "-c", "basename \"$(readlink -f \"/sys/class/block/" + pname + "/..\")\"").trim(); + } catch (IOException e) { + throw new RuntimeException(e); + } + } + + private boolean hasMeta(String path) { + String chars = "*?["; + if (!System.getProperty("os.name").toLowerCase().contains("windows")) { + chars = "*?[\\"; + } + for (char c : chars.toCharArray()) { + if (path.contains(String.valueOf(c))) { + return true; + } + } + return false; + } + + private List expandDir(String path) { + if (!hasMeta(path)) { + return Collections.singletonList(path); + } + List res = new ArrayList<>(); + String p = Paths.get(path).getParent().toString(); + String f = Paths.get(path).getFileName().toString(); + try (DirectoryStream paths = Files.newDirectoryStream(Paths.get(p), f)) { + paths.iterator().forEachRemaining(i -> res.add(i.toString())); + return res; + } catch (NoSuchFileException e) { + Path parent = Paths.get(path).getParent(); + List expands = expandDir(parent.toString()); + for (String expand : expands) { + String d = Paths.get(expand, f).toString(); + if (Files.exists(Paths.get(d))) { + res.add(d); + } + } + return res; + } catch 
(IOException e) { + throw new RuntimeException(e); + } + } + + private void showEnv() throws IOException { + System.out.println("ENV"); + Map env = new LinkedHashMap<>(); + + env.put("cpu", String.valueOf(Runtime.getRuntime().availableProcessors())); + OperatingSystemMXBean osmxb = (OperatingSystemMXBean) ManagementFactory.getOperatingSystemMXBean(); + env.put("cpu_percent", String.format("%.1f%%", osmxb.getSystemCpuLoad() * 100)); + env.put("total_mem", (osmxb.getTotalPhysicalMemorySize() >> 30) + "GiB"); + env.put("free_mem", (osmxb.getFreePhysicalMemorySize() >> 30) + "GiB"); + env.put("file.encoding", System.getProperty("file.encoding")); + env.put("linux", Shell.execCommand("uname", "-r").trim()); + env.put("hadoop", VersionInfo.getVersion()); + env.put("java.version", System.getProperty("java.version")); + env.put("java.home", System.getProperty("java.home")); + + StringBuilder sb = new StringBuilder(); + for (Map.Entry entry : env.entrySet()) { + sb.append("\t").append(entry.getKey()).append("=").append(entry.getValue()).append("\n"); + } + System.out.println(sb); + } + + public void run() throws IOException { + showJFSConf(); + showCacheInfo(); + showEnv(); + } + + @Override + public String getCommand() { + return "info"; + } + } + + + public static void main(String[] args) throws ParseException, IOException { + Main main = new Main(); + Command showInfo = new CommandShowInfo(); + Command dfsio = new TestDFSIO(); + Command nnbench = new NNBench(); + JCommander jc = JCommander.newBuilder() + .addObject(main) + .addCommand(showInfo.getCommand(), showInfo) + .addCommand(dfsio.getCommand(), dfsio) + .addCommand(nnbench.getCommand(), nnbench) + .build(); + jc.parse(args); + + if (main.help) { + jc.usage(); + return; + } + + Command command = COMMAND.get(jc.getParsedCommand()); + if (command.help) { + jc.getCommands().get(jc.getParsedCommand()).usage(); + return; + } + command.init(); + command.run(); + command.close(); + } +} diff --git a/sdk/java/src/main/java/io/juicefs/bench/AccumulatingReducer.java b/sdk/java/src/main/java/io/juicefs/bench/AccumulatingReducer.java new file mode 100644 index 0000000..3697e2c --- /dev/null +++ b/sdk/java/src/main/java/io/juicefs/bench/AccumulatingReducer.java @@ -0,0 +1,103 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + *

+ * http://www.apache.org/licenses/LICENSE-2.0 + *

+ * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package io.juicefs.bench; + +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.hadoop.io.Text; +import org.apache.hadoop.mapred.MapReduceBase; +import org.apache.hadoop.mapred.OutputCollector; +import org.apache.hadoop.mapred.Reducer; +import org.apache.hadoop.mapred.Reporter; + +import java.io.IOException; +import java.util.Iterator; + +/** + * Reducer that accumulates values based on their type. + *

+ * The type is specified in the key part of the key-value pair + * as a prefix to the key in the following way + *

+ * type:key + *

+ * The values are accumulated according to the types: + *

+ * <ul> + * <li>s: - string, concatenate + * <li>f: - float, summ + * <li>l: - long, summ + * </ul>
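// --- Illustrative note, not part of the patch; keys and values below are made-up examples ---
// A mapper feeding AccumulatingReducer only has to follow the prefix convention described above:
//
//     output.collect(new Text("l:totalTime"), new Text("57"));    // longs are summed across map tasks
//     output.collect(new Text("f:rate"), new Text("3.5"));        // floats are summed
//     output.collect(new Text("s:host"), new Text("node-1"));     // strings are concatenated with ';'
//
// The benchmark mappers later in this patch report their counters ("l:totalTime",
// "l:successfulFileOps", ...) using the same convention.
// --- End of note ---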
+ */ +@SuppressWarnings("deprecation") +public class AccumulatingReducer extends MapReduceBase + implements Reducer { + static final String VALUE_TYPE_LONG = "l:"; + static final String VALUE_TYPE_FLOAT = "f:"; + static final String VALUE_TYPE_STRING = "s:"; + private static final Log LOG = LogFactory.getLog(AccumulatingReducer.class); + + protected String hostName; + + public AccumulatingReducer() { + try { + hostName = java.net.InetAddress.getLocalHost().getHostName(); + } catch (Exception e) { + hostName = "localhost"; + } + LOG.info("Starting AccumulatingReducer on " + hostName); + } + + @Override + public void reduce(Text key, + Iterator values, + OutputCollector output, + Reporter reporter + ) throws IOException { + String field = key.toString(); + + reporter.setStatus("starting " + field + " ::host = " + hostName); + + // concatenate strings + if (field.startsWith(VALUE_TYPE_STRING)) { + StringBuffer sSum = new StringBuffer(); + while (values.hasNext()) + sSum.append(values.next().toString()).append(";"); + output.collect(key, new Text(sSum.toString())); + reporter.setStatus("finished " + field + " ::host = " + hostName); + return; + } + // sum long values + if (field.startsWith(VALUE_TYPE_FLOAT)) { + float fSum = 0; + while (values.hasNext()) + fSum += Float.parseFloat(values.next().toString()); + output.collect(key, new Text(String.valueOf(fSum))); + reporter.setStatus("finished " + field + " ::host = " + hostName); + return; + } + // sum long values + if (field.startsWith(VALUE_TYPE_LONG)) { + long lSum = 0; + while (values.hasNext()) { + lSum += Long.parseLong(values.next().toString()); + } + output.collect(key, new Text(String.valueOf(lSum))); + } + reporter.setStatus("finished " + field + " ::host = " + hostName); + } +} diff --git a/sdk/java/src/main/java/io/juicefs/bench/IOMapperBase.java b/sdk/java/src/main/java/io/juicefs/bench/IOMapperBase.java new file mode 100644 index 0000000..8346aad --- /dev/null +++ b/sdk/java/src/main/java/io/juicefs/bench/IOMapperBase.java @@ -0,0 +1,135 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + *

+ * http://www.apache.org/licenses/LICENSE-2.0 + *

+ * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package io.juicefs.bench; + +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.hadoop.conf.Configured; +import org.apache.hadoop.io.LongWritable; +import org.apache.hadoop.io.Text; +import org.apache.hadoop.mapred.JobConf; +import org.apache.hadoop.mapred.Mapper; +import org.apache.hadoop.mapred.OutputCollector; +import org.apache.hadoop.mapred.Reporter; + +import java.io.Closeable; +import java.io.IOException; +import java.net.InetAddress; +import java.util.ArrayList; +import java.util.List; +import java.util.concurrent.ExecutionException; +import java.util.concurrent.ExecutorService; +import java.util.concurrent.Executors; +import java.util.concurrent.Future; +import java.util.concurrent.atomic.AtomicLong; + +public abstract class IOMapperBase extends Configured + implements Mapper { + private static final Log LOG = LogFactory.getLog(IOMapperBase.class); + + protected String hostName; + protected Closeable stream; + protected int threadsPerMap; + protected int filesPerThread; + protected ExecutorService pool; + + public IOMapperBase() { + } + + @Override + public void configure(JobConf conf) { + setConf(conf); + + try { + hostName = InetAddress.getLocalHost().getHostName(); + } catch (Exception e) { + hostName = "localhost"; + } + threadsPerMap = conf.getInt("test.threadsPerMap", 1); + filesPerThread = conf.getInt("test.filesPerThread", 1); + pool = Executors.newFixedThreadPool(threadsPerMap, r -> { + Thread t = new Thread(r); + t.setDaemon(true); + return t; + }); + } + + @Override + public void close() throws IOException { + pool.shutdown(); + } + + abstract Long doIO(Reporter reporter, + String name, + long value, Closeable stream) throws IOException; + + + public Closeable getIOStream(String name) throws IOException { + return null; + } + + abstract void collectStats(OutputCollector output, + String name, + long execTime, + Long doIOReturnValue) throws IOException; + + @Override + public void map(Text key, + LongWritable value, + OutputCollector output, + Reporter reporter) throws IOException { + String name = key.toString(); + long longValue = value.get(); + + reporter.setStatus("starting " + name + " ::host = " + hostName); + AtomicLong execTime = new AtomicLong(0L); + List> futures = new ArrayList<>(threadsPerMap); + for (int i = 0; i < threadsPerMap; i++) { + int id = i; + Future future = pool.submit(() -> { + long res = 0; + for (int j = 0; j < filesPerThread; j++) { + String filePath = String.format("%s/thread-%s/file-%s", name, id, j); + try (Closeable stream = getIOStream(filePath)) { + long tStart = System.currentTimeMillis(); + res += doIO(reporter, name, longValue, stream); + long tEnd = System.currentTimeMillis(); + execTime.addAndGet(tEnd - tStart); + } catch (IOException e) { + throw new RuntimeException(e); + } + } + return res; + }); + futures.add(future); + } + + Long result = 0L; + try { + for (Future future : futures) { + result += future.get(); + } + } catch (InterruptedException | ExecutionException e) { + throw new RuntimeException(e); + } + + collectStats(output, name, execTime.get(), result); + + reporter.setStatus("finished " + name + " ::host = " + hostName); + } +} diff --git 
a/sdk/java/src/main/java/io/juicefs/bench/NNBench.java b/sdk/java/src/main/java/io/juicefs/bench/NNBench.java new file mode 100644 index 0000000..f90036a --- /dev/null +++ b/sdk/java/src/main/java/io/juicefs/bench/NNBench.java @@ -0,0 +1,781 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + *

+ * http://www.apache.org/licenses/LICENSE-2.0 + *

+ * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package io.juicefs.bench; + +import com.beust.jcommander.Parameter; +import com.beust.jcommander.Parameters; +import io.juicefs.Main; +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.conf.Configured; +import org.apache.hadoop.fs.FSDataInputStream; +import org.apache.hadoop.fs.FSDataOutputStream; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.io.LongWritable; +import org.apache.hadoop.io.SequenceFile; +import org.apache.hadoop.io.SequenceFile.CompressionType; +import org.apache.hadoop.io.Text; +import org.apache.hadoop.mapred.*; + +import java.io.BufferedReader; +import java.io.DataInputStream; +import java.io.IOException; +import java.io.InputStreamReader; +import java.text.SimpleDateFormat; +import java.util.*; +import java.util.concurrent.ExecutorService; +import java.util.concurrent.Executors; +import java.util.concurrent.TimeUnit; +import java.util.concurrent.atomic.AtomicInteger; +import java.util.concurrent.atomic.AtomicLong; + +@Parameters(commandDescription = "Distributed create/open/rename/delete meta benchmark") +public class NNBench extends Main.Command { + private static final Log LOG = LogFactory.getLog( + NNBench.class); + + protected static String CONTROL_DIR_NAME = "control"; + protected static String OUTPUT_DIR_NAME = "output"; + protected static String DATA_DIR_NAME = "data"; + + public static long startTime = + System.currentTimeMillis() + (30 * 1000); // default is 'now' + 30s + + @Parameter(description = "[create | open | rename | delete]", required = true) + public static String operation; + @Parameter(names = {"-maps"}, description = "number of maps") + public long numberOfMaps = 1l; // default is 1 + @Parameter(names = {"-files"}, description = "number of files per thread") + public long numberOfFiles = 1l; // default is 1 + @Parameter(names = {"-threads"}, description = "threads per map") + public int threadsPerMap = 1; + public long numberOfReduces = 1l; // default is 1 + @Parameter(names = {"-baseDir"}, description = "full path of dir on FileSystem", required = true) + public String baseDir = "/benchmarks/NNBench"; // default + @Parameter(names = {"-deleteBeforeRename"}, description = "delete files before or after rename operation") + public static boolean deleteBeforeRename; + @Parameter(names = {"-local"}, description = "run in local single process") + private boolean local; + + // Supported operations + private static final String OP_CREATE = "create"; + private static final String OP_OPEN = "open"; + private static final String OP_RENAME = "rename"; + private static final String OP_DELETE = "delete"; + + // To display in the format that matches the NN and DN log format + // Example: 2007-10-26 00:01:19,853 + static SimpleDateFormat sdf = + new SimpleDateFormat("yyyy-MM-dd' 'HH:mm:ss','S"); + + private static Configuration config = new Configuration(); + + /** + * Clean up the files before a test run + * + * @throws IOException on error + */ + private void cleanupBeforeTestrun() throws IOException { + FileSystem tempFS = new Path(baseDir).getFileSystem(config); + + // 
Delete the data directory only if it is the create/write operation + if (operation.equals(OP_CREATE)) { + LOG.info("Deleting data directory"); + tempFS.delete(new Path(baseDir, DATA_DIR_NAME), true); + } + tempFS.delete(new Path(baseDir, CONTROL_DIR_NAME), true); + tempFS.delete(new Path(baseDir, OUTPUT_DIR_NAME), true); + } + + /** + * Create control files before a test run. + * Number of files created is equal to the number of maps specified + * + * @throws IOException on error + */ + private void createControlFiles() throws IOException { + FileSystem tempFS = new Path(baseDir).getFileSystem(config); + LOG.info("Creating " + numberOfMaps + " control files"); + + for (int i = 0; i < numberOfMaps; i++) { + String strFileName = "NNBench_Controlfile_" + i; + Path filePath = new Path(new Path(baseDir, CONTROL_DIR_NAME), + strFileName); + + SequenceFile.Writer writer = null; + try { + writer = SequenceFile.createWriter(tempFS, config, filePath, Text.class, + LongWritable.class, CompressionType.NONE); + writer.append(new Text(strFileName), new LongWritable(i)); + } finally { + if (writer != null) { + writer.close(); + } + } + } + } + + /** + * Analyze the results + * + * @throws IOException on error + */ + private void analyzeResults() throws IOException { + final FileSystem fs = new Path(baseDir).getFileSystem(config); + Path reduceFile = new Path(new Path(baseDir, OUTPUT_DIR_NAME), + "part-00000"); + + DataInputStream in; + in = new DataInputStream(fs.open(reduceFile)); + + BufferedReader lines; + lines = new BufferedReader(new InputStreamReader(in)); + + long totalTime = 0l; + long lateMaps = 0l; + long numOfExceptions = 0l; + long successfulFileOps = 0l; + + long mapStartTimeTPmS = 0l; + long mapEndTimeTPmS = 0l; + + String resultTPSLine1 = null; + String resultALLine1 = null; + + String line; + while ((line = lines.readLine()) != null) { + StringTokenizer tokens = new StringTokenizer(line, " \t\n\r\f%;"); + String attr = tokens.nextToken(); + if (attr.endsWith(":totalTime")) { + totalTime = Long.parseLong(tokens.nextToken()); + } else if (attr.endsWith(":latemaps")) { + lateMaps = Long.parseLong(tokens.nextToken()); + } else if (attr.endsWith(":numOfExceptions")) { + numOfExceptions = Long.parseLong(tokens.nextToken()); + } else if (attr.endsWith(":successfulFileOps")) { + successfulFileOps = Long.parseLong(tokens.nextToken()); + } else if (attr.endsWith(":mapStartTimeTPmS")) { + mapStartTimeTPmS = Long.parseLong(tokens.nextToken()); + } else if (attr.endsWith(":mapEndTimeTPmS")) { + mapEndTimeTPmS = Long.parseLong(tokens.nextToken()); + } + } + + // Average latency is the average time to perform 'n' number of + // operations, n being the number of files + double avgLatency = (double) totalTime / successfulFileOps; + + double totalTimeTPS = + (double) (1000 * successfulFileOps) / (mapEndTimeTPmS - mapStartTimeTPmS); + + if (operation.equals(OP_CREATE)) { + resultTPSLine1 = " TPS: Create: " + + (int) (totalTimeTPS); + resultALLine1 = " Avg Lat (ms): Create: " + avgLatency; + } else if (operation.equals(OP_OPEN)) { + resultTPSLine1 = " TPS: Open: " + + (int) totalTimeTPS; + resultALLine1 = " Avg Lat (ms): Open: " + avgLatency; + } else if (operation.equals(OP_RENAME)) { + resultTPSLine1 = " TPS: Rename: " + + (int) totalTimeTPS; + resultALLine1 = " Avg Lat (ms): Rename: " + avgLatency; + } else if (operation.equals(OP_DELETE)) { + resultTPSLine1 = " TPS: Delete: " + + (int) totalTimeTPS; + resultALLine1 = " Avg Lat (ms): Delete: " + avgLatency; + } + + String resultLines[] = { + 
"-------------- NNBench -------------- : ", + " Date & time: " + sdf.format(new Date( + System.currentTimeMillis())), + "", + " Test Operation: " + operation, + " Start time: " + + sdf.format(new Date(startTime)), + " Maps to run: " + numberOfMaps, + " Threads per map: " + threadsPerMap, + " Files per thread: " + numberOfFiles, + " Successful file operations: " + successfulFileOps, + "", + " # maps that missed the barrier: " + lateMaps, + " # exceptions: " + numOfExceptions, + "", + resultTPSLine1, + resultALLine1, + "", + " RAW DATA: TPS Total (ms): " + totalTime, + " RAW DATA: Job Duration (ms): " + (mapEndTimeTPmS - mapStartTimeTPmS), + " RAW DATA: Late maps: " + lateMaps, + " RAW DATA: # of exceptions: " + numOfExceptions, + ""}; + + // Write to a file and also dump to log + for (int i = 0; i < resultLines.length; i++) { + LOG.info(resultLines[i]); + } + } + + /** + * Run the test + * + * @throws IOException on error + */ + public void runTests() throws IOException { + + JobConf job = new JobConf(config, NNBench.class); + + job.setJobName("NNBench-" + operation); + FileInputFormat.setInputPaths(job, new Path(baseDir, CONTROL_DIR_NAME)); + job.setInputFormat(SequenceFileInputFormat.class); + + // Explicitly set number of max map attempts to 1. + job.setMaxMapAttempts(1); + + // Explicitly turn off speculative execution + job.setSpeculativeExecution(false); + + job.setMapperClass(NNBenchMapper.class); + job.setReducerClass(NNBenchReducer.class); + + FileOutputFormat.setOutputPath(job, new Path(baseDir, OUTPUT_DIR_NAME)); + job.setOutputKeyClass(Text.class); + job.setOutputValueClass(Text.class); + job.setNumReduceTasks((int) numberOfReduces); + JobClient.runJob(job); + } + + /** + * Validate the inputs + */ + public void validateInputs() { + // If it is not one of the four operations, then fail + if (!operation.equals(OP_CREATE) && + !operation.equals(OP_OPEN) && + !operation.equals(OP_RENAME) && + !operation.equals(OP_DELETE)) { + System.err.println("Error: Unknown operation: " + operation); + System.exit(-1); + } + + // If number of maps is a negative number, then fail + // Hadoop allows the number of maps to be 0 + if (numberOfMaps < 0) { + System.err.println("Error: Number of maps must be a positive number"); + System.exit(-1); + } + + // If number of reduces is a negative number or 0, then fail + if (numberOfReduces <= 0) { + System.err.println("Error: Number of reduces must be a positive number"); + System.exit(-1); + } + + // If number of files is a negative number, then fail + if (numberOfFiles < 0) { + System.err.println("Error: Number of files must be a positive number"); + System.exit(-1); + } + } + + @Override + public void init() throws IOException { + LOG.info("Test Inputs: "); + LOG.info(" Test Operation: " + operation); + LOG.info(" Start time: " + sdf.format(new Date(startTime))); + if (!local) { + LOG.info(" Number of maps: " + numberOfMaps); + } + LOG.info("Number of threads per map: " + threadsPerMap); + LOG.info(" Number of files: " + numberOfFiles); + LOG.info(" Base dir: " + baseDir); + + // Set user-defined parameters, so the map method can access the values + config.set("test.nnbench.operation", operation); + config.setLong("test.nnbench.maps", numberOfMaps); + config.setLong("test.nnbench.reduces", numberOfReduces); + config.setLong("test.nnbench.starttime", startTime); + config.setLong("test.nnbench.numberoffiles", numberOfFiles); + config.set("test.nnbench.basedir", baseDir); + config.setInt("test.nnbench.threadsPerMap", threadsPerMap); + 
config.setBoolean("test.nnbench.deleteBeforeRename", deleteBeforeRename); + config.setBoolean("test.nnbench.local", local); + + config.set("test.nnbench.datadir.name", DATA_DIR_NAME); + config.set("test.nnbench.outputdir.name", OUTPUT_DIR_NAME); + config.set("test.nnbench.controldir.name", CONTROL_DIR_NAME); + } + + @Override + public void run() throws IOException { + validateInputs(); + cleanupBeforeTestrun(); + if (local) { + localRun(); + return; + } + createControlFiles(); + runTests(); + analyzeResults(); + } + + private void localRun() { + NNBenchMapper mapper = new NNBenchMapper(); + mapper.configure(new JobConf(config)); + + ExecutorService pool = Executors.newFixedThreadPool(threadsPerMap, r -> { + Thread t = new Thread(r); + t.setDaemon(true); + return t; + }); + + long start = System.currentTimeMillis(); + for (int i = 0; i < threadsPerMap; i++) { + int threadNum = i; + pool.submit(() -> { + try { + mapper.doMap(Collections.synchronizedList(new ArrayList<>()), 0, threadNum); + } catch (IOException e) { + e.printStackTrace(); + System.exit(1); + throw new RuntimeException(e); + } + }); + } + pool.shutdown(); + try { + pool.awaitTermination(1, TimeUnit.DAYS); + } catch (InterruptedException ignored) { + } + long end = System.currentTimeMillis(); + double totalTimeTPS = + (double) (1000 * threadsPerMap * numberOfFiles) / (end - start); + String[] resultLines = { + "-------------- NNBench -------------- : ", + " Date & time: " + sdf.format(new Date( + System.currentTimeMillis())), + "", + " Test Operation: " + operation, + " Start time: " + + sdf.format(new Date(startTime)), + " Threads: " + threadsPerMap, + " Files per thread: " + numberOfFiles, + " Successful file operations: " + threadsPerMap * numberOfFiles, + "", + " TPS: Create: " + (int) (totalTimeTPS), + " Avg Lat (ms): Create: " + String.format("%.2f", (double) (end - start) / (threadsPerMap * numberOfFiles)), + "", + " RAW DATA: Job Duration (ms): " + (end - start), + ""}; + + for (int i = 0; i < resultLines.length; i++) { + LOG.info(resultLines[i]); + } + } + + @Override + public String getCommand() { + return "nnbench"; + } + + @Override + public void close() throws IOException { + + } + + /** + * Mapper class + */ + static class NNBenchMapper extends Configured + implements Mapper { + FileSystem filesystem = null; + + long numberOfFiles = 1l; + boolean beforeRename = false; + String baseDir = null; + String dataDirName = null; + String op = null; + final int MAX_OPERATION_EXCEPTIONS = 1000; + int threadsPerMap = 1; + boolean local; + + ExecutorService executorService; + + // Data to collect from the operation + + /** + * Constructor + */ + public NNBenchMapper() { + } + + + /** + * Mapper base implementation + */ + public void configure(JobConf conf) { + setConf(conf); + local = conf.getBoolean("test.nnbench.local", false); + try { + baseDir = conf.get("test.nnbench.basedir"); + filesystem = new Path(baseDir).getFileSystem(conf); + } catch (Exception e) { + throw new RuntimeException("Cannot get file system.", e); + } + + numberOfFiles = conf.getLong("test.nnbench.numberoffiles", 1l); + dataDirName = conf.get("test.nnbench.datadir.name"); + op = conf.get("test.nnbench.operation"); + beforeRename = conf.getBoolean("test.nnbench.deleteBeforeRename", false); + + threadsPerMap = conf.getInt("test.nnbench.threadsPerMap", 1); + executorService = Executors.newFixedThreadPool(threadsPerMap, r -> { + Thread t = new Thread(r); + t.setDaemon(true); + return t; + }); + } + + /** + * Mapper base implementation + */ + public void 
close() throws IOException { + } + + /** + * Returns when the current number of seconds from the epoch equals + * the command line argument given by -startTime. + * This allows multiple instances of this program, running on clock + * synchronized nodes, to start at roughly the same time. + * + * @return true if the method was able to sleep for -startTime + * without interruption; false otherwise + */ + private boolean barrier() { + if (local) { + return true; + } + long startTime = getConf().getLong("test.nnbench.starttime", 0l); + long currentTime = System.currentTimeMillis(); + long sleepTime = startTime - currentTime; + boolean retVal = false; + + // If the sleep time is greater than 0, then sleep and return + if (sleepTime > 0) { + LOG.info("Waiting in barrier for: " + sleepTime + " ms"); + + try { + Thread.sleep(sleepTime); + retVal = true; + } catch (Exception e) { + retVal = false; + } + } + + return retVal; + } + + /** + * Map method + */ + public void map(Text key, + LongWritable value, + OutputCollector output, + Reporter reporter) throws IOException { + + + List res = Collections.synchronizedList(new ArrayList<>()); + + for (int i = 0; i < threadsPerMap; i++) { + int threadNum = i; + executorService.submit(() -> { + try { + doMap(res, value.get(), threadNum); + } catch (IOException e) { + throw new RuntimeException(e); + } + }); + } + + executorService.shutdown(); + try { + executorService.awaitTermination(1, TimeUnit.DAYS); + } catch (InterruptedException e) { + throw new RuntimeException(e); + } + + long successOps = 0L; + for (Entry entry : res) { + if (entry.key.toString().contains("successfulFileOps")) { + successOps += Long.parseLong(entry.value.toString()); + } + output.collect(entry.key, entry.value); + } + reporter.setStatus("Finish " + successOps + " files"); + } + + static class Entry { + Text key; + Text value; + + Entry(Text key, Text value) { + this.key = key; + this.value = value; + } + } + + private void doMap(List res, long mapId, int threadNum) throws IOException { + long startTimeTPmS = 0l; + long endTimeTPms = 0l; + + AtomicLong successfulFileOps = new AtomicLong(0L); + AtomicInteger numOfExceptions = new AtomicInteger(0); + AtomicLong totalTime = new AtomicLong(0L); + + if (barrier()) { + startTimeTPmS = System.currentTimeMillis(); + if (op.equals(OP_CREATE)) { + doCreate(mapId, successfulFileOps, numOfExceptions, totalTime, threadNum); + } else if (op.equals(OP_OPEN)) { + doOpen(mapId, successfulFileOps, numOfExceptions, totalTime, threadNum); + } else if (op.equals(OP_RENAME)) { + doRenameOp(mapId, successfulFileOps, numOfExceptions, totalTime, threadNum); + } else if (op.equals(OP_DELETE)) { + doDeleteOp(mapId, successfulFileOps, numOfExceptions, totalTime, threadNum); + } + + endTimeTPms = System.currentTimeMillis(); + } else { + res.add(new Entry(new Text("l:latemaps"), new Text("1"))); + } + + // collect after the map end time is measured + res.add(new Entry(new Text("l:totalTime"), + new Text(String.valueOf(totalTime.get())))); + res.add(new Entry(new Text("l:numOfExceptions"), + new Text(String.valueOf(numOfExceptions.get())))); + res.add(new Entry(new Text("l:successfulFileOps"), + new Text(String.valueOf(successfulFileOps.get())))); + res.add(new Entry(new Text("min:mapStartTimeTPmS"), + new Text(String.valueOf(startTimeTPmS)))); + res.add(new Entry(new Text("max:mapEndTimeTPmS"), + new Text(String.valueOf(endTimeTPms)))); + } + + /** + * Create operation. 
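+ * Each thread creates and closes an empty file for every index in [0, numberOfFiles) under
+ * baseDir/dataDirName/mapId/threadNum/, accumulating per-create latency into totalTime and
+ * counting completions in successfulFileOps.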
+ */ + private void doCreate(long mapId, + AtomicLong successfulFileOps, AtomicInteger numOfExceptions, AtomicLong totalTime, int threadNum) throws IOException { + FSDataOutputStream out; + + for (long l = 0L; l < numberOfFiles; l++) { + Path filePath = new Path(new Path(baseDir, dataDirName), + new Path(String.valueOf(mapId), new Path(String.valueOf(threadNum), "file_" + l))); + boolean successfulOp = false; + while (!successfulOp && numOfExceptions.get() < MAX_OPERATION_EXCEPTIONS) { + try { + // Set up timer for measuring AL (transaction #1) + long startTime = System.currentTimeMillis(); + // Create the file + out = filesystem.create(filePath, false); + out.close(); + totalTime.addAndGet(System.currentTimeMillis() - startTime); + successfulFileOps.getAndIncrement(); + successfulOp = true; + } catch (IOException e) { + LOG.info("Exception recorded in op: " + + "Create", e); + numOfExceptions.getAndIncrement(); + throw e; + } + } + } + } + + /** + * Open operation + */ + private void doOpen(long mapId, + AtomicLong successfulFileOps, AtomicInteger numOfExceptions, AtomicLong totalTime, int threadNum) throws IOException { + FSDataInputStream input; + + for (long l = 0L; l < numberOfFiles; l++) { + Path filePath = new Path(new Path(baseDir, dataDirName), + new Path(String.valueOf(mapId), new Path(String.valueOf(threadNum), "file_" + l))); + + boolean successfulOp = false; + while (!successfulOp && numOfExceptions.get() < MAX_OPERATION_EXCEPTIONS) { + try { + // Set up timer for measuring AL + long startTime = System.currentTimeMillis(); + input = filesystem.open(filePath); + input.close(); + totalTime.addAndGet(System.currentTimeMillis() - startTime); + successfulFileOps.getAndIncrement(); + successfulOp = true; + } catch (IOException e) { + LOG.info("Exception recorded in op: OpenRead " + e); + numOfExceptions.getAndIncrement(); + throw e; + } + } + } + } + + /** + * Rename operation + */ + private void doRenameOp(long mapId, + AtomicLong successfulFileOps, AtomicInteger numOfExceptions, AtomicLong totalTime, int threadNum) throws IOException { + for (long l = 0L; l < numberOfFiles; l++) { + Path filePath = new Path(new Path(baseDir, dataDirName), + new Path(String.valueOf(mapId), new Path(String.valueOf(threadNum), "file_" + l))); + Path filePathR = new Path(new Path(baseDir, dataDirName), + new Path(String.valueOf(mapId), new Path(String.valueOf(threadNum), "file_r_" + l))); + + boolean successfulOp = false; + while (!successfulOp && numOfExceptions.get() < MAX_OPERATION_EXCEPTIONS) { + try { + // Set up timer for measuring AL + long startTime = System.currentTimeMillis(); + filesystem.rename(filePath, filePathR); + totalTime.addAndGet(System.currentTimeMillis() - startTime); + successfulFileOps.getAndIncrement(); + successfulOp = true; + } catch (IOException e) { + LOG.info("Exception recorded in op: Rename"); + numOfExceptions.getAndIncrement(); + throw e; + } + } + } + } + + /** + * Delete operation + */ + private void doDeleteOp(long mapId, + AtomicLong successfulFileOps, AtomicInteger numOfExceptions, AtomicLong totalTime, int threadNum) throws IOException { + for (long l = 0L; l < numberOfFiles; l++) { + Path filePath; + if (beforeRename) { + filePath = new Path(new Path(baseDir, dataDirName), + new Path(String.valueOf(mapId), new Path(String.valueOf(threadNum), "file_" + l))); + } else { + filePath = new Path(new Path(baseDir, dataDirName), + new Path(String.valueOf(mapId), new Path(String.valueOf(threadNum), "file_r_" + l))); + } + + boolean successfulOp = false; + while 
(!successfulOp && numOfExceptions.get() < MAX_OPERATION_EXCEPTIONS) { + try { + // Set up timer for measuring AL + long startTime = System.currentTimeMillis(); + filesystem.delete(filePath, false); + totalTime.addAndGet(System.currentTimeMillis() - startTime); + successfulFileOps.getAndIncrement(); + successfulOp = true; + } catch (IOException e) { + LOG.info("Exception in recorded op: Delete"); + numOfExceptions.getAndIncrement(); + throw e; + } + } + } + } + } + + /** + * Reducer class + */ + static class NNBenchReducer extends MapReduceBase + implements Reducer { + + protected String hostName; + + public NNBenchReducer() { + LOG.info("Starting NNBenchReducer !!!"); + try { + hostName = java.net.InetAddress.getLocalHost().getHostName(); + } catch (Exception e) { + hostName = "localhost"; + } + LOG.info("Starting NNBenchReducer on " + hostName); + } + + /** + * Reduce method + */ + public void reduce(Text key, + Iterator values, + OutputCollector output, + Reporter reporter + ) throws IOException { + String field = key.toString(); + + reporter.setStatus("starting " + field + " ::host = " + hostName); + + // sum long values + if (field.startsWith("l:")) { + long lSum = 0; + while (values.hasNext()) { + lSum += Long.parseLong(values.next().toString()); + } + output.collect(key, new Text(String.valueOf(lSum))); + } + + if (field.startsWith("min:")) { + long minVal = -1; + while (values.hasNext()) { + long value = Long.parseLong(values.next().toString()); + + if (minVal == -1) { + minVal = value; + } else { + if (value != 0 && value < minVal) { + minVal = value; + } + } + } + output.collect(key, new Text(String.valueOf(minVal))); + } + + if (field.startsWith("max:")) { + long maxVal = -1; + while (values.hasNext()) { + long value = Long.parseLong(values.next().toString()); + + if (maxVal == -1) { + maxVal = value; + } else { + if (value > maxVal) { + maxVal = value; + } + } + } + output.collect(key, new Text(String.valueOf(maxVal))); + } + + reporter.setStatus("finished " + field + " ::host = " + hostName); + } + } +} diff --git a/sdk/java/src/main/java/io/juicefs/bench/TestDFSIO.java b/sdk/java/src/main/java/io/juicefs/bench/TestDFSIO.java new file mode 100644 index 0000000..20e47e4 --- /dev/null +++ b/sdk/java/src/main/java/io/juicefs/bench/TestDFSIO.java @@ -0,0 +1,927 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + *

+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package io.juicefs.bench; + +import com.beust.jcommander.Parameter; +import com.beust.jcommander.Parameters; +import io.juicefs.Main; +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.*; +import org.apache.hadoop.io.LongWritable; +import org.apache.hadoop.io.SequenceFile; +import org.apache.hadoop.io.SequenceFile.CompressionType; +import org.apache.hadoop.io.Text; +import org.apache.hadoop.io.compress.CompressionCodec; +import org.apache.hadoop.mapred.*; +import org.apache.hadoop.util.ReflectionUtils; + +import java.io.*; +import java.text.DecimalFormat; +import java.util.Date; +import java.util.Locale; +import java.util.StringTokenizer; +import java.util.concurrent.ExecutorService; +import java.util.concurrent.Executors; +import java.util.concurrent.ThreadLocalRandom; +import java.util.concurrent.TimeUnit; +import java.util.concurrent.atomic.AtomicLong; + + +@Parameters(commandDescription = "Distributed i/o benchmark") +public class TestDFSIO extends Main.Command { + // Constants + private static final Log LOG = LogFactory.getLog(TestDFSIO.class); + private static final String BASE_FILE_NAME = "test_io_"; + private static final long MEGA = ByteMultiple.MB.value(); + + @Parameter(description = "[-read | -write]", required = true) + private String testType; + @Parameter(names = {"-random"}, description = "random read") + private boolean random; + @Parameter(names = {"-backward"}, description = "backward read") + private boolean backward; + @Parameter(names = {"-skip"}, description = "skip read") + private boolean skip; + @Parameter(names = {"-local"}, description = "run in local single process") + private boolean local; + + @Parameter(names = {"-baseDir"}, description = "full path of dir on FileSystem", required = true) + private String baseDir = "/benchmarks/DFSIO"; + + @Parameter(names = {"-bufferSize"}, description = "bufferSize[B|KB|MB|GB|TB]") + private String bufferSize = "1MB"; + @Parameter(names = {"-size"}, description = "per file size[B|KB|MB|GB|TB]") + private String size = "1GB"; + @Parameter(names = {"-maps"}, description = "number of maps") + private int maps = 1; + @Parameter(names = {"-threads"}, description = "threads per map") + private int threadsPerMap = 1; + @Parameter(names = {"-files"}, description = "number of files per thread") + private int filesPerThread = 1; + @Parameter(names = {"-skipSize"}, description = "skipSize[B|KB|MB|GB|TB]") + private String skipSize; + @Parameter(names = {"-compression"}, description = "codecClassName") + String compression = null; + @Parameter(names = {"-randomBytes"}, description = "generate randomBytes") + boolean randomBytes = false; + + private FileSystem fs; + private TestType type; + private Configuration config; + + @Override + public void close() throws IOException { + this.fs.close(); + } + + private enum TestType { + TEST_TYPE_READ("read"), + TEST_TYPE_WRITE("write"), + TEST_TYPE_CLEANUP("cleanup"), + TEST_TYPE_APPEND("append"), + TEST_TYPE_READ_RANDOM("random read"), + TEST_TYPE_READ_BACKWARD("backward read"), + TEST_TYPE_READ_SKIP("skip read"), + 
TEST_TYPE_TRUNCATE("truncate"); + + private String type; + + TestType(String t) { + type = t; + } + + @Override // String + public String toString() { + return type; + } + } + + static enum ByteMultiple { + B(1L), + KB(0x400L), + MB(0x100000L), + GB(0x40000000L), + TB(0x10000000000L); + + private long multiplier; + + private ByteMultiple(long mult) { + multiplier = mult; + } + + long value() { + return multiplier; + } + + static ByteMultiple parseString(String sMultiple) { + if (sMultiple == null || sMultiple.isEmpty()) // MB by default + { + return MB; + } + String sMU = sMultiple.toUpperCase(Locale.ENGLISH); + if (B.name().toUpperCase(Locale.ENGLISH).endsWith(sMU)) { + return B; + } + if (KB.name().toUpperCase(Locale.ENGLISH).endsWith(sMU)) { + return KB; + } + if (MB.name().toUpperCase(Locale.ENGLISH).endsWith(sMU)) { + return MB; + } + if (GB.name().toUpperCase(Locale.ENGLISH).endsWith(sMU)) { + return GB; + } + if (TB.name().toUpperCase(Locale.ENGLISH).endsWith(sMU)) { + return TB; + } + throw new IllegalArgumentException("Unsupported ByteMultiple " + sMultiple); + } + } + + public TestDFSIO() { + this.config = new Configuration(); + } + + @Override + public void init() throws IOException { + this.config = new Configuration(); + config.setBoolean("dfs.support.append", true); + this.fs = new Path(baseDir).getFileSystem(config); + + checkArgs(); + switch (testType) { + case "-read": + type = TestType.TEST_TYPE_READ; + break; + case "-write": + type = TestType.TEST_TYPE_WRITE; + break; + case "-append": + type = TestType.TEST_TYPE_APPEND; + break; + case "-truncate": + type = TestType.TEST_TYPE_TRUNCATE; + break; + case "-clean": + type = TestType.TEST_TYPE_CLEANUP; + break; + default: + throw new IllegalArgumentException("wrong type"); + } + if (random) { + type = TestType.TEST_TYPE_READ_RANDOM; + } else if (backward) { + type = TestType.TEST_TYPE_READ_BACKWARD; + } else if (skip) { + type = TestType.TEST_TYPE_READ_SKIP; + } + int bufferSizeBytes = (int) parseSize(bufferSize); + long sizeInBytes = parseSize(size); + long skipSizeInBytes = skipSize == null ? 
0 : parseSize(skipSize); + if (type == TestType.TEST_TYPE_READ_BACKWARD) { + skipSizeInBytes = -bufferSizeBytes; + } else if (type == TestType.TEST_TYPE_READ_SKIP && skipSizeInBytes == 0) { + skipSizeInBytes = bufferSizeBytes; + } + + config.setInt("test.io.file.buffer.size", bufferSizeBytes); + config.setLong("test.io.skip.size", skipSizeInBytes); + config.setBoolean("dfs.support.append", true); + config.setInt("test.threadsPerMap", threadsPerMap); + config.setInt("test.filesPerThread", filesPerThread); + config.set("test.basedir", baseDir); + config.setBoolean("test.randomBytes", randomBytes); + + LOG.info("type = " + type); + if (!local) { + LOG.info("maps = " + maps); + } + LOG.info("threads = " + threadsPerMap); + LOG.info("files = " + filesPerThread); + LOG.info("randomBytes = " + randomBytes); + LOG.info("fileSize (MB) = " + TestDFSIO.toMB(sizeInBytes)); + LOG.info("bufferSize = " + bufferSize); + if (skipSizeInBytes > 0) + LOG.info("skipSize = " + skipSize); + LOG.info("baseDir = " + baseDir); + + createControlFile(fs, sizeInBytes, maps); + if (compression != null) { + LOG.info("compressionClass = " + compression); + } + } + + private void checkArgs() { + if (!testType.equals("-read")) { + if (random || backward || skip) { + throw new IllegalArgumentException("random, backward, skip are only valid under read"); + } + } else { + boolean[] conds = {random, backward, skip}; + int trueCount = 0; + for (boolean cond : conds) { + if (cond) { + trueCount++; + if (trueCount > 1) { + throw new IllegalArgumentException("random, backward, skip are mutually exclusive"); + } + } + } + } + } + + private void localRun(TestType testType) throws IOException { + IOStatMapper ioer; + switch (testType) { + case TEST_TYPE_READ: + ioer = new ReadMapper(); + break; + case TEST_TYPE_WRITE: + ioer = new WriteMapper(); + fs.delete(getDataDir(config), true); + break; + case TEST_TYPE_APPEND: + ioer = new AppendMapper(); + break; + case TEST_TYPE_READ_RANDOM: + case TEST_TYPE_READ_BACKWARD: + case TEST_TYPE_READ_SKIP: + ioer = new RandomReadMapper(); + break; + case TEST_TYPE_TRUNCATE: + ioer = new TruncateMapper(); + break; + default: + return; + } + ExecutorService pool = Executors.newFixedThreadPool(threadsPerMap, r -> { + Thread t = new Thread(r); + t.setDaemon(true); + return t; + }); + + ioer.configure(new JobConf(config)); + AtomicLong sizeProcessed = new AtomicLong(); + long start = System.currentTimeMillis(); + for (int i = 0; i < threadsPerMap; i++) { + int id = i; + pool.execute(() -> { + for (int j = 0; j < filesPerThread; j++) { + String name = String.format("%s/thread-%s/file-%s", getFileName(0), id, j); + try { + Long res = ioer.doIO(Reporter.NULL, name, parseSize(size), ioer.getIOStream(name)); + sizeProcessed.addAndGet(res); + } catch (IOException e) { + e.printStackTrace(); + System.exit(1); + } + } + }); + + } + pool.shutdown(); + try { + pool.awaitTermination(1, TimeUnit.DAYS); + } catch (InterruptedException ignored) { + } + long end = System.currentTimeMillis(); + + DecimalFormat df = new DecimalFormat("#.##"); + String resultLines[] = { + "----- TestClient ----- : " + testType, + " Date & time: " + new Date(System.currentTimeMillis()), + " Number of threads: " + threadsPerMap, + "Number files per thread: " + filesPerThread, + " Total files: " + threadsPerMap * filesPerThread, + " Total MBytes processed: " + df.format(TestDFSIO.toMB(sizeProcessed.get())), + "Total Throughput mb/sec: " + df.format(TestDFSIO.toMB(sizeProcessed.get()) / TestDFSIO.msToSecs(end - start)), + " Test exec time 
sec: " + df.format(TestDFSIO.msToSecs(end - start)), + ""}; + + for (String resultLine : resultLines) { + LOG.info(resultLine); + } + } + + @Override + public void run() throws IOException { + if (type == TestType.TEST_TYPE_CLEANUP) { + cleanup(fs); + return; + } + if (local) { + localRun(type); + return; + } + long tStart = System.currentTimeMillis(); + switch (type) { + case TEST_TYPE_WRITE: + writeTest(fs); + break; + case TEST_TYPE_READ: + readTest(fs); + break; + case TEST_TYPE_APPEND: + appendTest(fs); + break; + case TEST_TYPE_READ_RANDOM: + case TEST_TYPE_READ_BACKWARD: + case TEST_TYPE_READ_SKIP: + randomReadTest(fs); + break; + case TEST_TYPE_TRUNCATE: + truncateTest(fs); + break; + default: + } + long execTime = System.currentTimeMillis() - tStart; + + analyzeResult(fs, type, execTime); + } + + @Override + public String getCommand() { + return "dfsio"; + } + + private String getBaseDir(Configuration conf) { + return baseDir; + } + + private Path getControlDir(Configuration conf) { + return new Path(getBaseDir(conf), "io_control"); + } + + private Path getWriteDir(Configuration conf) { + return new Path(getBaseDir(conf), "io_write"); + } + + private Path getReadDir(Configuration conf) { + return new Path(getBaseDir(conf), "io_read"); + } + + private Path getAppendDir(Configuration conf) { + return new Path(getBaseDir(conf), "io_append"); + } + + private Path getRandomReadDir(Configuration conf) { + return new Path(getBaseDir(conf), "io_random_read"); + } + + private Path getTruncateDir(Configuration conf) { + return new Path(getBaseDir(conf), "io_truncate"); + } + + private Path getDataDir(Configuration conf) { + return new Path(getBaseDir(conf), "io_data"); + } + + + @SuppressWarnings("deprecation") + private void createControlFile(FileSystem fs, + long nrBytes, // in bytes + int maps + ) throws IOException { + LOG.info("creating control file: " + nrBytes + " bytes, " + maps + " files"); + final int maxDirItems = config.getInt("dfs.namenode.fs-limits.max-directory-items", 1024 * 1024); + Path controlDir = getControlDir(config); + + if (maps > maxDirItems) { + final String message = "The directory item limit of " + controlDir + + " is exceeded: limit=" + maxDirItems + " items=" + maps; + throw new IOException(message); + } + + fs.delete(controlDir, true); + + for (int i = 0; i < maps; i++) { + String name = getFileName(i); + Path controlFile = new Path(controlDir, "in_file_" + name); + SequenceFile.Writer writer = null; + try { + writer = SequenceFile.createWriter(fs, config, controlFile, + Text.class, LongWritable.class, + CompressionType.NONE); + writer.append(new Text(name), new LongWritable(nrBytes)); + } catch (Exception e) { + throw new IOException(e.getLocalizedMessage()); + } finally { + if (writer != null) { + writer.close(); + } + } + } + LOG.info("created control files for: " + maps + " files"); + } + + private static String getFileName(int fIdx) { + return BASE_FILE_NAME + fIdx; + } + + /** + * Write/Read mapper base class. + *

+ * Collects the following statistics per task:
+ *   - number of tasks completed
+ *   - number of bytes written/read
+ *   - execution time
+ *   - i/o rate
+ *   - i/o rate squared
+ */ + private abstract static class IOStatMapper extends IOMapperBase { + protected CompressionCodec compressionCodec; + private static final ThreadLocalRandom random = ThreadLocalRandom.current(); + private boolean randomBytes; + protected FileSystem fs; + protected String baseDir; + protected ThreadLocal buffer; + protected int bufferSize; + + IOStatMapper() { + } + + public byte[] getBuffer() { + if (randomBytes) { + random.nextBytes(buffer.get()); + } + return buffer.get(); + } + + @Override // Mapper + public void configure(JobConf conf) { + super.configure(conf); + bufferSize = conf.getInt("test.io.file.buffer.size", 4096); + buffer = ThreadLocal.withInitial(() -> new byte[bufferSize]); + try { + baseDir = conf.get("test.basedir"); + fs = new Path(baseDir).getFileSystem(conf); + } catch (IOException e) { + throw new RuntimeException("Cannot create file system.", e); + } + randomBytes = conf.getBoolean("test.randomBytes", false); + + // grab compression + String compression = getConf().get("test.io.compression.class", null); + Class codec; + + // try to initialize codec + try { + codec = (compression == null) ? null : + Class.forName(compression).asSubclass(CompressionCodec.class); + } catch (Exception e) { + throw new RuntimeException("Compression codec not found: ", e); + } + + if (codec != null) { + compressionCodec = (CompressionCodec) + ReflectionUtils.newInstance(codec, getConf()); + } + + } + + Path getDataDir() { + return new Path(baseDir, "io_data"); + } + + @Override + // IOMapperBase + void collectStats(OutputCollector output, + String name, + long execTime, + Long objSize) throws IOException { + long totalSize = objSize; + float ioRateMbSec = (float) totalSize * 1000 / (execTime * MEGA); + LOG.info("Number of bytes processed = " + totalSize); + LOG.info("Exec time = " + execTime); + LOG.info("IO rate = " + ioRateMbSec); + + output.collect(new Text(AccumulatingReducer.VALUE_TYPE_LONG + "tasks"), + new Text(String.valueOf(threadsPerMap * filesPerThread))); + output.collect(new Text(AccumulatingReducer.VALUE_TYPE_LONG + "size"), + new Text(String.valueOf(totalSize))); + output.collect(new Text(AccumulatingReducer.VALUE_TYPE_LONG + "time"), + new Text(String.valueOf(execTime))); + output.collect(new Text(AccumulatingReducer.VALUE_TYPE_FLOAT + "rate"), + new Text(String.valueOf(ioRateMbSec * 1000 * threadsPerMap))); + output.collect(new Text(AccumulatingReducer.VALUE_TYPE_FLOAT + "sqrate"), + new Text(String.valueOf(ioRateMbSec * ioRateMbSec * 1000 * threadsPerMap))); + } + } + + /** + * Write mapper class. + */ + public static class WriteMapper extends IOStatMapper { + + public WriteMapper() { + } + + @Override // IOMapperBase + public Closeable getIOStream(String name) throws IOException { + // create file + Path f = new Path(getDataDir(), name); + fs.mkdirs(f.getParent()); + OutputStream out = + fs.create(f, false, bufferSize); + if (compressionCodec != null) { + out = compressionCodec.createOutputStream(out); + } + LOG.info("out = " + out.getClass().getName()); + return out; + } + + @Override // IOMapperBase + public Long doIO(Reporter reporter, + String name, + long totalSize, // in bytes + Closeable stream) throws IOException { + OutputStream out = (OutputStream) stream; + + // write to the file + long nrRemaining; + for (nrRemaining = totalSize; nrRemaining > 0; nrRemaining -= bufferSize) { + int curSize = (bufferSize < nrRemaining) ? 
bufferSize : (int) nrRemaining; + out.write(getBuffer(), 0, curSize); + reporter.setStatus("writing " + name + "@" + + (totalSize - nrRemaining) + "/" + totalSize + + " ::host = " + hostName); + } + return Long.valueOf(totalSize); + } + } + + private long writeTest(FileSystem fs) throws IOException { + Path writeDir = getWriteDir(config); + fs.delete(getDataDir(config), true); + fs.delete(writeDir, true); + long tStart = System.currentTimeMillis(); + runIOTest(WriteMapper.class, writeDir); + long execTime = System.currentTimeMillis() - tStart; + return execTime; + } + + private void runIOTest( + Class> mapperClass, + Path outputDir) throws IOException { + JobConf job = new JobConf(config, TestDFSIO.class); + + FileInputFormat.setInputPaths(job, getControlDir(config)); + job.setInputFormat(SequenceFileInputFormat.class); + + job.setMapperClass(mapperClass); + job.setReducerClass(AccumulatingReducer.class); + + FileOutputFormat.setOutputPath(job, outputDir); + job.setOutputKeyClass(Text.class); + job.setOutputValueClass(Text.class); + job.setNumReduceTasks(1); + JobClient.runJob(job); + } + + /** + * Append mapper class. + */ + public static class AppendMapper extends IOStatMapper { + + public AppendMapper() { + } + + @Override // IOMapperBase + public Closeable getIOStream(String name) throws IOException { + // open file for append + OutputStream out = + fs.append(new Path(getDataDir(), name), bufferSize); + if (compressionCodec != null) + out = compressionCodec.createOutputStream(out); + LOG.info("out = " + out.getClass().getName()); + return out; + } + + @Override // IOMapperBase + public Long doIO(Reporter reporter, + String name, + long totalSize, // in bytes + Closeable stream) throws IOException { + OutputStream out = (OutputStream) stream; + // write to the file + long nrRemaining; + for (nrRemaining = totalSize; nrRemaining > 0; nrRemaining -= bufferSize) { + int curSize = (bufferSize < nrRemaining) ? bufferSize : (int) nrRemaining; + out.write(getBuffer(), 0, curSize); + reporter.setStatus("writing " + name + "@" + + (totalSize - nrRemaining) + "/" + totalSize + + " ::host = " + hostName); + } + return totalSize; + } + + + } + + private long appendTest(FileSystem fs) throws IOException { + Path appendDir = getAppendDir(config); + fs.delete(appendDir, true); + long tStart = System.currentTimeMillis(); + runIOTest(AppendMapper.class, appendDir); + return System.currentTimeMillis() - tStart; + } + + /** + * Read mapper class. 
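+ * Opens each file (through the configured compression codec when one is set) and reads it
+ * sequentially in bufferSize chunks until totalSize bytes are consumed or EOF is reached,
+ * returning the number of bytes actually read.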
+ */ + public static class ReadMapper extends IOStatMapper { + + public ReadMapper() { + } + + @Override // IOMapperBase + public Closeable getIOStream(String name) throws IOException { + // open file + InputStream in = fs.open(new Path(getDataDir(), name)); + if (compressionCodec != null) { + in = compressionCodec.createInputStream(in); + } + LOG.info("in = " + in.getClass().getName()); + return in; + } + + @Override // IOMapperBase + public Long doIO(Reporter reporter, + String name, + long totalSize, // in bytes + Closeable stream) throws IOException { + InputStream in = (InputStream) stream; + long actualSize = 0; + while (actualSize < totalSize) { + int curSize = in.read(buffer.get(), 0, bufferSize); + if (curSize < 0) { + break; + } + actualSize += curSize; + reporter.setStatus("reading " + name + "@" + + actualSize + "/" + totalSize + + " ::host = " + hostName); + } + return actualSize; + } + } + + private long readTest(FileSystem fs) throws IOException { + Path readDir = getReadDir(config); + fs.delete(readDir, true); + long tStart = System.currentTimeMillis(); + runIOTest(ReadMapper.class, readDir); + return System.currentTimeMillis() - tStart; + } + + /** + * Mapper class for random reads. + * The mapper chooses a position in the file and reads bufferSize + * bytes starting at the chosen position. + * It stops after reading the totalSize bytes, specified by -size. + *

+ * There are three type of reads. + * 1) Random read always chooses a random position to read from: skipSize = 0 + * 2) Backward read reads file in reverse order : skipSize < 0 + * 3) Skip-read skips skipSize bytes after every read : skipSize > 0 + */ + public static class RandomReadMapper extends IOStatMapper { + private ThreadLocalRandom rnd; + private long fileSize; + private long skipSize; + + @Override // Mapper + public void configure(JobConf conf) { + super.configure(conf); + skipSize = conf.getLong("test.io.skip.size", 0); + } + + public RandomReadMapper() { + rnd = ThreadLocalRandom.current(); + } + + @Override // IOMapperBase + public Closeable getIOStream(String name) throws IOException { + Path filePath = new Path(getDataDir(), name); + this.fileSize = fs.getFileStatus(filePath).getLen(); + InputStream in = fs.open(filePath); + if (compressionCodec != null) + in = new FSDataInputStream(compressionCodec.createInputStream(in)); + LOG.info("in = " + in.getClass().getName()); + LOG.info("skipSize = " + skipSize); + return in; + } + + @Override // IOMapperBase + public Long doIO(Reporter reporter, + String name, + long totalSize, // in bytes + Closeable stream) throws IOException { + PositionedReadable in = (PositionedReadable) stream; + long actualSize = 0; + for (long pos = nextOffset(-1); + actualSize < totalSize; pos = nextOffset(pos)) { + int curSize = in.read(pos, buffer.get(), 0, bufferSize); + if (curSize < 0) break; + actualSize += curSize; + reporter.setStatus("reading " + name + "@" + + actualSize + "/" + totalSize + + " ::host = " + hostName); + } + return actualSize; + } + + /** + * Get next offset for reading. + * If current < 0 then choose initial offset according to the read type. + * + * @param current offset + * @return + */ + private long nextOffset(long current) { + if (skipSize == 0) + return rnd.nextLong(fileSize); + if (skipSize > 0) + return (current < 0) ? 0 : (current + bufferSize + skipSize); + // skipSize < 0 + return (current < 0) ? Math.max(0, fileSize - bufferSize) : + Math.max(0, current + skipSize); + } + } + + private long randomReadTest(FileSystem fs) throws IOException { + Path readDir = getRandomReadDir(config); + fs.delete(readDir, true); + long tStart = System.currentTimeMillis(); + runIOTest(RandomReadMapper.class, readDir); + return System.currentTimeMillis() - tStart; + } + + /** + * Truncate mapper class. + * The mapper truncates given file to the newLength, specified by -size. 
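+ * If the truncate does not complete immediately, the mapper polls the file status every DELAY
+ * milliseconds until the length equals newLength, then reports fileSize - newLength bytes removed.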
+ */ + public static class TruncateMapper extends IOStatMapper { + private static final long DELAY = 100L; + + private Path filePath; + private long fileSize; + + @Override // IOMapperBase + public Closeable getIOStream(String name) throws IOException { + filePath = new Path(getDataDir(), name); + fileSize = fs.getFileStatus(filePath).getLen(); + return null; + } + + @Override // IOMapperBase + public Long doIO(Reporter reporter, + String name, + long newLength, // in bytes + Closeable stream) throws IOException { + boolean isClosed = fs.truncate(filePath, newLength); + reporter.setStatus("truncating " + name + " to newLength " + + newLength + " ::host = " + hostName); + for (int i = 0; !isClosed; i++) { + try { + Thread.sleep(DELAY); + } catch (InterruptedException ignored) { + } + FileStatus status = fs.getFileStatus(filePath); + assert status != null : "status is null"; + isClosed = (status.getLen() == newLength); + reporter.setStatus("truncate recover for " + name + " to newLength " + + newLength + " attempt " + i + " ::host = " + hostName); + } + return fileSize - newLength; + } + } + + private long truncateTest(FileSystem fs) throws IOException { + Path TruncateDir = getTruncateDir(config); + fs.delete(TruncateDir, true); + long tStart = System.currentTimeMillis(); + runIOTest(TruncateMapper.class, TruncateDir); + return System.currentTimeMillis() - tStart; + } + + /** + * Returns size in bytes. + * + * @param arg = {d}[B|KB|MB|GB|TB] + * @return + */ + static long parseSize(String arg) { + String[] args = arg.split("\\D", 2); // get digits + assert args.length <= 2; + long nrBytes = Long.parseLong(args[0]); + String bytesMult = arg.substring(args[0].length()); // get byte multiple + return nrBytes * ByteMultiple.parseString(bytesMult).value(); + } + + static float toMB(long bytes) { + return ((float) bytes) / MEGA; + } + + static float msToSecs(long timeMillis) { + return timeMillis / 1000.0f; + } + + private void analyzeResult(FileSystem fs, + TestType testType, + long execTime + ) throws IOException { + Path reduceFile = getReduceFilePath(testType); + long tasks = 0; + long size = 0; + long time = 0; + float rate = 0; + float sqrate = 0; + DataInputStream in = null; + BufferedReader lines = null; + try { + in = new DataInputStream(fs.open(reduceFile)); + lines = new BufferedReader(new InputStreamReader(in)); + String line; + while ((line = lines.readLine()) != null) { + StringTokenizer tokens = new StringTokenizer(line, " \t\n\r\f%"); + String attr = tokens.nextToken(); + if (attr.endsWith(":tasks")) + tasks = Long.parseLong(tokens.nextToken()); + else if (attr.endsWith(":size")) + size = Long.parseLong(tokens.nextToken()); + else if (attr.endsWith(":time")) + time = Long.parseLong(tokens.nextToken()); + else if (attr.endsWith(":rate")) + rate = Float.parseFloat(tokens.nextToken()); + else if (attr.endsWith(":sqrate")) + sqrate = Float.parseFloat(tokens.nextToken()); + } + } finally { + if (in != null) in.close(); + if (lines != null) lines.close(); + } + + double med = rate / 1000 / tasks; + double stdDev = Math.sqrt(Math.abs(sqrate / 1000 / tasks - med * med)); + DecimalFormat df = new DecimalFormat("#.##"); + String resultLines[] = { + "----- TestDFSIO ----- : " + testType, + " Date & time: " + new Date(System.currentTimeMillis()), + " Number of files: " + tasks, + " Total MBytes processed: " + df.format(toMB(size)), + "Total Throughput MB/sec: " + df.format(toMB(size) / msToSecs(time) * tasks), + " Average IO rate MB/sec: " + df.format(med), + " IO rate std deviation: " + 
df.format(stdDev), + " Test exec time sec: " + df.format(msToSecs(execTime)), + ""}; + for (String resultLine : resultLines) { + LOG.info(resultLine); + } + } + + private Path getReduceFilePath(TestType testType) { + switch (testType) { + case TEST_TYPE_WRITE: + return new Path(getWriteDir(config), "part-00000"); + case TEST_TYPE_APPEND: + return new Path(getAppendDir(config), "part-00000"); + case TEST_TYPE_READ: + return new Path(getReadDir(config), "part-00000"); + case TEST_TYPE_READ_RANDOM: + case TEST_TYPE_READ_BACKWARD: + case TEST_TYPE_READ_SKIP: + return new Path(getRandomReadDir(config), "part-00000"); + case TEST_TYPE_TRUNCATE: + return new Path(getTruncateDir(config), "part-00000"); + default: + } + return null; + } + + private void cleanup(FileSystem fs) + throws IOException { + LOG.info("Cleaning up test files"); + fs.delete(new Path(getBaseDir(config)), true); + } +} diff --git a/sdk/java/src/main/java/io/juicefs/metrics/JuiceFSInstrumentation.java b/sdk/java/src/main/java/io/juicefs/metrics/JuiceFSInstrumentation.java new file mode 100644 index 0000000..84183d5 --- /dev/null +++ b/sdk/java/src/main/java/io/juicefs/metrics/JuiceFSInstrumentation.java @@ -0,0 +1,133 @@ +/* + * JuiceFS, Copyright 2021 Juicedata, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package io.juicefs.metrics; + +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.metrics2.MetricsSystem; +import org.apache.hadoop.metrics2.annotation.Metric; +import org.apache.hadoop.metrics2.annotation.Metrics; +import org.apache.hadoop.metrics2.lib.DefaultMetricsSystem; + +import java.io.IOException; +import java.util.HashMap; +import java.util.Map; + + +@Metrics(context = "JuiceFileSystem", name = "client") +public final class JuiceFSInstrumentation { + private static MetricsSystem system; + private static final String METRIC_NAME = "JuiceFSMetrics"; + + private static int numFileSystems; + + private final Map valueState = new HashMap<>(); + private final Map timeState = new HashMap<>(); + + static { + system = DefaultMetricsSystem.initialize("juicefs"); + } + + private final FileSystem fs; + private final FileSystem.Statistics statistics; + + @Metric("number of bytes read from JuiceFS") + public long getBytesRead() { + return statistics.getBytesRead(); + } + + @Metric("number of bytes write to JuiceFS") + public double getBytesWrite() { + return statistics.getBytesWritten(); + } + + @Metric("write speed") + public synchronized double getBytesWritePerSec() { + return getSpeedPerSec("writeSpeed", statistics.getBytesWritten()); + } + + + @Metric("read speed") + public synchronized double getBytesReadPerSec() { + return getSpeedPerSec("readSpeed", statistics.getBytesRead()); + } + + @Metric("JuiceFS client num") + public synchronized int getNumFileSystems() { + return 1; + } + + @Metric("JuiceFS used size") + public synchronized long getUsedSize() { + try { + return fs.getStatus(new Path("/")).getUsed(); + } catch (IOException e) { + return 0; + } + } + + @Metric("JuiceFS files") + public synchronized long getFiles() { + try { + return fs.getContentSummary(new Path("/")).getFileCount(); + } catch (IOException e) { + return 0; + } + } + + @Metric("JuiceFS dirs") + public synchronized long getDirs() { + try { + return fs.getContentSummary(new Path("/")).getDirectoryCount(); + } catch (IOException e) { + return 0; + } + } + + public double getSpeedPerSec(String name, long currentValue) { + double speed = 0; + long current = System.currentTimeMillis(); + long delta = current - timeState.getOrDefault(name, current); + if (delta > 0) { + speed = (currentValue - valueState.getOrDefault(name, currentValue)) / (delta / 1000.0); + } + valueState.put(name, currentValue); + timeState.put(name, current); + return speed; + } + + public static synchronized void init(FileSystem fs, FileSystem.Statistics statistics) { + if (numFileSystems == 0) { + DefaultMetricsSystem.instance().register(METRIC_NAME, "JuiceFS client metrics", + new JuiceFSInstrumentation(fs, statistics)); + } + numFileSystems++; + } + + private JuiceFSInstrumentation(FileSystem fs, FileSystem.Statistics statistics) { + this.fs = fs; + this.statistics = statistics; + } + + public static synchronized void close() throws IOException { + if (numFileSystems == 1) { + system.publishMetricsNow(); + system.unregisterSource(METRIC_NAME); + } + numFileSystems--; + } +} diff --git a/sdk/java/src/main/java/io/juicefs/utils/CallerContextUtil.java b/sdk/java/src/main/java/io/juicefs/utils/CallerContextUtil.java new file mode 100644 index 0000000..1f78364 --- /dev/null +++ b/sdk/java/src/main/java/io/juicefs/utils/CallerContextUtil.java @@ -0,0 +1,35 @@ +/* + * JuiceFS, Copyright 2021 Juicedata, Inc. 
+ * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package io.juicefs.utils; + +import org.apache.hadoop.ipc.CallerContext; + + +public class CallerContextUtil { + + public static void setContext(String context) throws Exception { + CallerContext current = CallerContext.getCurrent(); + CallerContext.Builder builder; + if (current == null || !current.isContextValid()) { + builder = new CallerContext.Builder(context); + CallerContext.setCurrent(builder.build()); + } else if (current.getSignature() == null && !current.getContext().endsWith("_" + context)) { + builder = new CallerContext.Builder(current.getContext() + "_" + context); + CallerContext.setCurrent(builder.build()); + } + } +} diff --git a/sdk/java/src/main/java/io/juicefs/utils/ConsistentHash.java b/sdk/java/src/main/java/io/juicefs/utils/ConsistentHash.java new file mode 100644 index 0000000..5378091 --- /dev/null +++ b/sdk/java/src/main/java/io/juicefs/utils/ConsistentHash.java @@ -0,0 +1,80 @@ +/* + * JuiceFS, Copyright 2020 Juicedata, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package io.juicefs.utils; + +import com.google.common.hash.HashFunction; +import com.google.common.hash.Hashing; + +import java.util.List; +import java.util.SortedMap; +import java.util.concurrent.ConcurrentSkipListMap; + +public class ConsistentHash { + + private final int numberOfVirtualNodeReplicas; + private final SortedMap circle = new ConcurrentSkipListMap<>(); + private final HashFunction nodeHash = Hashing.murmur3_32(); + private final HashFunction keyHash = Hashing.murmur3_32(); + + public ConsistentHash(int numberOfVirtualNodeReplicas, List nodes) { + this.numberOfVirtualNodeReplicas = numberOfVirtualNodeReplicas; + addNode(nodes); + } + + public void addNode(List nodes) { + for (T node : nodes) { + addNode(node); + } + } + + public void addNode(T node) { + for (int i = 0; i < numberOfVirtualNodeReplicas; i++) { + circle.put(getKetamaHash(i + "" + node), node); + } + } + + public void remove(List nodes) { + for (T node : nodes) { + remove(node); + } + } + + public void remove(T node) { + for (int i = 0; i < numberOfVirtualNodeReplicas; i++) { + circle.remove(getKetamaHash(i + "" + node)); + } + } + + public T get(Object key) { + if (circle.isEmpty()) { + return null; + } + int hash = getKeyHash(key.toString()); + if (!circle.containsKey(hash)) { + SortedMap tailMap = circle.tailMap(hash); + hash = tailMap.isEmpty() ? 
circle.firstKey() : tailMap.firstKey(); + } + return circle.get(hash); + } + + private int getKeyHash(final String k) { + return keyHash.hashBytes(k.getBytes()).asInt(); + } + + private int getKetamaHash(final String k) { + return nodeHash.hashBytes(k.getBytes()).asInt(); + } +} diff --git a/sdk/java/src/main/java/io/juicefs/utils/FsNodesFetcher.java b/sdk/java/src/main/java/io/juicefs/utils/FsNodesFetcher.java new file mode 100644 index 0000000..df96ecb --- /dev/null +++ b/sdk/java/src/main/java/io/juicefs/utils/FsNodesFetcher.java @@ -0,0 +1,60 @@ +/* + * JuiceFS, Copyright 2021 Juicedata, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package io.juicefs.utils; + +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FSDataInputStream; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.Path; + +import java.io.BufferedReader; +import java.io.InputStreamReader; +import java.util.List; +import java.util.Set; +import java.util.stream.Collectors; + +public class FsNodesFetcher extends NodesFetcher { + private static final Log LOG = LogFactory.getLog(FsNodesFetcher.class); + + public FsNodesFetcher(String jfsName) { + super(jfsName); + } + + @Override + public List fetchNodes(String uri) { + Path path = new Path(uri); + try { + FileSystem fs = path.getFileSystem(new Configuration()); + if (!fs.exists(path)) return null; + FSDataInputStream inputStream = fs.open(path); + List res = new BufferedReader(new InputStreamReader(inputStream)) + .lines().collect(Collectors.toList()); + inputStream.close(); + return res; + } catch (Throwable e) { + LOG.warn(e.getMessage()); + } + return null; + } + + @Override + protected Set parseNodes(String response) throws Exception { + return null; + } +} diff --git a/sdk/java/src/main/java/io/juicefs/utils/NodesFetcher.java b/sdk/java/src/main/java/io/juicefs/utils/NodesFetcher.java new file mode 100644 index 0000000..2db440b --- /dev/null +++ b/sdk/java/src/main/java/io/juicefs/utils/NodesFetcher.java @@ -0,0 +1,145 @@ +/* + * JuiceFS, Copyright 2021 Juicedata, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package io.juicefs.utils; + +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import sun.net.www.protocol.http.Handler; + +import java.io.*; +import java.net.HttpURLConnection; +import java.net.URL; +import java.nio.charset.StandardCharsets; +import java.nio.file.Files; +import java.util.ArrayList; +import java.util.List; +import java.util.Set; +import java.util.stream.Collectors; + +/** + * fetch calculate nodes of the cluster + */ +public abstract class NodesFetcher { + private static final Log LOG = LogFactory.getLog(NodesFetcher.class); + + protected File cacheFolder = new File("/tmp/.juicefs"); + protected File cacheFile; + private String jfsName; + + private static Handler handler = new Handler(); + + public NodesFetcher(String jfsName) { + this.jfsName = jfsName; + if (!cacheFolder.exists()) { + cacheFolder.mkdirs(); + } + cacheFile = new File(cacheFolder, jfsName + ".nodes"); + cacheFolder.setWritable(true, false); + cacheFolder.setReadable(true, false); + cacheFolder.setExecutable(true, false); + cacheFile.setWritable(true, false); + cacheFile.setReadable(true, false); + cacheFile.setExecutable(true, false); + } + + public List fetchNodes(String urls) { + List result = readCache(); + + // refresh local disk cache every 10 mins + long duration = System.currentTimeMillis() - cacheFile.lastModified(); + if (duration > 10 * 60 * 1000L || result == null) { + Set nodes = getNodes(urls.split(",")); + if (nodes == null) return result; + result = new ArrayList<>(nodes); + cache(result); + } + + return result; + } + + public List readCache() { + try { + if (!cacheFile.exists()) return null; + return Files.readAllLines(cacheFile.toPath()); + } catch (IOException e) { + LOG.warn("read cache failed due to: ", e); + return null; + } + } + + public void cache(List hostnames) { + File tmpFile = new File(cacheFolder, System.getProperty("user.name") + "-" + jfsName + ".nodes.tmp"); + try (RandomAccessFile writer = new RandomAccessFile(tmpFile, "rws")) { + tmpFile.setWritable(true, false); + tmpFile.setReadable(true, false); + if (hostnames != null) { + String content = String.join("\n", hostnames); + writer.write(content.getBytes()); + } + tmpFile.renameTo(cacheFile); + } catch (IOException e) { + LOG.warn("wirte cache failed due to: ", e); + } + } + + public Set getNodes(String[] urls) { + if (urls == null) { + return null; + } + for (String url : urls) { + try { + String response = doGet(url); + if (response == null) { + continue; + } + return parseNodes(response); + } catch (Throwable e) { + LOG.warn("fetch from:" + url + " failed, switch to another url", e); + } + } + return null; + } + + protected abstract Set parseNodes(String response) throws Exception; + + protected String doGet(String url) { + int timeout = 3; // seconds + + HttpURLConnection con = null; + try { + con = (HttpURLConnection) new URL(null, url, handler).openConnection(); + con.setConnectTimeout(timeout * 1000); + con.setReadTimeout(timeout * 1000); + + int status = con.getResponseCode(); + if (status != 200) return null; + + BufferedReader in = new BufferedReader( + new InputStreamReader(con.getInputStream(), StandardCharsets.UTF_8)); + String content = in.lines().collect(Collectors.joining("\n")); + in.close(); + return content; + } catch (IOException e) { + LOG.warn(e); + return null; + } finally { + if (con != null) { + con.disconnect(); + } + } + } +} diff --git a/sdk/java/src/main/java/io/juicefs/utils/NodesFetcherBuilder.java 
b/sdk/java/src/main/java/io/juicefs/utils/NodesFetcherBuilder.java new file mode 100644 index 0000000..204b1b7 --- /dev/null +++ b/sdk/java/src/main/java/io/juicefs/utils/NodesFetcherBuilder.java @@ -0,0 +1,35 @@ +/* + * JuiceFS, Copyright 2021 Juicedata, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package io.juicefs.utils; + +public class NodesFetcherBuilder { + public static NodesFetcher buildFetcher(String urls, String jfsName) { + NodesFetcher fetcher; + if (urls.contains("cluster/nodes") || "yarn".equals(urls.toLowerCase().trim())) { + fetcher = new YarnNodesFetcher(jfsName); + } else if (urls.contains("service/presto")) { + fetcher = new PrestoNodesFetcher(jfsName); + } else if (urls.contains("/json")) { + fetcher = new SparkNodesFetcher(jfsName); + } else if (urls.contains("api/v1/applications")) { + fetcher = new SparkThriftNodesFetcher(jfsName); + } else { + fetcher = new FsNodesFetcher(jfsName); + } + return fetcher; + } +} diff --git a/sdk/java/src/main/java/io/juicefs/utils/PatchUtil.java b/sdk/java/src/main/java/io/juicefs/utils/PatchUtil.java new file mode 100644 index 0000000..4733289 --- /dev/null +++ b/sdk/java/src/main/java/io/juicefs/utils/PatchUtil.java @@ -0,0 +1,114 @@ +/* + * JuiceFS, Copyright 2021 Juicedata, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package io.juicefs.utils; + +import javassist.ClassPool; +import javassist.CtClass; +import javassist.CtMethod; +import javassist.NotFoundException; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.lang.instrument.ClassDefinition; + +public class PatchUtil { + private static final Logger LOG = LoggerFactory.getLogger(PatchUtil.class); + + public enum PatchType { + BODY, BEFORE, AFTER + } + + public static class ClassMethod { + private String method; + private String[] params; + private PatchType[] types; + private String[] codes; + + public ClassMethod(String method, String[] params, String[] codes, PatchType[] types) { + if (codes.length != types.length) { + LOG.error("{} has {} codes, but only {} types", method, codes.length, types.length); + } + this.method = method; + this.params = params; + this.codes = codes; + this.types = types; + } + } + + public static synchronized void doPatch(String className, ClassMethod[] classMethods) { + + ClassPool classPool = ClassPool.getDefault(); + try { + CtClass cls = classPool.get(className); + + for (ClassMethod classMethod : classMethods) { + String method = classMethod.method; + + CtMethod mtd; + String[] params = classMethod.params; + if (params != null) { + CtClass[] cts = new CtClass[params.length]; + for (int i = 0; i < params.length; i++) { + cts[i] = classPool.get(params[i]); + } + mtd = cls.getDeclaredMethod(method, cts); + } else { + mtd = cls.getDeclaredMethod(method); + } + + String[] codes = classMethod.codes; + PatchType[] types = classMethod.types; + for (int i = 0; i < codes.length; i++) { + switch (types[i]) { + case BODY: + mtd.setBody(codes[0]); + break; + case AFTER: + mtd.insertAfter(codes[0], true); + break; + case BEFORE: + mtd.insertBefore(codes[0]); + break; + } + } + } + + RedefineClassAgent.redefineClasses(new ClassDefinition(Class.forName(className), cls.toBytecode())); + cls.detach(); + } catch (NotFoundException | NoClassDefFoundError ignored) { + } catch (Throwable e) { + LOG.warn(String.format("patch %s failed", className), e); + } + } + + public static void patchBody(String className, String method, String[] params, String code) { + doPatch(className, new ClassMethod[]{new ClassMethod(method, params, new String[]{code}, new PatchType[]{PatchType.BODY})}); + } + + public static void patchBefore(String className, String method, String[] params, String code) { + doPatch(className, new ClassMethod[]{new ClassMethod(method, params, new String[]{code}, new PatchType[]{PatchType.BEFORE})}); + } + + public static void patchAfter(String className, String method, String[] params, String code) { + doPatch(className, new ClassMethod[]{new ClassMethod(method, params, new String[]{code}, new PatchType[]{PatchType.AFTER})}); + } + + public static void patchBeforeAndAfter(String className, String method, String[] params, String beforeCode, String afterCode) { + doPatch(className, new ClassMethod[]{new ClassMethod(method, params, new String[]{beforeCode, afterCode}, new PatchType[]{PatchType.BEFORE, PatchType.AFTER})}); + } + +} diff --git a/sdk/java/src/main/java/io/juicefs/utils/PrestoNodesFetcher.java b/sdk/java/src/main/java/io/juicefs/utils/PrestoNodesFetcher.java new file mode 100644 index 0000000..74875c4 --- /dev/null +++ b/sdk/java/src/main/java/io/juicefs/utils/PrestoNodesFetcher.java @@ -0,0 +1,46 @@ +/* + * JuiceFS, Copyright 2021 Juicedata, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package io.juicefs.utils; + +import org.json.JSONArray; +import org.json.JSONObject; + +import java.net.URL; +import java.util.HashSet; +import java.util.Set; + +public class PrestoNodesFetcher extends NodesFetcher { + + public PrestoNodesFetcher(String jfsName) { + super(jfsName); + } + + // url like "http://hadoop01:8000/v1/service/presto" + @Override + protected Set parseNodes(String response) throws Exception { + Set result = new HashSet<>(); + JSONArray nodes = new JSONObject(response).getJSONArray("services"); + for (Object node : nodes) { + JSONObject nodeProperties = ((JSONObject) node).getJSONObject("properties"); + if (nodeProperties.getString("coordinator").equals("false")) { + String http = nodeProperties.getString("http"); + result.add(new URL(http).getHost()); + } + } + return result; + } +} diff --git a/sdk/java/src/main/java/io/juicefs/utils/RedefineClassAgent.java b/sdk/java/src/main/java/io/juicefs/utils/RedefineClassAgent.java new file mode 100644 index 0000000..c5ef1dc --- /dev/null +++ b/sdk/java/src/main/java/io/juicefs/utils/RedefineClassAgent.java @@ -0,0 +1,229 @@ +/* +Copyright 2017 Turn Inc +All rights reserved. +The contents of this file are subject to the MIT License as provided +below. Alternatively, the contents of this file may be used under +the terms of Mozilla Public License Version 1.1, +the terms of the GNU Lesser General Public License Version 2.1 or later, +or the terms of the Apache License Version 2.0. +License: +Permission is hereby granted, free of charge, to any person obtaining a copy of +this software and associated documentation files (the "Software"), to deal in +the Software without restriction, including without limitation the rights to +use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of +the Software, and to permit persons to whom the Software is furnished to do so, +subject to the following conditions: +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS +FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR +COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER +IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN +CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+*/ + +package io.juicefs.utils; + + +import com.sun.tools.attach.VirtualMachine; +import javassist.CannotCompileException; +import javassist.ClassPool; +import javassist.CtClass; +import javassist.NotFoundException; + +import java.io.File; +import java.io.FileOutputStream; +import java.io.IOException; +import java.lang.instrument.ClassDefinition; +import java.lang.instrument.Instrumentation; +import java.lang.instrument.UnmodifiableClassException; +import java.lang.management.ManagementFactory; +import java.util.jar.Attributes; +import java.util.jar.JarEntry; +import java.util.jar.JarOutputStream; +import java.util.jar.Manifest; +import java.util.logging.Level; +import java.util.logging.Logger; + +/** + * Packages everything necessary to be able to redefine a class using {@link Instrumentation} as provided by + * Java 1.6 or later. Class redefinition is the act of replacing a class' bytecode at runtime, after that class + * has already been loaded. + *
+ * The scheme employed by this class uses an agent (defined by this class) that, when loaded into the JVM, provides + * an instance of {@link Instrumentation} which in turn provides a method to redefine classes. + *
+ * Users of this class only need to call {@link #redefineClasses(ClassDefinition...)}. The agent stuff will be done + * automatically (and lazily). + *
+ * Note that classes cannot be arbitrarily redefined. The new version must retain the same schema; methods and fields + * cannot be added or removed. In practice this means that method bodies can be changed. + *
+ * Note that this is a replacement for javassist's {@code HotSwapper}. {@code HotSwapper} depends on the debug agent + * to perform the hotswap. That agent is available since Java 1.3, but the JVM must be started with the agent enabled, + * and the agent often fails to perform the swap if the machine is under heavy load. This class is both cleaner and more + * reliable. + * + * @author Adam Lugowski + * @see Instrumentation#redefineClasses(ClassDefinition...) + */ +public class RedefineClassAgent { + /** + * Use the Java logger to avoid any references to anything not supplied by the JVM. This avoids issues with + * classpath when compiling/loading this class as an agent. + */ + private static final Logger LOGGER = Logger.getLogger(RedefineClassAgent.class.getSimpleName()); + + /** + * Populated when this class is loaded into the JVM as an agent (via {@link #ensureAgentLoaded()}. + */ + private static volatile Instrumentation instrumentation = null; + + /** + * How long to wait for the agent to load before giving up and assuming the load failed. + */ + private static final int AGENT_LOAD_WAIT_TIME_SEC = 3; + + /** + * Agent entry point. Do not call this directly. + *
+ * This method is called by the JVM when this class is loaded as an agent. + *
+ * Sets {@link #instrumentation} to {@code inst}, provided {@code inst} supports class redefinition. + * + * @param agentArgs ignored. + * @param inst This is the reason this class exists. {@link Instrumentation} has the + * {@link Instrumentation#redefineClasses(ClassDefinition...)} method. + */ + public static void agentmain(String agentArgs, Instrumentation inst) { + if (!inst.isRedefineClassesSupported()) { + LOGGER.severe("Class redefinition not supported. Aborting."); + return; + } + + instrumentation = inst; + } + + /** + * Attempts to redefine class bytecode. + *
+ * On first call this method will attempt to load an agent into the JVM to obtain an instance of + * {@link Instrumentation}. This agent load can introduce a pause (in practice 1 to 2 seconds). + * + * @param definitions classes to redefine. + * @throws UnmodifiableClassException as thrown by {@link Instrumentation#redefineClasses(ClassDefinition...)} + * @throws ClassNotFoundException as thrown by {@link Instrumentation#redefineClasses(ClassDefinition...)} + * @throws FailedToLoadAgentException if agent either failed to load or if the agent wasn't able to get an + * instance of {@link Instrumentation} that allows class redefinitions. + * @see Instrumentation#redefineClasses(ClassDefinition...) + */ + public static void redefineClasses(ClassDefinition... definitions) + throws UnmodifiableClassException, ClassNotFoundException, FailedToLoadAgentException { + ensureAgentLoaded(); + instrumentation.redefineClasses(definitions); + } + + /** + * Lazy loads the agent that populates {@link #instrumentation}. OK to call multiple times. + * + * @throws FailedToLoadAgentException if agent either failed to load or if the agent wasn't able to get an + * instance of {@link Instrumentation} that allows class redefinitions. + */ + private static void ensureAgentLoaded() throws FailedToLoadAgentException { + if (instrumentation != null) { + // already loaded + return; + } + + // load the agent + try { + File agentJar = createAgentJarFile(); + + // Loading an agent requires the PID of the JVM to load the agent to. Find out our PID. + String nameOfRunningVM = ManagementFactory.getRuntimeMXBean().getName(); + String pid = nameOfRunningVM.substring(0, nameOfRunningVM.indexOf('@')); + + // load the agent + VirtualMachine vm = VirtualMachine.attach(pid); + vm.loadAgent(agentJar.getAbsolutePath(), ""); + vm.detach(); + } catch (Exception e) { + throw new FailedToLoadAgentException(e); + } + + // wait for the agent to load + for (int sec = 0; sec < AGENT_LOAD_WAIT_TIME_SEC; sec++) { + if (instrumentation != null) { + // success! + return; + } + + try { + LOGGER.info("Sleeping for 1 second while waiting for agent to load."); + Thread.sleep(1000); + } catch (InterruptedException e) { + Thread.currentThread().interrupt(); + throw new FailedToLoadAgentException(); + } + } + + // agent didn't load + throw new FailedToLoadAgentException(); + } + + /** + * An agent must be specified as a .jar where the manifest has an Agent-Class attribute. Additionally, in order + * to be able to redefine classes, the Can-Redefine-Classes attribute must be true. + *
+ * This method creates such an agent Jar as a temporary file. The Agent-Class is this class. If the returned Jar + * is loaded as an agent then {@link #agentmain(String, Instrumentation)} will be called by the JVM. + * + * @return a temporary {@link File} that points at Jar that packages this class. + * @throws IOException if agent Jar creation failed. + */ + private static File createAgentJarFile() throws IOException { + File jarFile = File.createTempFile("agent", ".jar"); + jarFile.deleteOnExit(); + + // construct a manifest that allows class redefinition + Manifest manifest = new Manifest(); + Attributes mainAttributes = manifest.getMainAttributes(); + mainAttributes.put(Attributes.Name.MANIFEST_VERSION, "1.0"); + mainAttributes.put(new Attributes.Name("Agent-Class"), RedefineClassAgent.class.getName()); + mainAttributes.put(new Attributes.Name("Can-Retransform-Classes"), "true"); + mainAttributes.put(new Attributes.Name("Can-Redefine-Classes"), "true"); + + try (JarOutputStream jos = new JarOutputStream(new FileOutputStream(jarFile), manifest)) { + // add the agent .class into the .jar + JarEntry agent = new JarEntry(RedefineClassAgent.class.getName().replace('.', '/') + ".class"); + jos.putNextEntry(agent); + + // dump the class bytecode into the entry + ClassPool pool = ClassPool.getDefault(); + CtClass ctClass = pool.get(RedefineClassAgent.class.getName()); + jos.write(ctClass.toBytecode()); + jos.closeEntry(); + } catch (CannotCompileException | NotFoundException e) { + // Realistically this should never happen. + LOGGER.log(Level.SEVERE, "Exception while creating RedefineClassAgent jar.", e); + throw new IOException(e); + } + + return jarFile; + } + + /** + * Marks a failure to load the agent and get an instance of {@link Instrumentation} that is able to redefine + * classes. + */ + public static class FailedToLoadAgentException extends Exception { + public FailedToLoadAgentException() { + super(); + } + + public FailedToLoadAgentException(Throwable cause) { + super(cause); + } + } +} diff --git a/sdk/java/src/main/java/io/juicefs/utils/SparkNodesFetcher.java b/sdk/java/src/main/java/io/juicefs/utils/SparkNodesFetcher.java new file mode 100644 index 0000000..678f445 --- /dev/null +++ b/sdk/java/src/main/java/io/juicefs/utils/SparkNodesFetcher.java @@ -0,0 +1,42 @@ +/* + * JuiceFS, Copyright 2021 Juicedata, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package io.juicefs.utils; + +import org.json.JSONArray; +import org.json.JSONObject; + +import java.util.HashSet; +import java.util.Set; + +public class SparkNodesFetcher extends NodesFetcher { + public SparkNodesFetcher(String jfsName) { + super(jfsName); + } + + // url like "http://host:8888/json/" + @Override + protected Set parseNodes(String response) throws Exception { + Set result = new HashSet<>(); + JSONArray workers = new JSONObject(response).getJSONArray("workers"); + for (Object worker : workers) { + if (((JSONObject) worker).getString("state").equals("ALIVE")) { + result.add(((JSONObject) worker).getString("host")); + } + } + return result; + } +} diff --git a/sdk/java/src/main/java/io/juicefs/utils/SparkThriftNodesFetcher.java b/sdk/java/src/main/java/io/juicefs/utils/SparkThriftNodesFetcher.java new file mode 100644 index 0000000..3d37359 --- /dev/null +++ b/sdk/java/src/main/java/io/juicefs/utils/SparkThriftNodesFetcher.java @@ -0,0 +1,74 @@ +/* + * JuiceFS, Copyright 2021 Juicedata, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package io.juicefs.utils; + +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.json.JSONArray; +import org.json.JSONObject; + +import java.util.HashSet; +import java.util.Set; + +// "http://hadoop01:4040/api/v1/applications/"; +public class SparkThriftNodesFetcher extends NodesFetcher { + private static final Log LOG = LogFactory.getLog(SparkThriftNodesFetcher.class); + + public SparkThriftNodesFetcher(String jfsName) { + super(jfsName); + } + + @Override + public Set getNodes(String[] urls) { + if (urls == null || urls.length == 0) { + return null; + } + for (String url : urls) { + try { + JSONArray appArrays = new JSONArray(doGet(url)); + if (appArrays.length() > 0) { + String id = appArrays.getJSONObject(0).getString("id"); + url = url.endsWith("/") ? url : url + "/"; + return parseNodes(doGet(url + id + "/allexecutors")); + } + } catch (Throwable e) { + LOG.warn("fetch from spark thrift server failed!", e); + } + } + return null; + } + + @Override + protected Set parseNodes(String response) throws Exception { + if (response == null) { + return null; + } + Set res = new HashSet<>(); + for (Object item : new JSONArray(response)) { + JSONObject obj = (JSONObject) item; + String id = obj.getString("id"); + boolean isActive = obj.getBoolean("isActive"); + String hostPort = obj.getString("hostPort"); + boolean isBlacklisted = obj.getBoolean("isBlacklisted"); + String[] hAp = hostPort.split(":"); + if (hAp.length > 0 && !"driver".equals(id) && isActive && !isBlacklisted) { + res.add(hAp[0]); + } + } + return res; + } +} diff --git a/sdk/java/src/main/java/io/juicefs/utils/YarnNodesFetcher.java b/sdk/java/src/main/java/io/juicefs/utils/YarnNodesFetcher.java new file mode 100644 index 0000000..2cca5e0 --- /dev/null +++ b/sdk/java/src/main/java/io/juicefs/utils/YarnNodesFetcher.java @@ -0,0 +1,73 @@ +/* + * JuiceFS, Copyright 2021 Juicedata, Inc. 
+ * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package io.juicefs.utils; + +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.hadoop.conf.Configuration; +import org.json.JSONArray; +import org.json.JSONObject; + +import java.util.*; + +public class YarnNodesFetcher extends NodesFetcher { + private static final Log LOG = LogFactory.getLog(YarnNodesFetcher.class); + + public YarnNodesFetcher(String jfsName) { + super(jfsName); + } + + @Override + public Set getNodes(String[] urls) { + if (urls == null || urls.length == 0) { + return null; + } + List yarnUrls = Arrays.asList(urls); + for (String url : urls) { + if ("yarn".equals(url.toLowerCase().trim())) { + Configuration conf = new Configuration(); + Map props = conf.getValByRegex("yarn\\.resourcemanager\\.webapp\\.address.*"); + if (props.size() == 0) { + return null; + } + yarnUrls = new ArrayList<>(); + for (String v : props.values()) { + yarnUrls.add("http://" + v + "/ws/v1/cluster/nodes/"); + } + break; + } + } + return super.getNodes(yarnUrls.toArray(new String[0])); + } + + @Override + protected Set parseNodes(String response) { + Set result = new HashSet<>(); + JSONArray allNodes = new JSONObject(response).getJSONObject("nodes").getJSONArray("node"); + for (Object obj : allNodes) { + if (obj instanceof JSONObject) { + JSONObject node = (JSONObject) obj; + String state = node.getString("state"); + String hostname = node.getString("nodeHostName"); + if ("RUNNING".equals(state)) { + result.add(hostname); + } + } + } + return result; + } +} diff --git a/sdk/java/src/main/resources/META-INF/services/org.apache.flink.core.fs.FileSystemFactory b/sdk/java/src/main/resources/META-INF/services/org.apache.flink.core.fs.FileSystemFactory new file mode 100644 index 0000000..e0986f6 --- /dev/null +++ b/sdk/java/src/main/resources/META-INF/services/org.apache.flink.core.fs.FileSystemFactory @@ -0,0 +1,15 @@ +# JuiceFS, Copyright 2020 Juicedata, Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +io.juicefs.FlinkFileSystemFactory \ No newline at end of file diff --git a/sdk/java/src/main/resources/META-INF/services/org.kitesdk.data.spi.Loadable b/sdk/java/src/main/resources/META-INF/services/org.kitesdk.data.spi.Loadable new file mode 100644 index 0000000..e75ae7f --- /dev/null +++ b/sdk/java/src/main/resources/META-INF/services/org.kitesdk.data.spi.Loadable @@ -0,0 +1,15 @@ +# JuiceFS, Copyright 2021 Juicedata, Inc. 
+# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +io.juicefs.KiteDataLoader diff --git a/sdk/java/src/test/java/io/juicefs/JuiceFileSystemTest.java b/sdk/java/src/test/java/io/juicefs/JuiceFileSystemTest.java new file mode 100644 index 0000000..385579c --- /dev/null +++ b/sdk/java/src/test/java/io/juicefs/JuiceFileSystemTest.java @@ -0,0 +1,613 @@ +/* + * JuiceFS, Copyright 2020 Juicedata, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package io.juicefs; + +import io.juicefs.utils.PatchUtil; +import junit.framework.TestCase; +import org.apache.commons.io.IOUtils; +import org.apache.flink.runtime.fs.hdfs.HadoopRecoverableWriter; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.*; +import org.apache.hadoop.fs.permission.FsAction; +import org.apache.hadoop.fs.permission.FsPermission; +import org.apache.hadoop.io.MD5Hash; +import org.apache.hadoop.security.AccessControlException; +import org.apache.hadoop.security.UserGroupInformation; + +import java.io.FileNotFoundException; +import java.io.IOException; +import java.net.URI; +import java.nio.ByteBuffer; +import java.security.PrivilegedExceptionAction; +import java.util.Arrays; +import java.util.List; +import java.util.Map; + +import static org.junit.Assert.assertArrayEquals; + +public class JuiceFileSystemTest extends TestCase { + FsShell shell; + FileSystem fs; + Configuration cfg; + + public void setUp() throws Exception { + cfg = new Configuration(); + cfg.addResource(JuiceFileSystemTest.class.getClassLoader().getResourceAsStream("core-site.xml")); + Thread.currentThread().interrupt(); + fs = FileSystem.get(cfg); + Thread.interrupted(); + fs.delete(new Path("/hello")); + FSDataOutputStream out = fs.create(new Path("/hello"), true); + out.writeBytes("hello\n"); + out.close(); + + cfg.setQuietMode(false); + shell = new FsShell(cfg); + } + + public void tearDown() throws Exception { + fs.close(); + } + + public void testFsStatus() throws IOException { + FsStatus st = fs.getStatus(); + assertTrue("capacity", st.getCapacity() > 0); + assertTrue("remaining", st.getRemaining() > 0); + } + + public void testSummary() throws IOException { + ContentSummary summary = fs.getContentSummary(new Path("/")); + assertTrue("length", summary.getLength() > 0); + assertTrue("fileCount", summary.getFileCount() > 0); + summary = fs.getContentSummary(new Path("/hello")); + assertEquals(6, summary.getLength()); + assertEquals(1, summary.getFileCount()); + assertEquals(0, summary.getDirectoryCount()); + } + + public 
void testLongName() throws IOException { + Path p = new Path( + "/longname/very_long_long_long_long_long_long_long_long_long_long_long_long_long_long_long_long_long_long_long_long_long_long_long_long_long_long_long_long_long_long_long_long_name"); + fs.mkdirs(p); + FileStatus[] files = fs.listStatus(new Path("/longname")); + if (files.length != 1) { + throw new IOException("expected one file but got " + files.length); + } + if (!files[0].getPath().getName().equals(p.getName())) { + throw new IOException("not equal"); + } + } + + public void testLocation() throws IOException { + FileStatus f = new FileStatus(3L << 30, false, 1, 128L << 20, 0, new Path("/hello")); + BlockLocation[] locations = fs.getFileBlockLocations(f, 128L * 1024 * 1024 - 256, 5L * 64 * 1024 * 1024 - 512L); + + String[] names = locations[0].getNames(); + for (String name : names) { + assertEquals(name.split(":").length, 2); + } + + String[] storageIds = locations[0].getStorageIds(); + assertNotNull(storageIds); + assertEquals(names.length, storageIds.length); + } + + public void testReadWrite() throws Exception { + long l = fs.getFileStatus(new Path("/hello")).getLen(); + assertEquals(6, l); + byte[] buf = new byte[(int) l]; + FSDataInputStream in = fs.open(new Path("/hello")); + in.readFully(buf); + in.close(); + assertEquals("hello\n", new String(buf)); + assertEquals(0, shell.run(new String[]{"-cat", "/hello"})); + + fs.setPermission(new Path("/hello"), new FsPermission((short) 0000)); + UserGroupInformation ugi = + UserGroupInformation.createUserForTesting("nobody", new String[]{"nogroup"}); + FileSystem fs2 = ugi.doAs(new PrivilegedExceptionAction() { + @Override + public FileSystem run() throws Exception { + return FileSystem.get(new URI("jfs://dev"), cfg); + } + }); + try { + in = fs2.open(new Path("/hello")); + assertEquals(in, null); + } catch (IOException e) { + fs.setPermission(new Path("/hello"), new FsPermission((short) 0644)); + } + } + + public void testReadSkip() throws Exception { + Path p = new Path("/test_readskip"); + fs.create(p).close(); + String content = "12345"; + writeFile(fs, p, content); + FSDataInputStream in = fs.open(p); + long skip = in.skip(2); + assertEquals(2, skip); + + byte[] bytes = new byte[content.length() - (int)skip]; + in.readFully(bytes); + assertEquals("345", new String(bytes)); + } + + public void testInitStubLoaderFailed() throws Exception { + PatchUtil.patchBefore(JuiceFileSystemImpl.class.getName(), "initStubLoader", null, "Thread.currentThread().interrupt();"); + FileSystem newFs = createNewFs(cfg, null, null); + newFs.close(); + } + + public void testReadAfterClose() throws Exception { + byte[] buf = new byte[6]; + FSDataInputStream in = fs.open(new Path("/hello")); + in.close(); + try { + in.read(0, buf, 0, 5); + } catch (IOException e) { + if (!e.getMessage().contains("closed")) { + throw new IOException("message should be closed, but got " + e.getMessage()); + } + } + FSDataInputStream in2 = fs.open(new Path("/hello")); + in.close(); // repeated close should not close other's fd + in2.read(0, buf, 0, 5); + in2.close(); + } + + public void testMkdirs() throws Exception { + assertTrue(fs.mkdirs(new Path("/mkdirs"))); + assertTrue(fs.mkdirs(new Path("/mkdirs/dir"))); + assertTrue(fs.delete(new Path("/mkdirs"), true)); + assertTrue(fs.mkdirs(new Path("/mkdirs/test"))); + for (int i = 0; i < 5000; i++) { + fs.mkdirs(new Path("/mkdirs/d" + i)); + } + assertEquals(5001, fs.listStatus(new Path("/mkdirs/")).length); + assertTrue(fs.delete(new Path("/mkdirs"), true)); + } + + 
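For reference, testInitStubLoaderFailed above exercises PatchUtil, which compiles a javassist source fragment into a target method and then hot-swaps the class bytecode through RedefineClassAgent. A minimal usage sketch of that API follows; the class com.example.Foo and its void no-arg method bar() are hypothetical placeholders used only for illustration:

    // insert a statement at the start of com.example.Foo#bar()
    PatchUtil.patchBefore("com.example.Foo", "bar", null,
        "System.out.println(\"entering bar\");");

    // or replace the whole method body; the code argument is javassist source, not compiled Java
    PatchUtil.patchBody("com.example.Foo", "bar", null, "{ return; }");

If the target class is not on the classpath, PatchUtil.doPatch swallows the NotFoundException and the patch silently becomes a no-op, so callers do not need to guard these calls.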
public void testCreateWithoutPermission() throws Exception { + assertTrue(fs.mkdirs(new Path("/noperm"))); + fs.setPermission(new Path("/noperm"), new FsPermission((short) 0555)); + UserGroupInformation ugi = + UserGroupInformation.createUserForTesting("nobody", new String[]{"nogroup"}); + FileSystem fs2 = ugi.doAs(new PrivilegedExceptionAction() { + @Override + public FileSystem run() throws Exception { + return FileSystem.get(new URI("jfs://dev"), cfg); + } + }); + try { + fs2.create(new Path("/noperm/a/file")); + throw new Exception("create should fail"); + } catch (IOException e) { + } + } + + public void testCreateNonRecursive() throws Exception { + Path p = new Path("/NOT_EXIST_DIR"); + p = new Path(p, "file"); + try (FSDataOutputStream ou = fs.createNonRecursive(p, false, 1 << 20, (short) 1, 128 << 20, null);) { + fail("createNonRecursive in a not exit dir should fail"); + } catch (IOException ignored) { + } + } + + public void testTruncate() throws Exception { + Path p = new Path("/test_truncate"); + fs.create(p).close(); + fs.truncate(p, 1 << 20); + assertEquals(1 << 20, fs.getFileStatus(p).getLen()); + fs.truncate(p, 1 << 10); + assertEquals(1 << 10, fs.getFileStatus(p).getLen()); + } + + public void testAccess() throws Exception { + Path p1 = new Path("/test_access"); + FileSystem newFs = createNewFs(cfg, "user1", new String[]{"group1"}); + newFs.create(p1).close(); + newFs.setPermission(p1, new FsPermission((short) 0444)); + newFs.access(p1, FsAction.READ); + try { + newFs.access(p1, FsAction.WRITE); + fail("The access call should have failed."); + } catch (AccessControlException e) { + } + + Path badPath = new Path("/bad/bad"); + try { + newFs.access(badPath, FsAction.READ); + fail("The access call should have failed"); + } catch (FileNotFoundException e) { + } + newFs.close(); + } + + public void testSetPermission() throws Exception { + assertEquals(0, shell.run(new String[]{"-chmod", "0777", "/hello"})); + assertEquals(0777, fs.getFileStatus(new Path("/hello")).getPermission().toShort()); + assertEquals(0, shell.run(new String[]{"-chmod", "0666", "/hello"})); + assertEquals(0666, fs.getFileStatus(new Path("/hello")).getPermission().toShort()); + } + + public void testSetTimes() throws Exception { + fs.setTimes(new Path("/hello"), 1000, 2000); + assertEquals(1000, fs.getFileStatus(new Path("/hello")).getModificationTime()); + // assertEquals(2000, fs.getFileStatus(new Path("/hello")).getAccessTime()); + + Path p = new Path("/test-mtime"); + fs.delete(p, true); + FSDataOutputStream out = fs.create(p); + Thread.sleep(1000); + long mtime1 = fs.getFileStatus(p).getModificationTime(); + out.writeBytes("hello\n"); + out.close(); + long mtime2 = fs.getFileStatus(p).getModificationTime(); + if (mtime2 - mtime1 < 1000) { + throw new IOException("stale mtime"); + } + Thread.sleep(1000); + long mtime3 = fs.getFileStatus(p).getModificationTime(); + if (mtime3 != mtime2) { + throw new IOException("mtime was updated"); + } + } + + public void testSetOwner() throws Exception { + fs.create(new Path("/hello")); + FileStatus parent = fs.getFileStatus(new Path("/")); + FileStatus st = fs.getFileStatus(new Path("/hello")); + if (!parent.getGroup().equals(st.getGroup())) { + throw new Exception( + "group of new created file should be " + parent.getGroup() + ", but got " + st.getGroup()); + } + return; // only root can change the owner/group to others + // fs.setOwner(new Path("/hello"), null, "nogroup"); + // assertEquals("nogroup", fs.getFileStatus(new Path("/hello")).getGroup()); + } + + public 
void testCloseFileSystem() throws Exception { + Configuration conf = new Configuration(); + conf.addResource(JuiceFileSystemTest.class.getClassLoader().getResourceAsStream("core-site.xml")); + for (int i = 0; i < 5; i++) { + FileSystem fs = FileSystem.get(conf); + fs.getFileStatus(new Path("/hello")); + fs.close(); + } + } + + public void testReadahead() throws Exception { + FSDataOutputStream out = fs.create(new Path("/hello"), true); + for (int i = 0; i < 1000000; i++) { + out.writeBytes("hello\n"); + } + out.close(); + + // simulate reading a parquet file + int size = 1000000 * 6; + byte[] buf = new byte[128000]; + FSDataInputStream in = fs.open(new Path("/hello")); + in.read(size - 8, buf, 0, 8); + in.read(size - 5000, buf, 0, 3000); + in.close(); + in = fs.open(new Path("/hello")); + in.read(size - 8, buf, 0, 8); + in.read(size - 5000, buf, 0, 3000); + in.close(); + in = fs.open(new Path("/hello")); + in.read(2000000, buf, 0, 128000); + in.close(); + } + + public void testOutputStream() throws Exception { + FSDataOutputStream out = fs.create(new Path("/haha")); + if (!(out instanceof Syncable)) { + throw new RuntimeException("FSDataOutputStream should be syncable"); + } + if (!(out.getWrappedStream() instanceof Syncable)) { + throw new RuntimeException("BufferedOutputStream should be syncable"); + } + out.hflush(); + out.hsync(); + } + + public void testInputStream() throws Exception { + FSDataInputStream in = fs.open(new Path("/hello")); + if (!(in instanceof ByteBufferReadable)) { + throw new RuntimeException("Inputstream should be bytebufferreadable"); + } + if (!(in.getWrappedStream() instanceof ByteBufferReadable)) { + throw new RuntimeException("Inputstream should not be bytebufferreadable"); + } + + FSDataOutputStream out = fs.create(new Path("/hello"), true); + for (int i = 0; i < 1000000; i++) { + out.writeBytes("hello\n"); + } + out.close(); + + in = fs.open(new Path("/hello")); + ByteBuffer buf = ByteBuffer.allocateDirect(6 * 1000000); + buf.put((byte) in.read()); + while (buf.hasRemaining()) { + int readCount = in.read(buf); + if (readCount == -1) { + // this is probably a bug in the ParquetReader. We shouldn't have called + // readFully with a buffer + // that has more remaining than the amount of data in the stream. + throw new IOException("Reached the end of stream. 
Still have: " + buf.remaining() + " bytes left"); + } + } + /* + * FSDataOutputStream out = fs.create(new Path("/bigfile"), true); byte[] arr = + * new byte[1<<20]; for (int i=0; i<1024; i++) { out.write(arr); } out.close(); + * + * long start = System.currentTimeMillis(); in = fs.open(new Path("/bigfile")); + * ByteBuffer buf = ByteBuffer.allocateDirect(1<<20); long total=0; while (true) + * { int n = in.read(buf); total += n; if (n < buf.capacity()) { break; } } long + * used = System.currentTimeMillis() - start; + * System.out.printf("ByteBuffer read %d throughput %f MB/s\n", total, + * total/1024.0/1024.0/used*1000); + * + * start = System.currentTimeMillis(); in = fs.open(new Path("/bigfile")); + * total=0; while (true) { int n = in.read(buf); total += n; if (n < + * buf.capacity()) { break; } } used = System.currentTimeMillis() - start; + * System.out.printf("ByteBuffer read %d throughput %f MB/s\n", total, + * total/1024.0/1024.0/used*1000); + * + * start = System.currentTimeMillis(); in = fs.open(new Path("/bigfile")); + * total=0; while (true) { int n = in.read(arr); total += n; if (n < + * buf.capacity()) { break; } } used = System.currentTimeMillis() - start; + * System.out.printf("Array read %d throughput %f MB/s\n", total, + * total/1024.0/1024.0/used*1000); + */ + } + + public void testReadStats() throws IOException { + FileSystem.Statistics statistics = FileSystem.getStatistics(fs.getScheme(), + ((FilterFileSystem) fs).getRawFileSystem().getClass()); + statistics.reset(); + Path path = new Path("/hello"); + FSDataOutputStream out = fs.create(path, true); + for (int i = 0; i < 1 << 20; i++) { + out.writeBytes("hello\n"); + } + out.close(); + FSDataInputStream in = fs.open(path); + + int readSize = 512 << 10; + + ByteBuffer buf = ByteBuffer.allocateDirect(readSize); + while (buf.hasRemaining()) { + in.read(buf); + } + assertEquals(readSize, statistics.getBytesRead()); + + in.seek(0); + buf = ByteBuffer.allocate(readSize); + while (buf.hasRemaining()) { + in.read(buf); + } + assertEquals(readSize * 2, statistics.getBytesRead()); + + in.read(0, new byte[3000], 0, 3000); + assertEquals(readSize * 2 + 3000, statistics.getBytesRead()); + + in.read(3000, new byte[6000], 0, 3000); + assertEquals(readSize * 2 + 3000 + 3000, statistics.getBytesRead()); + + } + + public void testChecksum() throws IOException { + Path f = new Path("/empty"); + FSDataOutputStream out = fs.create(f, true); + out.close(); + FileChecksum sum = fs.getFileChecksum(f); + assertEquals(new MD5MD5CRC32GzipFileChecksum(0, 0, new MD5Hash("70bc8f4b72a86921468bf8e8441dce51")), sum); + + f = new Path("/small"); + out = fs.create(f, true); + out.writeBytes("world\n"); + out.close(); + sum = fs.getFileChecksum(f); + assertEquals(new MD5MD5CRC32CastagnoliFileChecksum(512, 0, new MD5Hash("a74dcf6d5ba98e50ae0182c9d5d886fe")), + sum); + sum = fs.getFileChecksum(f, 5); + assertEquals(new MD5MD5CRC32CastagnoliFileChecksum(512, 0, new MD5Hash("05a157db1cc7549c82ec6f31f63fdb46")), + sum); + + f = new Path("/big"); + out = fs.create(f, true); + byte[] zeros = new byte[1024 * 1000]; + for (int i = 0; i < 150; i++) { + out.write(zeros); + } + out.close(); + sum = fs.getFileChecksum(f); + assertEquals( + new MD5MD5CRC32CastagnoliFileChecksum(512, 262144, new MD5Hash("7d04ac8132ad64988f7ba4d819cbde62")), + sum); + } + + public void testXattr() throws IOException { + Path p = new Path("/test-xattr"); + fs.delete(p, true); + fs.create(p); + assertEquals(null, fs.getXAttr(p, "x1")); + fs.setXAttr(p, "x1", new byte[1]); + fs.setXAttr(p, 
"x2", new byte[2]); + List names = fs.listXAttrs(p); + assertEquals(2, names.size()); + Map values = fs.getXAttrs(p); + assertEquals(2, values.size()); + assertEquals(1, values.get("x1").length); + assertEquals(2, values.get("x2").length); + fs.removeXAttr(p, "x2"); + names = fs.listXAttrs(p); + assertEquals(1, names.size()); + assertEquals("x1", names.get(0)); + + // stress + for (int i = 0; i < 100; i++) { + fs.setXAttr(p, "test" + i, new byte[4096]); + } + values = fs.getXAttrs(p); + assertEquals(101, values.size()); + // xattr should be remove together with file + fs.delete(p); + fs.create(p); + names = fs.listXAttrs(p); + assertEquals(0, names.size()); + } + + public void testAppend() throws Exception { + Path f = new Path("/tmp/testappend"); + fs.delete(f); + FSDataOutputStream out = fs.create(f); + out.write("hello".getBytes()); + out.close(); + FSDataOutputStream append = fs.append(f); + assertEquals(5, append.getPos()); + } + + public void testFlinkHadoopRecoverableWriter() throws Exception { + new HadoopRecoverableWriter(fs); + } + + public void testConcat() throws Exception { + Path trg = new Path("/tmp/concat"); + Path src1 = new Path("/tmp/concat1"); + Path src2 = new Path("/tmp/concat2"); + FSDataOutputStream ou = fs.create(trg); + ou.write("hello".getBytes()); + ou.close(); + FSDataOutputStream sou1 = fs.create(src1); + sou1.write("hello".getBytes()); + sou1.close(); + FSDataOutputStream sou2 = fs.create(src2); + sou2.write("hello".getBytes()); + sou2.close(); + fs.concat(trg, new Path[]{src1, src2}); + FSDataInputStream in = fs.open(trg); + assertEquals("hellohellohello", IOUtils.toString(in)); + in.close(); + // src should be deleted after concat + assertFalse(fs.exists(src1)); + assertFalse(fs.exists(src2)); + } + + public void testList() throws Exception { + Path p = new Path("/listsort"); + String[] org = new String[]{ + "/listsort/p4", + "/listsort/p2", + "/listsort/p1", + "/listsort/p3" + }; + fs.mkdirs(p); + for (String path : org) { + fs.mkdirs(new Path(path)); + } + FileStatus[] fss = fs.listStatus(p); + String[] res = new String[fss.length]; + for (int i = 0; i < fss.length; i++) { + res[i] = fss[i].getPath().toUri().getPath(); + } + Arrays.sort(org); + assertArrayEquals(org, res); + } + + private void writeFile(FileSystem fs, Path p, String content) throws IOException { + FSDataOutputStream ou = fs.create(p); + ou.write(content.getBytes()); + ou.close(); + } + + public FileSystem createNewFs(Configuration conf, String user, String[] group) throws IOException, InterruptedException { + if (user != null && group != null) { + UserGroupInformation root = UserGroupInformation.createUserForTesting(user, group); + return root.doAs((PrivilegedExceptionAction) () -> FileSystem.newInstance(FileSystem.getDefaultUri(conf), conf)); + } + return FileSystem.newInstance(FileSystem.getDefaultUri(conf), conf); + } + + public void testUsersAndGroups() throws Exception { + Path users1 = new Path("/tmp/users1"); + Path groups1 = new Path("/tmp/groups1"); + Path users2 = new Path("/tmp/users2"); + Path groups2 = new Path("/tmp/groups2"); + + writeFile(fs, users1, "user1:2001\n"); + writeFile(fs, groups1, "group1:3001:user1\n"); + writeFile(fs, users2, "user2:2001\n"); + writeFile(fs, groups2, "group2:3001:user2\n"); + + Configuration conf = new Configuration(cfg); + conf.set("juicefs.users", users1.toUri().getPath()); + conf.set("juicefs.groups", groups1.toUri().getPath()); + conf.set("juicefs.superuser", UserGroupInformation.getCurrentUser().getShortUserName()); + + FileSystem newFs = 
createNewFs(conf, null, null); + Path p = new Path("/test_user_group_file"); + newFs.create(p).close(); + newFs.setOwner(p, "user1", "group1"); + newFs.close(); + + conf.set("juicefs.users", users2.toUri().getPath()); + conf.set("juicefs.groups", groups2.toUri().getPath()); + newFs = createNewFs(conf, null, null); + FileStatus fileStatus = newFs.getFileStatus(p); + assertEquals("user2", fileStatus.getOwner()); + assertEquals("group2", fileStatus.getGroup()); + newFs.close(); + } + + public void testGroupPerm() throws Exception { + Path testPath = new Path("/test_group_perm"); + + Configuration conf = new Configuration(cfg); + conf.set("juicefs.supergroup", "hadoop"); + conf.set("juicefs.superuser", "hadoop"); + FileSystem uer1Fs = createNewFs(conf, "user1", new String[]{"hadoop"}); + uer1Fs.delete(testPath, true); + uer1Fs.mkdirs(testPath); + uer1Fs.setPermission(testPath, FsPermission.createImmutable((short) 0775)); + uer1Fs.close(); + + FileSystem uer2Fs = createNewFs(conf, "user2", new String[]{"hadoop"}); + Path f = new Path(testPath, "test_file"); + uer2Fs.create(f).close(); + FileStatus fileStatus = uer2Fs.getFileStatus(f); + assertEquals("user2", fileStatus.getOwner()); + uer2Fs.close(); + } + + public void testUmask() throws Exception { + Configuration conf = new Configuration(cfg); + conf.set("juicefs.umask", "077"); + UserGroupInformation currentUser = UserGroupInformation.getCurrentUser(); + FileSystem newFs = createNewFs(conf, currentUser.getShortUserName(), currentUser.getGroupNames()); + newFs.delete(new Path("/test_umask"), true); + newFs.mkdirs(new Path("/test_umask/dir")); + newFs.create(new Path("/test_umask/dir/f")).close(); + assertEquals(FsPermission.createImmutable((short) 0700), newFs.getFileStatus(new Path("/test_umask")).getPermission()); + assertEquals(FsPermission.createImmutable((short) 0700), newFs.getFileStatus(new Path("/test_umask/dir")).getPermission()); + assertEquals(FsPermission.createImmutable((short) 0600), newFs.getFileStatus(new Path("/test_umask/dir/f")).getPermission()); + } +} diff --git a/sdk/java/src/test/java/io/juicefs/contract/JuiceFSContract.java b/sdk/java/src/test/java/io/juicefs/contract/JuiceFSContract.java new file mode 100644 index 0000000..7b4ec38 --- /dev/null +++ b/sdk/java/src/test/java/io/juicefs/contract/JuiceFSContract.java @@ -0,0 +1,32 @@ +/* + * JuiceFS, Copyright 2020 Juicedata, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package io.juicefs.contract; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.contract.AbstractBondedFSContract; + +public class JuiceFSContract extends AbstractBondedFSContract { + + public JuiceFSContract(Configuration conf) { + super(conf); + addConfResource("contract/juicefs.xml"); + } + + @Override + public String getScheme() { + return "jfs"; + } +} diff --git a/sdk/java/src/test/java/io/juicefs/contract/TestAppend.java b/sdk/java/src/test/java/io/juicefs/contract/TestAppend.java new file mode 100644 index 0000000..e655c3b --- /dev/null +++ b/sdk/java/src/test/java/io/juicefs/contract/TestAppend.java @@ -0,0 +1,35 @@ +/* + * JuiceFS, Copyright 2020 Juicedata, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package io.juicefs.contract; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.fs.contract.AbstractContractAppendTest; +import org.apache.hadoop.fs.contract.AbstractFSContract; + + +public class TestAppend extends AbstractContractAppendTest { + @Override + protected AbstractFSContract createContract(Configuration conf) { + return new JuiceFSContract(conf); + } + + @Override + public void teardown() throws Exception { + getFileSystem().delete(new Path(path("test"), "target")); + super.teardown(); + } +} diff --git a/sdk/java/src/test/java/io/juicefs/contract/TestConcat.java b/sdk/java/src/test/java/io/juicefs/contract/TestConcat.java new file mode 100644 index 0000000..d9fc5e4 --- /dev/null +++ b/sdk/java/src/test/java/io/juicefs/contract/TestConcat.java @@ -0,0 +1,28 @@ +/* + * JuiceFS, Copyright 2020 Juicedata, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package io.juicefs.contract; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.contract.AbstractContractConcatTest; +import org.apache.hadoop.fs.contract.AbstractFSContract; + +public class TestConcat extends AbstractContractConcatTest { + @Override + protected AbstractFSContract createContract(Configuration conf) { + return new JuiceFSContract(conf); + } +} diff --git a/sdk/java/src/test/java/io/juicefs/contract/TestCreate.java b/sdk/java/src/test/java/io/juicefs/contract/TestCreate.java new file mode 100644 index 0000000..82757b6 --- /dev/null +++ b/sdk/java/src/test/java/io/juicefs/contract/TestCreate.java @@ -0,0 +1,27 @@ +/* + * JuiceFS, Copyright 2020 Juicedata, Inc. 
+ * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package io.juicefs.contract; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.contract.AbstractContractCreateTest; +import org.apache.hadoop.fs.contract.AbstractFSContract; + +public class TestCreate extends AbstractContractCreateTest { + @Override + protected AbstractFSContract createContract(Configuration conf) { + return new JuiceFSContract(conf); + } +} diff --git a/sdk/java/src/test/java/io/juicefs/contract/TestDelete.java b/sdk/java/src/test/java/io/juicefs/contract/TestDelete.java new file mode 100644 index 0000000..a958747 --- /dev/null +++ b/sdk/java/src/test/java/io/juicefs/contract/TestDelete.java @@ -0,0 +1,27 @@ +/* + * JuiceFS, Copyright 2020 Juicedata, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package io.juicefs.contract; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.contract.AbstractContractDeleteTest; +import org.apache.hadoop.fs.contract.AbstractFSContract; + +public class TestDelete extends AbstractContractDeleteTest { + @Override + protected AbstractFSContract createContract(Configuration conf) { + return new JuiceFSContract(conf); + } +} diff --git a/sdk/java/src/test/java/io/juicefs/contract/TestGetFileStatus.java b/sdk/java/src/test/java/io/juicefs/contract/TestGetFileStatus.java new file mode 100644 index 0000000..f25cd7a --- /dev/null +++ b/sdk/java/src/test/java/io/juicefs/contract/TestGetFileStatus.java @@ -0,0 +1,35 @@ +/* + * JuiceFS, Copyright 2020 Juicedata, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package io.juicefs.contract; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.fs.contract.AbstractContractGetFileStatusTest; +import org.apache.hadoop.fs.contract.AbstractFSContract; + + +public class TestGetFileStatus extends AbstractContractGetFileStatusTest { + @Override + protected AbstractFSContract createContract(Configuration conf) { + return new JuiceFSContract(conf); + } + + @Override + public void setup() throws Exception { + super.setup(); + getFileSystem().delete(new Path("jfs:///test")); + } +} diff --git a/sdk/java/src/test/java/io/juicefs/contract/TestJuiceFileSystemContract.java b/sdk/java/src/test/java/io/juicefs/contract/TestJuiceFileSystemContract.java new file mode 100644 index 0000000..0670c1a --- /dev/null +++ b/sdk/java/src/test/java/io/juicefs/contract/TestJuiceFileSystemContract.java @@ -0,0 +1,60 @@ +/* + * JuiceFS, Copyright 2021 Juicedata, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package io.juicefs.contract; + +import io.juicefs.JuiceFileSystemTest; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.*; +import org.apache.hadoop.fs.permission.FsPermission; +import org.junit.Before; +import org.junit.Test; + +import java.io.IOException; + +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertTrue; +import static org.junit.Assume.assumeNotNull; + +public class TestJuiceFileSystemContract extends FileSystemContractBaseTest { + @Before + public void setUp() throws Exception { + Configuration cfg = new Configuration(); + cfg.addResource(JuiceFileSystemTest.class.getClassLoader().getResourceAsStream("core-site.xml")); + Thread.currentThread().interrupt(); + fs = FileSystem.get(cfg); + assumeNotNull(fs); + } + + public FileSystem createNewFs(Configuration conf) throws IOException { + return FileSystem.newInstance(FileSystem.getDefaultUri(conf), conf); + } + + @Test + public void testMkdirsWithUmask() throws Exception { + Configuration conf = new Configuration(fs.getConf()); + conf.set(CommonConfigurationKeys.FS_PERMISSIONS_UMASK_KEY, TEST_UMASK); + FileSystem newFs = createNewFs(conf); + try { + final Path dir = path("newDir"); + assertTrue(newFs.mkdirs(dir, new FsPermission((short) 0777))); + FileStatus status = newFs.getFileStatus(dir); + assertTrue(status.isDirectory()); + assertEquals((short) 0715, status.getPermission().toShort()); + } finally { + newFs.close(); + } + } +} \ No newline at end of file diff --git a/sdk/java/src/test/java/io/juicefs/contract/TestMkdir.java b/sdk/java/src/test/java/io/juicefs/contract/TestMkdir.java new file mode 100644 index 0000000..6f27f64 --- /dev/null +++ b/sdk/java/src/test/java/io/juicefs/contract/TestMkdir.java @@ -0,0 +1,27 @@ +/* + * JuiceFS, Copyright 2020 Juicedata, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package io.juicefs.contract; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.contract.AbstractContractMkdirTest; +import org.apache.hadoop.fs.contract.AbstractFSContract; + +public class TestMkdir extends AbstractContractMkdirTest { + @Override + protected AbstractFSContract createContract(Configuration conf) { + return new JuiceFSContract(conf); + } +} diff --git a/sdk/java/src/test/java/io/juicefs/contract/TestOpen.java b/sdk/java/src/test/java/io/juicefs/contract/TestOpen.java new file mode 100644 index 0000000..c780ac3 --- /dev/null +++ b/sdk/java/src/test/java/io/juicefs/contract/TestOpen.java @@ -0,0 +1,27 @@ +/* + * JuiceFS, Copyright 2020 Juicedata, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package io.juicefs.contract; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.contract.AbstractContractOpenTest; +import org.apache.hadoop.fs.contract.AbstractFSContract; + +public class TestOpen extends AbstractContractOpenTest { + @Override + protected AbstractFSContract createContract(Configuration conf) { + return new JuiceFSContract(conf); + } +} diff --git a/sdk/java/src/test/java/io/juicefs/contract/TestRename.java b/sdk/java/src/test/java/io/juicefs/contract/TestRename.java new file mode 100644 index 0000000..bbd1349 --- /dev/null +++ b/sdk/java/src/test/java/io/juicefs/contract/TestRename.java @@ -0,0 +1,29 @@ +/* + * JuiceFS, Copyright 2020 Juicedata, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package io.juicefs.contract; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.contract.AbstractContractRenameTest; +import org.apache.hadoop.fs.contract.AbstractFSContract; + + +public class TestRename extends AbstractContractRenameTest { + @Override + protected AbstractFSContract createContract(Configuration conf) { + return new JuiceFSContract(conf); + } +} diff --git a/sdk/java/src/test/java/io/juicefs/contract/TestSeek.java b/sdk/java/src/test/java/io/juicefs/contract/TestSeek.java new file mode 100644 index 0000000..1ded6d7 --- /dev/null +++ b/sdk/java/src/test/java/io/juicefs/contract/TestSeek.java @@ -0,0 +1,34 @@ +/* + * JuiceFS, Copyright 2020 Juicedata, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package io.juicefs.contract; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.contract.AbstractContractSeekTest; +import org.apache.hadoop.fs.contract.AbstractFSContract; + +public class TestSeek extends AbstractContractSeekTest { + @Override + protected AbstractFSContract createContract(Configuration conf) { + return new JuiceFSContract(conf); + } + + @Override + public void teardown() throws Exception { + getFileSystem().delete(path("bigseekfile.txt")); + super.teardown(); + } +} diff --git a/sdk/java/src/test/java/io/juicefs/contract/TestSetTimes.java b/sdk/java/src/test/java/io/juicefs/contract/TestSetTimes.java new file mode 100644 index 0000000..0a4cada --- /dev/null +++ b/sdk/java/src/test/java/io/juicefs/contract/TestSetTimes.java @@ -0,0 +1,28 @@ +/* + * JuiceFS, Copyright 2020 Juicedata, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package io.juicefs.contract; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.contract.AbstractContractSetTimesTest; +import org.apache.hadoop.fs.contract.AbstractFSContract; + +public class TestSetTimes extends AbstractContractSetTimesTest { + @Override + protected AbstractFSContract createContract(Configuration conf) { + return new JuiceFSContract(conf); + } +} diff --git a/sdk/java/src/test/java/io/juicefs/utils/HashTest.java b/sdk/java/src/test/java/io/juicefs/utils/HashTest.java new file mode 100644 index 0000000..669daa8 --- /dev/null +++ b/sdk/java/src/test/java/io/juicefs/utils/HashTest.java @@ -0,0 +1,109 @@ +/* + * JuiceFS, Copyright 2020 Juicedata, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package io.juicefs.utils; + +import com.google.common.collect.Lists; +import junit.framework.TestCase; +import org.apache.commons.math3.stat.descriptive.SummaryStatistics; + +import java.util.*; +import java.util.function.Function; +import java.util.stream.Collectors; + +public class HashTest extends TestCase { + private static List<String> PATHS = new ArrayList<String>() { + { + String prefix = "jfs:///tmp/file"; + for (int i = 0; i < 1_000; i++) { + add(prefix + i); + } + } + }; + + public void testConsistentHashCompat() { + ConsistentHash<String> hash = new ConsistentHash<>(100, Lists.<String>newArrayList()); + hash.addNode("192.168.1.1"); + hash.addNode("192.168.2.2"); + hash.addNode("192.168.3.3"); + hash.addNode("192.168.4.4"); + assertEquals("192.168.3.3", hash.get("123-0")); + assertEquals("192.168.4.4", hash.get("456-2")); + assertEquals("192.168.2.2", hash.get("789-3")); + } + + public void testConsistentHash() { + ConsistentHash<String> hash = new ConsistentHash<>(100, getNodes()); + Map<String, String> before = new HashMap<>(); + Map<String, String> after = new HashMap<>(); + + for (String path : PATHS) { + before.put(path, hash.get(path)); + } + + hash.remove("Node4"); + for (String path : PATHS) { + after.put(path, hash.get(path)); + } + System.out.println("====== stdev"); + System.out.println("before:\t" + stdev(before)); + System.out.println("after:\t" + stdev(after)); + + System.out.println("====== (max - min)/avg"); + Map<String, Long> collect = after.values().stream() + .collect(Collectors.groupingBy(Function.identity(), Collectors.counting())); + Long max = Collections.max(collect.values()); + Long min = Collections.min(collect.values()); + long sum = collect.values().stream().mapToLong(i -> i).sum(); + System.out.println((double) (max - min) / ((double) sum / getNodes().size())); + + int count = 0; // total count of paths that were moved + for (Map.Entry<String, String> entry : before.entrySet()) { + String path = entry.getKey(); + String host = entry.getValue(); + if (!host.equals(after.get(path))) + count++; + } + double moveRatio = (double) count / before.size(); + System.out.println("move ratio:\t" + moveRatio); + + assertTrue(moveRatio < (double) 2 / getNodes().size()); + } + + private static double stdev(Map<String, String> after) { + Map<String, Long> collect = after.values().stream() + .collect(Collectors.groupingBy(Function.identity(), Collectors.counting())); + SummaryStatistics statistics = new SummaryStatistics(); + for (Long value : collect.values()) { + statistics.addValue(value); + } + double sum = statistics.getSum(); + statistics.clear(); + for (Long value : collect.values()) { + statistics.addValue((double) value / sum); + } + + return statistics.getStandardDeviation(); + } + + private List<String> getNodes() { + List<String> nodes = Lists.newArrayList(); + for (int i = 0; i < 100; i++) { + nodes.add("Node" + i); + } + return nodes; + } +} diff --git a/sdk/java/src/test/test-spark.sh b/sdk/java/src/test/test-spark.sh new file mode 100755 index 0000000..24c270d --- /dev/null +++ b/sdk/java/src/test/test-spark.sh @@ -0,0 +1,29 @@ +#!/usr/bin/env bash + +set -e +set -o pipefail + +HADOOP_VERSION="2.7.7" +SPARK_VERSION="2.4.0" +EXAMPLES_JAR="spark-examples_2.11-2.4.0.jar" +
+SPARK_DIST="spark-${SPARK_VERSION}-bin-without-hadoop" +SPARK_HOME="/opt/${SPARK_DIST}" +HADOOP_DIST="hadoop-${HADOOP_VERSION}" +HADOOP_HOME="/opt/${HADOOP_DIST}" + +curl -o "${HADOOP_HOME}.tar.gz" "https://archive.apache.org/dist/hadoop/common/hadoop-${HADOOP_VERSION}/${HADOOP_DIST}.tar.gz" +tar -xf "${HADOOP_HOME}.tar.gz" -C /opt + +export _JAVA_OPTIONS="-Djava.library.path=$(pwd)/../mount/libjfs" +export HADOOP_CLASSPATH="$(pwd)/target/juicefs-hadoop-0.1-SNAPSHOT.jar" +"${HADOOP_HOME}/bin/hadoop" --config "$(pwd)/conf" jar "${HADOOP_HOME}/share/hadoop/mapreduce/hadoop-mapreduce-examples-${HADOOP_VERSION}.jar" grep hello output 'dfs[a-z.]+' + +curl -o "${SPARK_HOME}.tgz" "https://archive.apache.org/dist/spark/spark-${SPARK_VERSION}/${SPARK_DIST}.tgz" +tar -xf "${SPARK_HOME}.tgz" -C /opt + +echo "export SPARK_DIST_CLASSPATH=$(${HADOOP_HOME}/bin/hadoop classpath)" > "${SPARK_HOME}/conf/spark-env.sh" +echo "export HADOOP_CONF_DIR=$(pwd)/conf" >> "${SPARK_HOME}/conf/spark-env.sh" +cp "${SPARK_HOME}/examples/jars/${EXAMPLES_JAR}" /jfs/ + +"${SPARK_HOME}/bin/spark-submit" --class org.apache.spark.examples.JavaWordCount --master "local" "jfs:///${EXAMPLES_JAR}" "jfs:///hello" diff --git a/travis-setup-hdfs.sh b/travis-setup-hdfs.sh new file mode 100644 index 0000000..01c5214 --- /dev/null +++ b/travis-setup-hdfs.sh @@ -0,0 +1,81 @@ +#!/bin/bash + +# JuiceFS, Copyright 2021 Juicedata, Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +set -e +sudo apt-get update +sudo apt-get install openjdk-8-jdk -y + +HADOOP_VERSION="2.10.1" +wget https://dlcdn.apache.org/hadoop/core/stable2/hadoop-2.10.1.tar.gz +mkdir ~/app +tar -zxf hadoop-${HADOOP_VERSION}.tar.gz -C ~/app + +sudo tee -a ~/.bashrc <<EOF +export HADOOP_HOME=~/app/hadoop-${HADOOP_VERSION} +export PATH=\$PATH:\$HADOOP_HOME/bin:\$HADOOP_HOME/sbin +EOF + +# passwordless SSH to localhost is required by start-dfs.sh +ssh-keygen -t rsa -P '' -f ~/.ssh/id_rsa +cat ~/.ssh/id_rsa.pub >> ~/.ssh/authorized_keys +chmod 700 ~/.ssh +chmod 600 ~/.ssh/authorized_keys +echo "StrictHostKeyChecking no" >> ~/.ssh/config + +sed -i 's/${JAVA_HOME}/\/usr\/lib\/jvm\/java-8-openjdk-amd64/g' ~/app/hadoop-${HADOOP_VERSION}/etc/hadoop/hadoop-env.sh + +sudo tee ~/app/hadoop-${HADOOP_VERSION}/etc/hadoop/core-site.xml <<EOF +<configuration> +  <property> +    <name>fs.defaultFS</name> +    <value>hdfs://localhost:8020</value> +  </property> + +  <property> +    <name>hadoop.tmp.dir</name> +    <value>${HOME}/apps/tmp</value> +  </property> +</configuration> +EOF + +sudo tee ~/app/hadoop-${HADOOP_VERSION}/etc/hadoop/hdfs-site.xml <<EOF +<configuration> +  <property> +    <name>dfs.replication</name> +    <value>1</value> +  </property> +</configuration> +EOF + +cd ~/app/hadoop-${HADOOP_VERSION}/bin +./hdfs namenode -format +cd ~/app/hadoop-${HADOOP_VERSION}/sbin +./start-dfs.sh + +jps + +echo "hdfs started successfully"