Author: Jason White [github@jasonwhite.io]
Committer: GitHub [noreply@github.com] Fri, 08 Jan 2021 01:46:54 +0000
Hash: 2ee53f0e51b45fd3240f14a2bb656ca6d41a9a4c
Timestamp: Fri, 08 Jan 2021 01:46:54 +0000 (3 years ago)

+180 -35 +/-6 browse
Add support for local storage (#21)

1diff --git a/CHANGELOG.md b/CHANGELOG.md
2new file mode 100644
3index 0000000..a194294
4--- /dev/null
5+++ b/CHANGELOG.md
6 @@ -0,0 +1,23 @@
7+ # Changelog
8+
9+ ## v0.3
10+
11+ - Created this changelog.
12+
13+ ### Breaking changes
14+
15+ - Changed the command line interface so that each storage backend is a separate
16+ subcommand.
17+
18+ ### Features
19+
20+ - Added a local-disk only storage backend.
21+ - Added support for more configuration via environment variables:
22+ - `RUDOLFS_HOST` (same as `--host`)
23+ - `RUDOLFS_KEY` (same as `--key`)
24+ - `RUDOLFS_CACHE_DIR` (same as `--cache-dir`)
25+ - `RUDOLFS_MAX_CACHE_SIZE` (same as `--max-cache-size`)
26+ - `RUDOLFS_LOG` (same as `--log-level`)
27+ - `RUDOLFS_S3_BUCKET` (same as `--bucket` when using S3 storage)
28+ - `RUDOLFS_S3_PREFIX` (same as `--prefix` when using S3 storage)
29+ - `RUDOLFS_LOCAL_PATH` (same as `--path` when using local disk storage)
30 diff --git a/README.md b/README.md
31index 97979d0..c54a8e7 100644
32--- a/README.md
33+++ b/README.md
34 @@ -6,7 +6,10 @@ A high-performance, caching Git LFS server with an AWS S3 back-end.
35
36 ## Features
37
38- - AWS S3 permanent storage back-end.
39+ - Multiple backends:
40+
41+ 1. AWS S3 backend with an optional local disk cache.
42+ 2. Local disk backend.
43
44 - A configurable local disk cache to speed up downloads (and reduce your
45 S3 bill).
46 @@ -73,9 +76,10 @@ KEY=xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
47 cargo run -- \
48 --cache-dir cache \
49 --host localhost:8080 \
50- --s3-bucket foobar \
51 --max-cache-size 10GiB \
52- --key $KEY
53+ --key $KEY \
54+ s3 \
55+ --bucket foobar
56 ```
57
58 **Note**: Always use a different S3 bucket, cache directory, and encryption key
59 diff --git a/src/help.md b/src/help.md
60new file mode 100644
61index 0000000..1e64a0d
62--- /dev/null
63+++ b/src/help.md
64 @@ -0,0 +1,26 @@
65+ EXAMPLES
66+
67+ To start a local disk server with encryption enabled:
68+
69+ # Change this to the output of `openssl rand -hex 32`.
70+ export RUDOLFS_KEY=xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
71+
72+ rudolfs --host=localhost:8080 local --path=path/to/lfs/data
73+
74+ To start an S3 server with encryption enabled:
75+
76+ # Your AWS credentials.
77+ export AWS_ACCESS_KEY_ID=XXXXXXXXXXXXXXXXXXXX
78+ export AWS_SECRET_ACCESS_KEY=XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
79+ export AWS_DEFAULT_REGION=us-west-1
80+
81+ # Change this to the output of `openssl rand -hex 32`.
82+ export RUDOLFS_KEY=xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
83+
84+ rudolfs \
85+ --host=localhost:8080 \
86+ --cache-dir=my_local_cache \
87+ s3 --bucket=my_bucket
88+
89+ NOTE: Always use a different S3 bucket, cache directory, and encryption key
90+ than what you use in your production environment.
91 diff --git a/src/main.rs b/src/main.rs
92index 921e83f..a67626e 100644
93--- a/src/main.rs
94+++ b/src/main.rs
95 @@ -45,43 +45,95 @@ use crate::storage::{Cached, Disk, Encrypted, Retrying, Storage, Verify, S3};
96 #[cfg(feature = "faulty")]
97 use crate::storage::Faulty;
98
99+ // Additional help to append to the end when `--help` is specified.
100+ static AFTER_HELP: &str = include_str!("help.md");
101+
102 #[derive(StructOpt)]
103+ #[structopt(after_help = AFTER_HELP)]
104 struct Args {
105+ #[structopt(flatten)]
106+ global: GlobalArgs,
107+
108+ #[structopt(subcommand)]
109+ backend: Backend,
110+ }
111+
112+ #[derive(StructOpt)]
113+ enum Backend {
114+ /// Starts the server with S3 as the storage backend.
115+ #[structopt(name = "s3")]
116+ S3(S3Args),
117+
118+ /// Starts the server with the local disk as the storage backend.
119+ #[structopt(name = "local")]
120+ Local(LocalArgs),
121+ }
122+
123+ #[derive(StructOpt)]
124+ struct GlobalArgs {
125 /// Host or address to listen on.
126- #[structopt(long = "host", default_value = "0.0.0.0:8080")]
127+ #[structopt(
128+ long = "host",
129+ default_value = "0.0.0.0:8080",
130+ env = "RUDOLFS_HOST"
131+ )]
132 host: String,
133
134- /// Root directory of the object cache.
135- #[structopt(long = "cache-dir")]
136+ /// Encryption key to use.
137+ #[structopt(
138+ long = "key",
139+ parse(try_from_str = FromHex::from_hex),
140+ env = "RUDOLFS_KEY"
141+ )]
142+ key: [u8; 32],
143+
144+ /// Root directory of the object cache. If not specified or if the local
145+ /// disk is the storage backend, then no local disk cache will be used.
146+ #[structopt(long = "cache-dir", env = "RUDOLFS_CACHE_DIR")]
147 cache_dir: Option<PathBuf>,
148
149+ /// Maximum size of the cache, in bytes. Set to 0 for an unlimited cache
150+ /// size.
151+ #[structopt(
152+ long = "max-cache-size",
153+ default_value = "50 GiB",
154+ env = "RUDOLFS_MAX_CACHE_SIZE"
155+ )]
156+ max_cache_size: human_size::Size,
157+
158 /// Logging level to use.
159- #[structopt(long = "log-level", default_value = "info")]
160+ #[structopt(
161+ long = "log-level",
162+ default_value = "info",
163+ env = "RUDOLFS_LOG"
164+ )]
165 log_level: log::LevelFilter,
166+ }
167
168+ #[derive(StructOpt)]
169+ struct S3Args {
170 /// Amazon S3 bucket to use.
171- #[structopt(long = "s3-bucket")]
172- s3_bucket: String,
173+ #[structopt(long, env = "RUDOLFS_S3_BUCKET")]
174+ bucket: String,
175
176 /// Amazon S3 path prefix to use.
177- #[structopt(long = "s3-prefix", default_value = "lfs")]
178- s3_prefix: String,
179-
180- /// Encryption key to use.
181- #[structopt(long = "key", parse(try_from_str = FromHex::from_hex))]
182- key: [u8; 32],
183+ #[structopt(long, default_value = "lfs", env = "RUDOLFS_S3_PREFIX")]
184+ prefix: String,
185+ }
186
187- /// Maximum size of the cache, in bytes. Set to 0 for an unlimited cache
188- /// size.
189- #[structopt(long = "max-cache-size", default_value = "50 GiB")]
190- max_cache_size: human_size::Size,
191+ #[derive(StructOpt)]
192+ struct LocalArgs {
193+ /// Directory where the LFS files should be stored. This directory will be
194+ /// created if it does not exist.
195+ #[structopt(long, env = "RUDOLFS_LOCAL_PATH")]
196+ path: PathBuf,
197 }
198
199 impl Args {
200 async fn main(self) -> Result<(), Box<dyn std::error::Error>> {
201 // Initialize logging.
202 let mut logger_builder = pretty_env_logger::formatted_timed_builder();
203- logger_builder.filter_module("rudolfs", self.log_level);
204+ logger_builder.filter_module("rudolfs", self.global.log_level);
205
206 if let Ok(env) = std::env::var("RUST_LOG") {
207 // Support the addition of RUST_LOG to help with debugging
208 @@ -93,13 +145,30 @@ impl Args {
209
210 // Find a socket address to bind to. This will resolve domain names.
211 let addr = self
212+ .global
213 .host
214 .to_socket_addrs()?
215 .next()
216 .unwrap_or_else(|| SocketAddr::from(([0, 0, 0, 0], 8080)));
217
218 log::info!("Initializing storage...");
219- let s3 = S3::new(self.s3_bucket, self.s3_prefix)
220+
221+ match self.backend {
222+ Backend::S3(s3) => s3.run(addr, self.global).await?,
223+ Backend::Local(local) => local.run(addr, self.global).await?,
224+ }
225+
226+ Ok(())
227+ }
228+ }
229+
230+ impl S3Args {
231+ async fn run(
232+ self,
233+ addr: SocketAddr,
234+ global_args: GlobalArgs,
235+ ) -> Result<(), Box<dyn std::error::Error>> {
236+ let s3 = S3::new(self.bucket, self.prefix)
237 .map_err(Error::from)
238 .await?;
239
240 @@ -110,12 +179,13 @@ impl Args {
241 #[cfg(feature = "faulty")]
242 let s3 = Faulty::new(s3);
243
244- match self.cache_dir {
245+ match global_args.cache_dir {
246 Some(cache_dir) => {
247 // Convert cache size to bytes.
248- let max_cache_size =
249- self.max_cache_size.into::<human_size::Byte>().value()
250- as u64;
251+ let max_cache_size = global_args
252+ .max_cache_size
253+ .into::<human_size::Byte>()
254+ .value() as u64;
255
256 // Use disk storage as a cache.
257 let disk = Disk::new(cache_dir).map_err(Error::from).await?;
258 @@ -124,15 +194,32 @@ impl Args {
259 let disk = Faulty::new(disk);
260
261 let cache = Cached::new(max_cache_size, disk, s3).await?;
262- let storage = Verify::new(Encrypted::new(self.key, cache));
263+ let storage =
264+ Verify::new(Encrypted::new(global_args.key, cache));
265 run_server(storage, &addr).await?;
266 }
267 None => {
268- let storage = Verify::new(Encrypted::new(self.key, s3));
269+ let storage = Verify::new(Encrypted::new(global_args.key, s3));
270 run_server(storage, &addr).await?;
271 }
272- };
273+ }
274+
275+ Ok(())
276+ }
277+ }
278+
279+ impl LocalArgs {
280+ async fn run(
281+ self,
282+ addr: SocketAddr,
283+ global_args: GlobalArgs,
284+ ) -> Result<(), Box<dyn std::error::Error>> {
285+ let storage = Disk::new(self.path).map_err(Error::from).await?;
286+ let storage = Verify::new(Encrypted::new(global_args.key, storage));
287+
288+ log::info!("Local disk storage initialized.");
289
290+ run_server(storage, &addr).await?;
291 Ok(())
292 }
293 }
294 diff --git a/src/storage/disk.rs b/src/storage/disk.rs
295index a6b3ae6..bb49205 100644
296--- a/src/storage/disk.rs
297+++ b/src/storage/disk.rs
298 @@ -43,6 +43,8 @@ pub struct Backend {
299
300 impl Backend {
301 pub async fn new(root: PathBuf) -> Result<Self, io::Error> {
302+ fs::create_dir_all(&root).await?;
303+
304 // TODO: Clean out files in the "incomplete" folder?
305 Ok(Backend { root })
306 }
307 diff --git a/src/storage/s3.rs b/src/storage/s3.rs
308index 0b79336..f9cc61c 100644
309--- a/src/storage/s3.rs
310+++ b/src/storage/s3.rs
311 @@ -169,13 +169,16 @@ impl<C> Backend<C> {
312
313 // We need to retry here so that any fake S3 services have a chance to
314 // start up alongside Rudolfs.
315- (|| async {
316- // Note that we don't retry certain failures, like credential or
317- // missing bucket errors. These are unlikely to be transient errors.
318- c.head_bucket(req.clone())
319- .await
320- .map_err(InitError::from)
321- .map_err(InitError::into_backoff)
322+ (|| {
323+ async {
324+ // Note that we don't retry certain failures, like credential or
325+ // missing bucket errors. These are unlikely to be transient
326+ // errors.
327+ c.head_bucket(req.clone())
328+ .await
329+ .map_err(InitError::from)
330+ .map_err(InitError::into_backoff)
331+ }
332 })
333 .retry(ExponentialBackoff::default())
334 .await?;