Author: Kevin Schoon [me@kevinschoon.com]
Hash: c0e86f90b4a12b7ace05caae62c26c6fe97396e1
Timestamp: Fri, 12 Apr 2024 16:10:21 +0000 (1 month ago)

+343 -16 +/-10 browse
add support for smart_http git cloning
add support for smart_http git cloning

This adds support for calling git-http-backend via a CGI-like interface in
Ayllu, which means that running an external fcgiwrap-like service is no longer
required. The implementation is based on https://github.com/w4/rgit, which is
an excellent Rust-based cgit clone.
1diff --git a/ATTRIBUTIONS.md b/ATTRIBUTIONS.md
2index 0180dbb..29a70bd 100644
3--- a/ATTRIBUTIONS.md
4+++ b/ATTRIBUTIONS.md
5 @@ -17,6 +17,10 @@ Ayllu would not be possible without many free software projects.
6
7 And many more, see the `Cargo.toml` file.
8
9+ ##### WTFPL
10+
11+ The "smart git" http backend is inspired by / modified from [rgit](https://github.com/w4/rgit).
12+
13 ##### CC0 Zero 1.0
14
15 Inca style textile patterns are created by [@TrucomanX](https://openclipart.org/artist/TrucomanX)
16 diff --git a/Cargo.lock b/Cargo.lock
17index 660e41a..ca1b622 100644
18--- a/Cargo.lock
19+++ b/Cargo.lock
20 @@ -469,6 +469,7 @@ dependencies = [
21 "bytes",
22 "cookie",
23 "futures-util",
24+ "headers",
25 "http 1.1.0",
26 "http-body 1.0.0",
27 "http-body-util",
28 @@ -505,6 +506,7 @@ dependencies = [
29 "ayllu_database",
30 "ayllu_git",
31 "ayllu_rpc",
32+ "bytes",
33 "cc",
34 "clap 4.5.3",
35 "comrak",
36 @@ -513,6 +515,8 @@ dependencies = [
37 "futures",
38 "git2",
39 "globwalk",
40+ "headers",
41+ "httparse",
42 "lazy_static",
43 "libloading 0.8.3",
44 "libsqlite3-sys",
45 @@ -528,10 +532,12 @@ dependencies = [
46 "tabwriter",
47 "tarpc",
48 "tera",
49+ "thiserror",
50 "time",
51 "time-macros",
52 "tokei",
53 "tokio",
54+ "tokio-stream",
55 "tokio-util",
56 "toml 0.7.8",
57 "tower",
58 @@ -824,9 +830,9 @@ checksum = "1fd0f2584146f6f2ef48085050886acf353beff7305ebd1ae69500e27c67f64b"
59
60 [[package]]
61 name = "bytes"
62- version = "1.5.0"
63+ version = "1.6.0"
64 source = "registry+https://github.com/rust-lang/crates.io-index"
65- checksum = "a2bd12c1caf447e69cd4528f47f94d203fd2582878ecb9e9465484c4148a8223"
66+ checksum = "514de17de45fdb8dc022b1a7975556c53c86f9f0aa5f534b98977b171857c2c9"
67
68 [[package]]
69 name = "cc"
70 @@ -2158,6 +2164,30 @@ dependencies = [
71 ]
72
73 [[package]]
74+ name = "headers"
75+ version = "0.4.0"
76+ source = "registry+https://github.com/rust-lang/crates.io-index"
77+ checksum = "322106e6bd0cba2d5ead589ddb8150a13d7c4217cf80d7c4f682ca994ccc6aa9"
78+ dependencies = [
79+ "base64 0.21.7",
80+ "bytes",
81+ "headers-core",
82+ "http 1.1.0",
83+ "httpdate",
84+ "mime",
85+ "sha1",
86+ ]
87+
88+ [[package]]
89+ name = "headers-core"
90+ version = "0.3.0"
91+ source = "registry+https://github.com/rust-lang/crates.io-index"
92+ checksum = "54b4a22553d4242c49fddb9ba998a99962b5cc6f22cb5a3482bec22522403ce4"
93+ dependencies = [
94+ "http 1.1.0",
95+ ]
96+
97+ [[package]]
98 name = "heck"
99 version = "0.4.1"
100 source = "registry+https://github.com/rust-lang/crates.io-index"
101 diff --git a/ayllu/Cargo.toml b/ayllu/Cargo.toml
102index 4188ab4..00d511a 100644
103--- a/ayllu/Cargo.toml
104+++ b/ayllu/Cargo.toml
105 @@ -51,7 +51,7 @@ globwalk = "0.8.1"
106 anyhow = "1.0.75"
107 tokio = { version = "1.32.0", features = ["full"] }
108 axum = { version = "0.7.3", features = ["macros"] }
109- axum-extra = { version = "0.9.1", features = ["cookie"] }
110+ axum-extra = { version = "0.9.1", features = ["cookie", "typed-header"] }
111 tokio-util = { version = "0.7.9", features = ["io", "compat"] }
112 tower-http = { version = "0.5.0", features = ["tracing", "add-extension", "trace", "normalize-path"] }
113 tracing-subscriber = { version = "0.3.17", features = ["env-filter"] }
114 @@ -61,6 +61,11 @@ mime = "0.3.17"
115 async-trait = "0.1.74"
116 webfinger = "0.5.1"
117 tarpc = { version = "0.34.0", features = ["full"] }
118+ bytes = "1.6.0"
119+ tokio-stream = "0.1.15"
120+ httparse = "1.8.0"
121+ thiserror = "1.0.58"
122+ headers = "0.4.0"
123
124 # NOTE: this must be cautiously updated along with sqlx and rusqlite.
125 [dependencies.libsqlite3-sys]
126 diff --git a/ayllu/src/config.rs b/ayllu/src/config.rs
127index 2a0298e..9d400b0 100644
128--- a/ayllu/src/config.rs
129+++ b/ayllu/src/config.rs
130 @@ -19,7 +19,11 @@ const BANNED_COLLECTION_NAMES: &[&str] = &[
131 "authors", "about", "api", "browse", "config", "rss", "static", "discuss",
132 ];
133
134- fn default_bool() -> bool {
135+ fn default_true() -> bool {
136+ true
137+ }
138+
139+ fn default_false() -> bool {
140 false
141 }
142
143 @@ -215,11 +219,22 @@ pub struct Languages {
144 }
145
146 #[derive(Deserialize, Serialize, Clone, Debug)]
147+ /// git specific options
148+ pub struct Git {
149+ #[serde(default = "default_true")]
150+ pub smart_http: bool,
151+ #[serde(default = "default_true")]
152+ pub export_all: bool,
153+ pub clone_url: Option<String>,
154+ }
155+
156+ #[derive(Deserialize, Serialize, Clone, Debug)]
157 pub struct Config {
158 #[serde(default = "Config::default_site_name")]
159 pub site_name: String,
160 pub origin: String,
161 pub domain: Option<String>,
162+ pub git: Git,
163 #[serde(default = "Config::default_worker_threads")]
164 pub worker_threads: NonZeroUsize,
165 #[serde(default = "Config::default_max_blocking_threads")]
166 @@ -228,11 +243,10 @@ pub struct Config {
167 pub blurb: Option<String>,
168 #[serde(default = "Config::default_robots_txt")]
169 pub robots: String,
170- #[serde(default = "default_bool")]
171+ #[serde(default = "default_false")]
172 pub subpath_mode: bool,
173 #[serde(default = "Config::default_log_level")]
174 pub log_level: String,
175- pub git_clone_url: Option<String>,
176 pub default_branch: Option<String>,
177 pub rss_time_to_live: Option<i64>,
178 pub web: Web,
179 diff --git a/ayllu/src/web2/middleware/repository.rs b/ayllu/src/web2/middleware/repository.rs
180index 3c1c94a..7ce29da 100644
181--- a/ayllu/src/web2/middleware/repository.rs
182+++ b/ayllu/src/web2/middleware/repository.rs
183 @@ -28,8 +28,11 @@ pub struct Preamble {
184 pub start_time: SystemTime,
185 pub repo_path: PathBuf,
186 pub is_empty: bool,
187+ // if either the repository or the collection is marked hidden
188+ pub hidden: bool,
189 pub repo_name: String,
190 pub collection_name: String,
191+ pub collection_path: PathBuf,
192 pub refname: String,
193 pub config: GitConfig,
194 pub file_path: Option<PathBuf>,
195 @@ -46,12 +49,16 @@ impl Preamble {
196 file_path: Option<String>,
197 ) -> Result<Self, Error> {
198 let start_time = SystemTime::now();
199- let repo_path = match system_config
200+ let (collection_path, repo_path, hidden) = match system_config
201 .collections
202 .iter()
203 .find(|collection| collection.name == collection_name)
204 {
205- Some(collection) => PathBuf::from(format!("{}/{}", collection.path, repo_name)),
206+ Some(collection) => (
207+ PathBuf::from(collection.path.clone()),
208+ PathBuf::from(format!("{}/{}", collection.path, repo_name)),
209+ collection.hidden.is_some_and(|hidden| hidden),
210+ ),
211 None => {
212 return Err(Error::NotFound(String::from("collection not found")));
213 }
214 @@ -59,6 +66,7 @@ impl Preamble {
215 let repository = Repository::new(&repo_path)?;
216 let is_empty = repository.is_empty()?;
217 let config = repository.config()?;
218+ let hidden = hidden || config.hidden.is_some_and(|hidden| hidden);
219 let (refname, latest_commit) = if is_empty {
220 (String::new(), None)
221 } else if let Some(commitish) = commitish {
222 @@ -80,7 +88,9 @@ impl Preamble {
223 repo_path,
224 is_empty,
225 repo_name,
226+ hidden,
227 collection_name,
228+ collection_path,
229 refname,
230 config,
231 file_path: file_path.map(PathBuf::from),
232 diff --git a/ayllu/src/web2/routes/git.rs b/ayllu/src/web2/routes/git.rs
233new file mode 100644
234index 0000000..8c28c8c
235--- /dev/null
236+++ b/ayllu/src/web2/routes/git.rs
237 @@ -0,0 +1,237 @@
238+ use std::{io, io::ErrorKind, path::Path, process::Stdio, str::FromStr};
239+
240+ use axum::{
241+ body::Body,
242+ http::{
243+ header::{HeaderMap, HeaderName, HeaderValue},
244+ Method, StatusCode, Uri,
245+ },
246+ response::{IntoResponse, Response},
247+ Extension,
248+ };
249+ use axum_extra::TypedHeader;
250+
251+ use bytes::{Buf, Bytes, BytesMut};
252+ use futures::TryStreamExt;
253+ use tokio::{
254+ io::AsyncReadExt,
255+ process::{Child, ChildStderr, ChildStdout, Command},
256+ sync::mpsc,
257+ };
258+ use tokio_stream::wrappers::ReceiverStream;
259+ use tokio_util::io::StreamReader;
260+ use tracing::{debug, error, warn};
261+
262+ use crate::config::Config;
263+ use crate::web2::{error::Error, middleware::repository::Preamble};
264+
265+ const GIT_HEADERS: &[(&str, &str)] = &[
266+ ("Content-Type", "CONTENT_TYPE"),
267+ ("Content-Length", "CONTENT_LENGTH"),
268+ ("Git-Protocol", "GIT_PROTOCOL"),
269+ ("Content-Encoding", "HTTP_CONTENT_ENCODING"),
270+ ];
271+
272+ struct CgiResponse(usize, StatusCode, HeaderMap);
273+
274+ /// parse cgi headers returned from git-http-backend
275+ fn parse_cgi_headers(buffer: &[u8]) -> Result<Option<CgiResponse>, Error> {
276+ let mut headers = [httparse::EMPTY_HEADER; 10];
277+ let (body_offset, headers) = match httparse::parse_headers(buffer, &mut headers)
278+ .map_err(|_| Error::Message("failed to parse headers".into()))?
279+ {
280+ httparse::Status::Complete(v) => v,
281+ httparse::Status::Partial => return Ok(None),
282+ };
283+
284+ let mut response_headers = HeaderMap::new();
285+
286+ for header in headers {
287+ let name = HeaderName::from_str(header.name)
288+ .map_err(|_| Error::Message("Failed to parse header name from Git over CGI".into()))?;
289+ let value = HeaderValue::from_bytes(header.value)
290+ .map_err(|_| Error::Message("Failed to parse header value from Git over CGI".into()))?;
291+ debug!("git-http-backend returned header: {:?}/{:?}", name, value);
292+ response_headers.insert(name, value);
293+ }
294+
295+ let mut status_code = StatusCode::OK;
296+
297+ if let Some(status) = response_headers.remove("Status").filter(|s| s.len() >= 3) {
298+ let status = &status.as_ref()[..3];
299+
300+ if let Ok(status) = StatusCode::from_bytes(status) {
301+ status_code = status
302+ }
303+ }
304+
305+ Ok(Some(CgiResponse(
306+ body_offset,
307+ status_code,
308+ response_headers,
309+ )))
310+ }
311+
312+ pub async fn handle(
313+ Extension(preamble): Extension<Preamble>,
314+ Extension(cfg): Extension<Config>,
315+ TypedHeader(user_agent): TypedHeader<headers::UserAgent>,
316+ method: Method,
317+ uri: Uri,
318+ headers: HeaderMap,
319+ body: Body,
320+ ) -> Result<Response, Error> {
321+ if !cfg.git.smart_http || preamble.hidden {
322+ // if the smart_http server is disabled or the repository is hidden
323+ // always return 404.
324+ return Err(Error::NotFound("repository not found".to_string()));
325+ }
326+
327+ log::info!(
328+ "handling git clone operation on {}/{} [{}]",
329+ preamble.collection_name,
330+ preamble.repo_name,
331+ user_agent,
332+ );
333+
334+ let uri_path = Path::new(uri.path().trim_start_matches('/'));
335+ let path_translated = preamble.repo_path.join(uri_path);
336+
337+ let mut command = Command::new("git");
338+
339+ for (header, env) in GIT_HEADERS {
340+ if let Some(value) = headers.get(*header) {
341+ let value = value
342+ .to_str()
343+ .map_err(|_| Error::Message("invalid header".into()))?;
344+ debug!("client header: {} {}", header, value);
345+ command.env(env, value);
346+ }
347+ }
348+
349+ let mut cmd = command
350+ .arg("http-backend")
351+ .env("REQUEST_METHOD", method.as_str())
352+ .env(
353+ "PATH_TRANSLATED",
354+ path_translated.to_string_lossy().to_string(),
355+ )
356+ .env("QUERY_STRING", uri.query().unwrap_or(""))
357+ .stdin(Stdio::piped())
358+ .stdout(Stdio::piped())
359+ .stderr(Stdio::piped())
360+ .kill_on_drop(true);
361+
362+ log::debug!("calling {:?}", cmd);
363+
364+ if cfg.git.export_all {
365+ cmd = cmd.env("GIT_HTTP_EXPORT_ALL", "true");
366+ };
367+
368+ let mut child = cmd.spawn()?;
369+
370+ let mut stdout = child.stdout.take().unwrap();
371+ let mut stderr = child.stderr.take().unwrap();
372+ let mut stdin = child.stdin.take().unwrap();
373+
374+ // read request body and forward to stdin
375+ let mut body = StreamReader::new(
376+ body.into_data_stream()
377+ .map_err(|e| std::io::Error::new(ErrorKind::Other, e)),
378+ );
379+
380+ let n_bytes = tokio::io::copy_buf(&mut body, &mut stdin).await?;
381+ debug!("read {} bytes from client", n_bytes);
382+
383+ // wait for the headers back from git http-backend
384+ let mut out_buf = BytesMut::with_capacity(1024);
385+ let cgi_response = loop {
386+ let n = stdout.read_buf(&mut out_buf).await?;
387+ if n == 0 {
388+ break None;
389+ }
390+
391+ if let Some(response) = parse_cgi_headers(&out_buf)? {
392+ out_buf.advance(response.0);
393+ debug!("read {} bytes", response.0);
394+ break Some((response.1, response.2));
395+ }
396+ };
397+
398+ // if the `headers` loop broke with `None`, the `git http-backend` didn't return any parseable
399+ // headers so there's no reason for us to continue. there may be something in stderr for us
400+ // though.
401+ let Some(cgi_response) = cgi_response else {
402+ print_status(&mut child, &mut stderr).await;
403+ return Err(Error::Message(
404+ "incomplete response from git-http-backend".to_string(),
405+ ));
406+ };
407+
408+ // stream the response back to the client
409+ let (body_send, body_recv) = mpsc::channel(8);
410+ tokio::spawn(forward_response_to_client(
411+ out_buf, body_send, stdout, stderr, child,
412+ ));
413+
414+ let mut resp = Body::from_stream(ReceiverStream::new(body_recv)).into_response();
415+
416+ let status = resp.status_mut();
417+ *status = cgi_response.0;
418+
419+ let resp_headers = resp.headers_mut();
420+
421+ for header in cgi_response.1 {
422+ let name = header.0.unwrap();
423+ resp_headers.insert(name, header.1.clone());
424+ }
425+
426+ Ok(resp)
427+ }
428+
429+ /// Forwards the entirety of `stdout` to `body_send`, printing subprocess stderr and status on
430+ /// completion.
431+ async fn forward_response_to_client(
432+ mut out_buf: BytesMut,
433+ body_send: mpsc::Sender<Result<Bytes, io::Error>>,
434+ mut stdout: ChildStdout,
435+ mut stderr: ChildStderr,
436+ mut child: Child,
437+ ) {
438+ loop {
439+ let (out, mut end) = match stdout.read_buf(&mut out_buf).await {
440+ Ok(0) => (Ok(out_buf.split().freeze()), true),
441+ Ok(n) => (Ok(out_buf.split_to(n).freeze()), false),
442+ Err(e) => (Err(e), true),
443+ };
444+
445+ if body_send.send(out).await.is_err() {
446+ warn!("Receiver went away during git http-backend call");
447+ end = true;
448+ }
449+
450+ if end {
451+ break;
452+ }
453+ }
454+
455+ print_status(&mut child, &mut stderr).await;
456+ }
457+
458+ /// Prints the exit status of the `git` subprocess.
459+ async fn print_status(child: &mut Child, stderr: &mut ChildStderr) {
460+ match tokio::try_join!(child.wait(), read_stderr(stderr)) {
461+ Ok((status, stderr)) if status.success() => {
462+ debug!(stderr, "git http-backend successfully shutdown");
463+ }
464+ Ok((status, stderr)) => error!(stderr, "git http-backend exited with status code {status}"),
465+ Err(e) => error!("Failed to wait on git http-backend shutdown: {e}"),
466+ }
467+ }
468+
469+ /// Reads the entirety of stderr for the given handle.
470+ async fn read_stderr(stderr: &mut ChildStderr) -> io::Result<String> {
471+ let mut stderr_out = Vec::new();
472+ stderr.read_to_end(&mut stderr_out).await?;
473+ Ok(String::from_utf8_lossy(&stderr_out).into_owned())
474+ }
475 diff --git a/ayllu/src/web2/routes/mod.rs b/ayllu/src/web2/routes/mod.rs
476index de35b73..e7f4419 100644
477--- a/ayllu/src/web2/routes/mod.rs
478+++ b/ayllu/src/web2/routes/mod.rs
479 @@ -8,6 +8,7 @@ pub mod chart;
480 pub mod commit;
481 pub mod config;
482 pub mod finger;
483+ pub mod git;
484 pub mod index;
485 pub mod log;
486 pub mod mail;
487 diff --git a/ayllu/src/web2/routes/repo.rs b/ayllu/src/web2/routes/repo.rs
488index aa98bb7..b804033 100644
489--- a/ayllu/src/web2/routes/repo.rs
490+++ b/ayllu/src/web2/routes/repo.rs
491 @@ -27,9 +27,8 @@ use crate::web2::middleware::template::Template;
492 use crate::web2::navigation;
493 use crate::web2::util;
494
495- use ayllu_api::xmpp::ChannelStat;
496 use ayllu_database::Wrapper as Database;
497- use ayllu_git::{ChatKind, Commit, Config as GitConfig, TreeEntry, Wrapper};
498+ use ayllu_git::{Commit, Config as GitConfig, TreeEntry, Wrapper};
499 use ayllu_rpc::tarpc::context;
500
501 const README_FILES: [&str; 6] = [
502 @@ -295,7 +294,7 @@ pub async fn serve(
503 cfg.origin, preamble.collection_name, preamble.repo_name
504 ),
505 );
506- let git_clone_url = match cfg.git_clone_url {
507+ let git_clone_url = match cfg.git.clone_url {
508 Some(base) => {
509 let clone_url = format!(
510 "{}:{}/{}.git",
511 diff --git a/ayllu/src/web2/server.rs b/ayllu/src/web2/server.rs
512index 51f4c10..4ef8e00 100644
513--- a/ayllu/src/web2/server.rs
514+++ b/ayllu/src/web2/server.rs
515 @@ -5,8 +5,8 @@ use std::net::SocketAddrV4;
516 use std::sync::Arc;
517
518 use axum::{
519- body::Body, extract::Request, middleware::from_fn_with_state, routing, Extension, Router,
520- ServiceExt,
521+ body::Body, extract::Request, middleware::from_fn_with_state, routing, routing::MethodFilter,
522+ Extension, Router, ServiceExt,
523 };
524 use globwalk::glob_builder;
525 use tera::Tera;
526 @@ -36,6 +36,7 @@ use crate::web2::routes::chart;
527 use crate::web2::routes::commit;
528 use crate::web2::routes::config;
529 use crate::web2::routes::finger;
530+ use crate::web2::routes::git;
531 use crate::web2::routes::index;
532 use crate::web2::routes::log as log_route;
533 use crate::web2::routes::mail;
534 @@ -256,6 +257,15 @@ pub async fn serve(cfg: &Config) -> Result<(), Box<dyn Error>> {
535 .route("/refs/branches", routing::get(refs::branches))
536 .route("/refs/tags", routing::get(refs::tags))
537 .route("/refs/archive/:ref_id", routing::get(refs::archive))
538+ // git smart http clone
539+ // /(HEAD|info/refs|objects/info/.*|git-upload-pack).*$
540+ .route("/HEAD", routing::get(git::handle))
541+ .route("/info/refs", routing::get(git::handle))
542+ .route("/objects/info/:id", routing::get(git::handle))
543+ .route(
544+ "/git-upload-pack",
545+ routing::on(MethodFilter::GET.or(MethodFilter::POST), git::handle),
546+ )
547 .layer(from_fn_with_state(cfg.clone(), repository::middleware))
548 .layer(from_fn_with_state(
549 Arc::new(cfg.clone()),
550 diff --git a/config.example.toml b/config.example.toml
551index da676f7..1a9f152 100644
552--- a/config.example.toml
553+++ b/config.example.toml
554 @@ -27,9 +27,7 @@ sysadmin = "admin@ayllu-forge.org"
555 # after it has been updated.
556 # jobs_socket_path = "/var/run/user/1000/ayllu-jobs.sock"
557
558- # URL to suggest for cloning via SSH, if unspecified git cloning option will
559- # not appear in the repository page.
560- git_clone_url = "git@localhost"
561+
562
563 # logging level
564 log_level = "debug"
565 @@ -62,6 +60,25 @@ blurb = """
566 # cases.
567 rss_time_to_live = 3600
568
569+ # Git HTTP server options
570+
571+ [git]
572+ # Global option to enable the Git "Smart HTTP" server, which will serve
573+ # non-hidden repositories for cloning over HTTP. The default is true; setting
574+ # it to false globally disables all cloning, which you might want to do if,
575+ # for example, you want to run your own fcgi server.
576+ smart_http = true
577+ # URL to suggest for cloning via SSH, if unspecified git cloning option will
578+ # not appear in the repository page.
579+ clone_url = "git@localhost"
580+
581+ # This toggles the GIT_HTTP_EXPORT_ALL option for git-http-backend, which will
582+ # allow cloning of all repositories unless they are marked as private. If this
583+ # option is not enabled then the file git-daemon-export-ok must exist in the
584+ # repository for cloning to be permitted.
585+ export_all = false
586+
587+
588 # List of authors associated with this site as returned via webfinger queries
589 # see https://datatracker.ietf.org/doc/html/rfc7033 and https://webfinger.net/
590 # for more details.