From 8a88281040d5c08b872565dbe1bf65b527e17083 Mon Sep 17 00:00:00 2001 From: Raja Boujbel Date: Wed, 19 Jun 2024 17:23:42 +0200 Subject: [PATCH 01/12] download: fix POST option for wget --- master_changes.md | 2 ++ src/repository/opamDownload.ml | 3 ++- 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/master_changes.md b/master_changes.md index 2efe7e53f99..2a02184901e 100644 --- a/master_changes.md +++ b/master_changes.md @@ -62,6 +62,7 @@ users) ## Repository * Accurately tag `curl` download command when loaded from global config file [#6270 @rjbou] + * Remove wget support for Software Heritage fallback [#6036 @rjbou] ## Lock @@ -157,6 +158,7 @@ users) * `OpamArg.InvalidCLI`: export exception [#6150 @rjbou] ## opam-repository + * `OpamDownload.get_output`: fix `wget` option for `POST` requests [#6036 @rjbou] ## opam-state diff --git a/src/repository/opamDownload.ml b/src/repository/opamDownload.ml index 633f43762b4..ab07a764b33 100644 --- a/src/repository/opamDownload.ml +++ b/src/repository/opamDownload.ml @@ -268,7 +268,8 @@ let get_output ~post ?(args=[]) url = let cmd_args = if post then match cmd_args with - | ("wget"|"curl" as cmd)::args -> Some (cmd :: ["-X"; "POST"] @ args) + | ("curl" as cmd)::args -> Some (cmd :: ["-X"; "POST"] @ args) + | ("wget" as cmd)::args -> Some (cmd :: ["--method"; "POST"] @ args) | _ -> None else Some cmd_args in From 8a77f875b135cdd49e77b245d1786ae81a9a5efb Mon Sep 17 00:00:00 2001 From: Raja Boujbel Date: Wed, 19 Jun 2024 17:24:56 +0200 Subject: [PATCH 02/12] download: use long format option for curl POST request --- master_changes.md | 1 + src/repository/opamDownload.ml | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/master_changes.md b/master_changes.md index 2a02184901e..77caf0fb59a 100644 --- a/master_changes.md +++ b/master_changes.md @@ -159,6 +159,7 @@ users) ## opam-repository * `OpamDownload.get_output`: fix `wget` option for `POST` requests [#6036 @rjbou] + * `OpamDownload.get_output`: use long form for `curl` `POST` request option [#6036 @rjbou] ## opam-state diff --git a/src/repository/opamDownload.ml b/src/repository/opamDownload.ml index ab07a764b33..fef1e3c63a7 100644 --- a/src/repository/opamDownload.ml +++ b/src/repository/opamDownload.ml @@ -268,7 +268,7 @@ let get_output ~post ?(args=[]) url = let cmd_args = if post then match cmd_args with - | ("curl" as cmd)::args -> Some (cmd :: ["-X"; "POST"] @ args) + | ("curl" as cmd)::args -> Some (cmd :: ["--request"; "POST"] @ args) | ("wget" as cmd)::args -> Some (cmd :: ["--method"; "POST"] @ args) | _ -> None else Some cmd_args From 8883aa31c7b8571f3e3d8346d23998be30c0cd01 Mon Sep 17 00:00:00 2001 From: Raja Boujbel Date: Wed, 27 Nov 2024 17:51:55 +0100 Subject: [PATCH 03/12] download: remove wget support for SWH fallback to focus on more fine-grained curl handling --- src/repository/opamDownload.ml | 20 ++++++-------------- tests/reftests/swhid.unix.test | 18 +----------------- 2 files changed, 7 insertions(+), 31 deletions(-) diff --git a/src/repository/opamDownload.ml b/src/repository/opamDownload.ml index fef1e3c63a7..3745c119b21 100644 --- a/src/repository/opamDownload.ml +++ b/src/repository/opamDownload.ml @@ -253,12 +253,6 @@ let download ?quiet ?validate ~overwrite ?compress ?checksum url dstdir = (** Stdout output retrieval and post requests management *) -let post_tools = ["wget"; "curl"] -let check_post_tool () = - match Lazy.force OpamRepositoryConfig.(!r.download_tool) with - | [(CIdent cmd), _], _ -> List.mem cmd post_tools - | _ -> false - let get_output ~post ?(args=[]) url = let cmd_args = download_args ~url ~out:"-" ~retry:OpamRepositoryConfig.(!r.retries) @@ -319,9 +313,8 @@ module SWHID = struct | Some out -> Some (String.concat "" out) | None -> - OpamConsole.error "Software Heritage fallback needs %s or %s installed" - (OpamConsole.colorise `underline "curl") - (OpamConsole.colorise `underline "wget"); + OpamConsole.error "Software Heritage fallback needs %s installed" + (OpamConsole.colorise `underline "curl"); None let get_dir hash = @@ -365,7 +358,8 @@ module SWHID = struct match OpamFile.URL.swhid urlf with | None -> Done (Result None) | Some swhid -> - if check_post_tool () then + match Lazy.force OpamRepositoryConfig.(!r.download_tool) with + | _, `Curl -> check_liveness () @@+ fun alive -> if alive then (* Add a global modifier and/or command for default answering *) @@ -424,10 +418,8 @@ module SWHID = struct Done (Not_available (Some (fallback_err "unreachable"), fallback_err "network failure or API down")) - else + | _ -> Done (Not_available (Some (fallback_err "no retrieval"), - fallback_err "Download tool permitting post request (%s) not \ - set as download tool" - (OpamStd.Format.pretty_list post_tools))) + fallback_err "Curl is required for Software Heritage fallback")) end diff --git a/tests/reftests/swhid.unix.test b/tests/reftests/swhid.unix.test index 737711c3e98..1c6bf320710 100644 --- a/tests/reftests/swhid.unix.test +++ b/tests/reftests/swhid.unix.test @@ -100,22 +100,6 @@ The following actions will be performed: Done. ### opam clean -c Clearing cache of downloaded files -### opam option download-command=wget -Set to 'wget' the field download-command in global configuration -### opam install snappy-swhid-dir -v | grep -v '^Processing' -The following actions will be performed: -=== install 1 package - - install snappy-swhid-dir 2 - -<><> Processing actions <><><><><><><><><><><><><><><><><><><><><><><><><><><><> -Source https://fake.exe/url.tar.gz is not available. Do you want to try to retrieve it from Software Heritage cache (https://www.softwareheritage.org)? It may take few minutes. [y/N] y --> retrieved snappy-swhid-dir.2 (SWH fallback) --> installed snappy-swhid-dir.2 -Done. -### opam clean -c -Clearing cache of downloaded files -### opam option download-command=curl -Set to 'curl' the field download-command in global configuration ### opam-version: "2.0" url { @@ -152,7 +136,7 @@ The following actions will be performed: <><> Processing actions <><><><><><><><><><><><><><><><><><><><><><><><><><><><> [ERROR] Failed to get sources of snappy-swhid-ko.2: SWH fallback: no retrieval -OpamSolution.Fetch_fail("SWH fallback: Download tool permitting post request (wget and curl) not set as download tool") +OpamSolution.Fetch_fail("SWH fallback: Curl is required for Software Heritage fallback") <><> Error report <><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><> From 302f0774edd67c3f49f85c4aa08d448a19b2d62f Mon Sep 17 00:00:00 2001 From: Raja Boujbel Date: Wed, 19 Jun 2024 18:42:14 +0200 Subject: [PATCH 04/12] download: fix SWH liveness check --- master_changes.md | 2 ++ src/repository/opamDownload.ml | 9 +++++++-- 2 files changed, 9 insertions(+), 2 deletions(-) diff --git a/master_changes.md b/master_changes.md index 77caf0fb59a..fed1adfc560 100644 --- a/master_changes.md +++ b/master_changes.md @@ -63,6 +63,8 @@ users) ## Repository * Accurately tag `curl` download command when loaded from global config file [#6270 @rjbou] * Remove wget support for Software Heritage fallback [#6036 @rjbou] + * [BUG] Fix SWH archive cooking request for wget [#6036 @rjbou - fix #5721] + * [BUG] Fix SWH liveness check [#6036 @rjbou] ## Lock diff --git a/src/repository/opamDownload.ml b/src/repository/opamDownload.ml index 3745c119b21..037470660ce 100644 --- a/src/repository/opamDownload.ml +++ b/src/repository/opamDownload.ml @@ -283,8 +283,13 @@ module SWHID = struct let check_liveness () = OpamProcess.Job.catch (fun _ -> Done false) @@ fun () -> - get_output ~post:true OpamUrl.Op.(instance / "api" / "1" / "ping" / "") - @@| fun _ -> true + get_output ~post:false OpamUrl.Op.(instance / "api" / "1" / "ping" / "") + @@| function + | Some (pong::_) -> + (* curl output after answer the http code *) + (* https://archive.softwareheritage.org/api/1/ping/ *) + OpamStd.String.starts_with ~prefix:"\"pong\"" pong + | Some _ | None -> false let get_value key s = match OpamJson.of_string s with From e9d0042bc2a9f7b797de72cb8c46606a2ed1f16c Mon Sep 17 00:00:00 2001 From: Raja Boujbel Date: Wed, 19 Jun 2024 19:39:10 +0200 Subject: [PATCH 05/12] download: fix SWH cooking request and status retrieval API request --- master_changes.md | 1 + src/repository/opamDownload.ml | 10 ++++++++-- 2 files changed, 9 insertions(+), 2 deletions(-) diff --git a/master_changes.md b/master_changes.md index fed1adfc560..849444aecb8 100644 --- a/master_changes.md +++ b/master_changes.md @@ -65,6 +65,7 @@ users) * Remove wget support for Software Heritage fallback [#6036 @rjbou] * [BUG] Fix SWH archive cooking request for wget [#6036 @rjbou - fix #5721] * [BUG] Fix SWH liveness check [#6036 @rjbou] + * Update SWH API request [#6036 @rjbou] ## Lock diff --git a/src/repository/opamDownload.ml b/src/repository/opamDownload.ml index 037470660ce..8cb4329f851 100644 --- a/src/repository/opamDownload.ml +++ b/src/repository/opamDownload.ml @@ -278,7 +278,10 @@ module SWHID = struct let instance = OpamUrl.of_string "https://archive.softwareheritage.org" (* we keep api 1 hardcoded for the moment *) - let full_url middle hash = OpamUrl.Op.(instance / "api" / "1" / middle / hash / "") + let full_url middle hash = + OpamUrl.Op.(instance / "api" / "1" / middle / hash / "") + let vault_url kind hash = + full_url ("vault/" ^ kind) ("swh:1:dir:" ^ hash) let check_liveness () = OpamProcess.Job.catch (fun _ -> Done false) @@ -323,7 +326,10 @@ module SWHID = struct None let get_dir hash = - let url = full_url "vault/directory" hash in + (* https://archive.softwareheritage.org/api/1/vault/flat/doc/ *) + let url = vault_url "flat" hash in + (* The POST is needed only for asking to cook the archive, it's a no-op on + status check *) get_output ~post:true url @@| OpamStd.Option.replace @@ fun json -> let status = get_value "status" json in let fetch_url = get_value "fetch_url" json in From 13437c7f2f96fee627d8655cf2e7a19f6466eb50 Mon Sep 17 00:00:00 2001 From: Raja Boujbel Date: Tue, 12 Nov 2024 15:24:38 +0100 Subject: [PATCH 06/12] download: add internal function to retrieve http code for curl. --- master_changes.md | 1 + src/repository/opamDownload.ml | 92 ++++++++++++++++++---------------- 2 files changed, 51 insertions(+), 42 deletions(-) diff --git a/master_changes.md b/master_changes.md index 849444aecb8..4f0b3303a7c 100644 --- a/master_changes.md +++ b/master_changes.md @@ -163,6 +163,7 @@ users) ## opam-repository * `OpamDownload.get_output`: fix `wget` option for `POST` requests [#6036 @rjbou] * `OpamDownload.get_output`: use long form for `curl` `POST` request option [#6036 @rjbou] + * `OpamDownload.download`: more fine grained HTTP request error code detection for curl [#6036 @rjbou] ## opam-state diff --git a/src/repository/opamDownload.ml b/src/repository/opamDownload.ml index 8cb4329f851..0fd1d849621 100644 --- a/src/repository/opamDownload.ml +++ b/src/repository/opamDownload.ml @@ -129,57 +129,65 @@ let download_command_t ~with_curl_mitigation ~compress ?checksum ~url ~dst c = OpamSystem.make_command ~allow_stdin:false ~stdout cmd args @@> c let tool_return redownload_command url ret = + let open OpamProcess in match Lazy.force OpamRepositoryConfig.(!r.download_tool) with | _, `Default -> if OpamProcess.is_failure ret then - fail (Some "Download command failed", - Printf.sprintf "Download command failed: %s" - (OpamProcess.result_summary ret)) - else Done () + Done (`fail (Some "Download command failed", + Printf.sprintf "Download command failed: %s" + (OpamProcess.result_summary ret))) + else Done `ok | _, `Curl -> - if OpamProcess.is_failure ret then - if ret.r_code = 43 then begin - (* Code 43 is CURLE_BAD_FUNCTION_ARGUMENT (7.1 7-Aug-2000). This should - never be encountered using the curl binary, so we assume that it's - a manifestation of curl/curl#13845 (see also #6120). *) - log "Attempting to mitigate curl/curl#13845"; - redownload_command ~with_curl_mitigation:true @@ function ret -> - if OpamProcess.is_failure ret then - if ret.r_code = 22 then - (* If this broken version of curl persists for some time, it is - relatively straightforward to parse the http response code from - the message, as it hasn't changed. *) - fail (Some "curl failed owing to a server-side issue", - Printf.sprintf "curl failed with server-side error: %s" - (OpamProcess.result_summary ret)) - else - fail (Some "curl failed", - Printf.sprintf "curl failed: %s" - (OpamProcess.result_summary ret)) - else Done () - end else - fail (Some "curl failed", Printf.sprintf "curl failed: %s" - (OpamProcess.result_summary ret)) - else - match ret.OpamProcess.r_stdout with - | [] -> - fail (Some "curl empty response", - Printf.sprintf "curl: empty response while downloading %s" - (OpamUrl.to_string url)) - | l -> - let code = List.hd (List.rev l) in - let num = try int_of_string code with Failure _ -> 999 in - if num >= 400 then - fail (Some ("curl error code " ^ code), - Printf.sprintf "curl: code %s while downloading %s" - code (OpamUrl.to_string url)) - else Done () + match ret with + | { r_code = 0 ; r_stdout = []; _ } -> + Done (`fail (Some "curl empty response", + Printf.sprintf "curl: empty response while downloading %s" + (OpamUrl.to_string url))) + | { r_code = 0 ; r_stdout = (_::_ as l); _ } -> + let code = List.hd (List.rev l) in + let num = try int_of_string code with Failure _ -> 999 in + if num >= 400 then + Done (`http_error num) + else Done `ok + | { r_code = 43; _ } -> + (* Code 43 is CURLE_BAD_FUNCTION_ARGUMENT (7.1 7-Aug-2000). This should + never be encountered using the curl binary, so we assume that it's + a manifestation of curl/curl#13845 (see also #6120). *) + log "Attempting to mitigate curl/curl#13845"; + (redownload_command ~with_curl_mitigation:true @@ function ret -> + if OpamProcess.is_failure ret then + if ret.r_code = 22 then + (* If this broken version of curl persists for some time, it is + relatively straightforward to parse the http response code from + the message, as it hasn't changed. *) + Done (`fail (Some "curl failed owing to a server-side issue", + Printf.sprintf "curl failed with server-side error: %s" + (OpamProcess.result_summary ret))) + else + Done (`fail (Some "curl failed", + Printf.sprintf "curl failed: %s" + (OpamProcess.result_summary ret))) + else Done `ok) + | _ -> (* code <> 0 / 43 *) + Done (`fail (Some "curl failed", + Printf.sprintf "curl failed: %s" + (OpamProcess.result_summary ret))) -let download_command ~compress ?checksum ~url ~dst () = +let download_command_http_error ~compress ?checksum ~url ~dst () = let download_command = download_command_t ~compress ?checksum ~url ~dst in download_command ~with_curl_mitigation:false @@ tool_return download_command url +let download_command ~compress ?checksum ~url ~dst () = + download_command_http_error ~compress ?checksum ~url ~dst () + @@| function + | `ok -> () + | `http_error code -> + fail (Some ("HTTP error code " ^ string_of_int code), + Printf.sprintf "code %d while downloading %s" + code (OpamUrl.to_string url)) + | `fail (s,l) -> fail (s,l) + let really_download ?(quiet=false) ~overwrite ?(compress=false) ?checksum ?(validate=true) ~url ~dst () = From 979c25d9cdfa126656ce3077092553997b918f3e Mon Sep 17 00:00:00 2001 From: Raja Boujbel Date: Tue, 12 Nov 2024 19:43:07 +0100 Subject: [PATCH 07/12] download: rework SWH retrieval fallback * Update to new API * More fine-grained workflow: check is archive is already cooked: if not request cooking, otherwise retrieve url of the archive * Use only post request when needed --- master_changes.md | 1 + src/repository/opamDownload.ml | 193 ++++++++++++++++++++++++--------- 2 files changed, 145 insertions(+), 49 deletions(-) diff --git a/master_changes.md b/master_changes.md index 4f0b3303a7c..32d1680ba34 100644 --- a/master_changes.md +++ b/master_changes.md @@ -66,6 +66,7 @@ users) * [BUG] Fix SWH archive cooking request for wget [#6036 @rjbou - fix #5721] * [BUG] Fix SWH liveness check [#6036 @rjbou] * Update SWH API request [#6036 @rjbou] + * Rework SWH fallback to have a more correct archive retrieval and more fine grained error handling [#6036 @rjbou] ## Lock diff --git a/src/repository/opamDownload.ml b/src/repository/opamDownload.ml index 0fd1d849621..f0352d8e650 100644 --- a/src/repository/opamDownload.ml +++ b/src/repository/opamDownload.ml @@ -263,6 +263,7 @@ let download ?quiet ?validate ~overwrite ?compress ?checksum url dstdir = let get_output ~post ?(args=[]) url = let cmd_args = + (* should we read from output or redirect in a file ? *) download_args ~url ~out:"-" ~retry:OpamRepositoryConfig.(!r.retries) ~compress:false () @ args @@ -291,16 +292,20 @@ module SWHID = struct let vault_url kind hash = full_url ("vault/" ^ kind) ("swh:1:dir:" ^ hash) - let check_liveness () = - OpamProcess.Job.catch (fun _ -> Done false) - @@ fun () -> - get_output ~post:false OpamUrl.Op.(instance / "api" / "1" / "ping" / "") - @@| function - | Some (pong::_) -> - (* curl output after answer the http code *) - (* https://archive.softwareheritage.org/api/1/ping/ *) - OpamStd.String.starts_with ~prefix:"\"pong\"" pong - | Some _ | None -> false + let fallback_err fmt = Printf.sprintf ("SWH fallback: "^^fmt) + + let get_output ?(post=false) url = + get_output ~post url @@| function + | Some out -> out + | None -> + (* Shouldn't happen, we already checked that a post tool is used *) + (* XXX change to an assert false ? *) + OpamConsole.error "Software Heritage fallback needs %s or %s installed" + (OpamConsole.colorise `underline "curl") + (OpamConsole.colorise `underline "wget"); + fail (None, + "Software Heritage fallback not available as \ + it needs curl or wget used") let get_value key s = match OpamJson.of_string s with @@ -310,6 +315,89 @@ module SWHID = struct | _ -> None) | _ -> None + let check_liveness () = + OpamProcess.Job.catch (fun _ -> Done false) + @@ fun () -> + get_output ~post:false OpamUrl.Op.(instance / "api" / "1" / "ping" / "") + @@| function + | pong::_ -> + (* curl output after answering the http code *) + (* https://archive.softwareheritage.org/api/1/ping/ *) + OpamStd.String.starts_with ~prefix:"\"pong\"" pong + | _ -> false + + (* + Returned error JSONs + { + "error":"Resource not found", + "reason":"The resource /api/1/vault/flat/swh:1:dir:6b700f4b287aee509adbc723d030309188684f4/ could not be found on the server." + } + { + "exception":"NotFoundExc", + "reason":"Cooking of swh:1:dir:6b700f4b287aee509adbc723d030309188684f04 was never requested." + } + { + "exception":"NotFoundExc", + "reason":"swh:1:dir:0000000000000000000000000000000000000000 not found." + } + *) + let parse_err json = + match get_value "exception" json with + | Some "NotFoundExc" -> + (match get_value "reason" json with + | Some reason -> + if OpamStd.String.ends_with ~suffix:"was never requested." reason then + `Uncooked + else if OpamStd.String.ends_with ~suffix:"not found." reason then + `Not_found + else `Error + | None -> `Error) + | Some "Resource not found" -> `Not_found + | Some _ | None -> `Error + + let is_it_cooked url = + let dst = OpamSystem.temp_file ~auto_clean:false "swh-out" in + let download_cmd ~with_curl_mitigation return = + let cmd, args = + match + download_args ~url ~out:dst + ~with_curl_mitigation + ~retry:OpamRepositoryConfig.(!r.retries) + ~compress:false () + with + | "curl" as cmd::args -> cmd, args + | "wget" as cmd::args -> cmd, "--content-on-error"::args + | _ -> assert false + in + let stdout = OpamSystem.temp_file ~auto_clean:false "dl" in + OpamProcess.Job.finally (fun () -> OpamSystem.remove_file stdout) + @@ fun () -> + OpamSystem.make_command ~allow_stdin:false ~stdout cmd args + @@> return + in + (download_cmd ~with_curl_mitigation:false + @@ tool_return download_cmd url) + @@| fun status -> + let read_last_line file = + try + OpamStd.String.split (OpamSystem.read file) '\n' + |> List.rev + |> List.hd + with Failure _ -> "" + in + let status = + match status with + | `ok -> + let json = read_last_line dst in + if String.equal json "" then `Error else `Cooked json + | `http_error 404 -> + let json = read_last_line dst in + parse_err json + | `http_error _ | `fail _ -> `Error + in + OpamSystem.remove_file dst; (* TODO XXX and in case of error raised ? *) + status + (* SWH request output example directory: retrieve "status" & "fetch_url" $ curl https://archive.softwareheritage.org/api/1/vault/directory/4453cfbdab1a996658cd1a815711664ee7742380/ @@ -324,53 +412,60 @@ module SWHID = struct } *) - let get_output ?(post=false) url = - get_output ~post url @@| function - | Some out -> - Some (String.concat "" out) - | None -> - OpamConsole.error "Software Heritage fallback needs %s installed" - (OpamConsole.colorise `underline "curl"); - None - - let get_dir hash = - (* https://archive.softwareheritage.org/api/1/vault/flat/doc/ *) - let url = vault_url "flat" hash in - (* The POST is needed only for asking to cook the archive, it's a no-op on - status check *) - get_output ~post:true url @@| OpamStd.Option.replace @@ fun json -> + let read_flat_out json = let status = get_value "status" json in let fetch_url = get_value "fetch_url" json in match status, fetch_url with - | None, _ | _, None -> None + | None, _ | _, None -> + (match parse_err json with + | `Not_found -> `Not_found + | `Error | `Uncooked -> `Malformed) | Some status, Some fetch_url -> - Some (match status with - | "done" -> `Done (OpamUrl.of_string fetch_url) - | "pending" -> `Pending - | "new" -> `New - | "failed" -> `Failed - | _ -> `Unknown) - - let fallback_err fmt = Printf.sprintf ("SWH fallback: "^^fmt) + match status with + | "done" -> `Done (OpamUrl.of_string fetch_url) + | "pending" -> `Pending + | "new" -> `New + | "failed" -> `Failed + | _ -> `Unknown let get_url ?(max_tries=6) swhid = - let attempts = max_tries in + let request_cooking ?(post=false) url = + get_output ~post url @@| fun out -> String.concat "" out + in let hash = OpamSWHID.hash swhid in - let rec aux max_tries = - if max_tries <= 0 then - Done (Not_available - (Some (fallback_err "max_tries"), - fallback_err "%d attempts tried; aborting" attempts)) - else - get_dir hash @@+ function - | Some (`Done fetch_url) -> Done (Result fetch_url) - | Some (`Pending | `New) -> - Unix.sleep 10; - aux (max_tries - 1) - | None | Some (`Failed | `Unknown) -> - Done (Not_available (None, fallback_err "Unknown swhid")) + (* https://archive.softwareheritage.org/api/1/vault/flat/doc/ *) + let url = vault_url "flat" hash in + let rec loop attempt json = + match read_flat_out json with + | `Done fetch_url -> Done (Result fetch_url) + | `Pending | `New -> + log "%s is cooking (%d/%d)..." + (OpamSWHID.to_string swhid) attempt max_tries; + if (attempt : int) >= (max_tries : int) then + Done (Not_available + (Some (fallback_err "attempt"), + fallback_err "%d attempts tried; aborting" max_tries)) + else + (Unix.sleep 10; + request_cooking ~post:false url + @@+ loop (attempt + 1)) + | `Malformed -> + Done (Not_available (None, fallback_err "Malformed request answer")) + | `Failed | `Unknown | `Not_found -> + Done (Not_available (None, fallback_err "Unknown swhid")) in - aux max_tries + let retrieve_url json = loop 1 json in + is_it_cooked url + @@+ function + | `Error -> Done (Not_available (None, fallback_err "Request error")) + | `Not_found -> Done (Not_available (None, fallback_err "Unknown swhid")) + | `Cooked json -> + log "%s is cooked or cooking, requesting url" (OpamSWHID.to_string swhid); + retrieve_url json + | `Uncooked -> + log "%s is uncooked, request cooking" (OpamSWHID.to_string swhid); + request_cooking ~post:true url + @@+ retrieve_url (* for the moment only used in sources, not extra sources or files *) let archive_fallback ?max_tries urlf dirnames = From 349dba9830e2b2460874b5cc3668e75f9761bade Mon Sep 17 00:00:00 2001 From: Raja Boujbel Date: Tue, 12 Nov 2024 19:44:03 +0100 Subject: [PATCH 08/12] swhid: add some debug logging --- src/repository/opamDownload.ml | 110 +++++++++++++++++---------------- 1 file changed, 58 insertions(+), 52 deletions(-) diff --git a/src/repository/opamDownload.ml b/src/repository/opamDownload.ml index f0352d8e650..6d54520c4b5 100644 --- a/src/repository/opamDownload.ml +++ b/src/repository/opamDownload.ml @@ -476,58 +476,64 @@ module SWHID = struct | _, `Curl -> check_liveness () @@+ fun alive -> if alive then - (* Add a global modifier and/or command for default answering *) - if OpamConsole.confirm ~default:false - "Source %s is not available. Do you want to try to retrieve it \ - from Software Heritage cache (https://www.softwareheritage.org)? \ - It may take few minutes." - (OpamConsole.colorise `underline - (OpamUrl.to_string (OpamFile.URL.url urlf))) then - (log "SWH fallback for %s" - (OpamUrl.to_string (OpamFile.URL.url urlf)); - get_url ?max_tries swhid @@+ function - | Not_available _ as error -> Done error - | Up_to_date _ -> assert false - | Result url -> - let hash = OpamSWHID.hash swhid in - OpamFilename.with_tmp_dir_job @@ fun dir -> - let archive = OpamFilename.Op.(dir // hash) in - download_as ~overwrite:true url archive @@+ fun () -> - let sources = OpamFilename.Op.(dir / "src") in - OpamFilename.extract_job archive sources @@| function - | Some e -> - Not_available ( - Some (fallback_err "archive extraction failure"), - fallback_err "archive extraction failure %s" - (match e with - | Failure s -> s - | OpamSystem.Process_error pe -> - OpamProcess.string_of_result pe - | e -> Printexc.to_string e)) - | None -> - (match OpamSWHID.compute sources with - | None -> - Not_available ( - Some (fallback_err "can't check archive validity"), - fallback_err - "error on swhid computation, can't check its validity") - | Some computed -> - if String.equal computed hash then - (List.iter (fun (_nv, dst, _sp) -> - (* add a text *) - OpamFilename.copy_dir ~src:sources ~dst) - dirnames; - Result (Some "SWH fallback")) - else - Not_available ( - Some (fallback_err "archive not valid"), - fallback_err - "archive corrupted, opam file swhid %S vs computed %S" - hash computed))) - else - Done (Not_available - (Some (fallback_err "skip retrieval"), - fallback_err "retrieval refused by user")) + (log "API is working"; + (* Add a global modifier and/or command for default answering *) + if OpamConsole.confirm ~default:false + "Source %s is not available. Do you want to try to retrieve it \ + from Software Heritage cache (https://www.softwareheritage.org)? \ + It may take few minutes." + (OpamConsole.colorise `underline + (OpamUrl.to_string (OpamFile.URL.url urlf))) then + (log "SWH fallback for %s with %s" + (OpamStd.Format.pretty_list + (List.map (fun (nv,_,_) -> nv) dirnames)) + (OpamSWHID.to_string swhid); + get_url ?max_tries swhid @@+ function + | Not_available _ as error -> Done error + | Up_to_date _ -> assert false + | Result url -> + log "Downloading %s for %s" (OpamSWHID.to_string swhid) + (OpamStd.Format.pretty_list + (List.map (fun (nv,_,_) -> nv) dirnames)); + let hash = OpamSWHID.hash swhid in + OpamFilename.with_tmp_dir_job @@ fun dir -> + let archive = OpamFilename.Op.(dir // hash) in + download_as ~overwrite:true url archive @@+ fun () -> + let sources = OpamFilename.Op.(dir / "src") in + OpamFilename.extract_job archive sources @@| function + | Some e -> + Not_available ( + Some (fallback_err "archive extraction failure"), + fallback_err "archive extraction failure %s" + (match e with + | Failure s -> s + | OpamSystem.Process_error pe -> + OpamProcess.string_of_result pe + | e -> Printexc.to_string e)) + | None -> + (match OpamSWHID.compute sources with + | None -> + Not_available ( + Some (fallback_err "can't check archive validity"), + fallback_err + "error on swhid computation, can't check its validity") + | Some computed -> + if String.equal computed hash then + (List.iter (fun (_nv, dst, _sp) -> + (* add a text *) + OpamFilename.copy_dir ~src:sources ~dst) + dirnames; + Result (Some "SWH fallback")) + else + Not_available ( + Some (fallback_err "archive not valid"), + fallback_err + "archive corrupted, opam file swhid %S vs computed %S" + hash computed))) + else + Done (Not_available + (Some (fallback_err "skip retrieval"), + fallback_err "retrieval refused by user"))) else Done (Not_available (Some (fallback_err "unreachable"), From 24777bfc82de7c40f9ff8cb8aeb0cfc74644d793 Mon Sep 17 00:00:00 2001 From: Raja Boujbel Date: Wed, 27 Nov 2024 18:18:15 +0100 Subject: [PATCH 09/12] Put back wget support for swhid This reverts commit 649fcd3e52affeb7e416e2b496c753e4f8b958b5. --- src/repository/opamDownload.ml | 15 +++++++++++---- tests/reftests/swhid.unix.test | 18 +++++++++++++++++- 2 files changed, 28 insertions(+), 5 deletions(-) diff --git a/src/repository/opamDownload.ml b/src/repository/opamDownload.ml index 6d54520c4b5..21d65c417a8 100644 --- a/src/repository/opamDownload.ml +++ b/src/repository/opamDownload.ml @@ -261,6 +261,12 @@ let download ?quiet ?validate ~overwrite ?compress ?checksum url dstdir = (** Stdout output retrieval and post requests management *) +let post_tools = ["wget"; "curl"] +let check_post_tool () = + match Lazy.force OpamRepositoryConfig.(!r.download_tool) with + | [(CIdent cmd), _], _ -> List.mem cmd post_tools + | _ -> false + let get_output ~post ?(args=[]) url = let cmd_args = (* should we read from output or redirect in a file ? *) @@ -472,8 +478,7 @@ module SWHID = struct match OpamFile.URL.swhid urlf with | None -> Done (Result None) | Some swhid -> - match Lazy.force OpamRepositoryConfig.(!r.download_tool) with - | _, `Curl -> + if check_post_tool () then check_liveness () @@+ fun alive -> if alive then (log "API is working"; @@ -538,8 +543,10 @@ module SWHID = struct Done (Not_available (Some (fallback_err "unreachable"), fallback_err "network failure or API down")) - | _ -> + else Done (Not_available (Some (fallback_err "no retrieval"), - fallback_err "Curl is required for Software Heritage fallback")) + fallback_err "Download tool permitting post request (%s) not \ + set as download tool" + (OpamStd.Format.pretty_list post_tools))) end diff --git a/tests/reftests/swhid.unix.test b/tests/reftests/swhid.unix.test index 1c6bf320710..737711c3e98 100644 --- a/tests/reftests/swhid.unix.test +++ b/tests/reftests/swhid.unix.test @@ -100,6 +100,22 @@ The following actions will be performed: Done. ### opam clean -c Clearing cache of downloaded files +### opam option download-command=wget +Set to 'wget' the field download-command in global configuration +### opam install snappy-swhid-dir -v | grep -v '^Processing' +The following actions will be performed: +=== install 1 package + - install snappy-swhid-dir 2 + +<><> Processing actions <><><><><><><><><><><><><><><><><><><><><><><><><><><><> +Source https://fake.exe/url.tar.gz is not available. Do you want to try to retrieve it from Software Heritage cache (https://www.softwareheritage.org)? It may take few minutes. [y/N] y +-> retrieved snappy-swhid-dir.2 (SWH fallback) +-> installed snappy-swhid-dir.2 +Done. +### opam clean -c +Clearing cache of downloaded files +### opam option download-command=curl +Set to 'curl' the field download-command in global configuration ### opam-version: "2.0" url { @@ -136,7 +152,7 @@ The following actions will be performed: <><> Processing actions <><><><><><><><><><><><><><><><><><><><><><><><><><><><> [ERROR] Failed to get sources of snappy-swhid-ko.2: SWH fallback: no retrieval -OpamSolution.Fetch_fail("SWH fallback: Curl is required for Software Heritage fallback") +OpamSolution.Fetch_fail("SWH fallback: Download tool permitting post request (wget and curl) not set as download tool") <><> Error report <><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><> From b52404465017cbb136ddf0319c00bc29cb9c9770 Mon Sep 17 00:00:00 2001 From: Raja Boujbel Date: Fri, 15 Nov 2024 18:04:55 +0100 Subject: [PATCH 10/12] repository: add re as a dependency of opam-repository --- master_changes.md | 1 + opam-repository.opam | 1 + src/repository/dune | 2 +- 3 files changed, 3 insertions(+), 1 deletion(-) diff --git a/master_changes.md b/master_changes.md index 32d1680ba34..b9850f1008d 100644 --- a/master_changes.md +++ b/master_changes.md @@ -86,6 +86,7 @@ users) ## VCS ## Build + * Add `re` as a dependency of `opam-repository` [#6036 @rjbou] ## Infrastructure diff --git a/opam-repository.opam b/opam-repository.opam index 96505f6887c..442c22d41e1 100644 --- a/opam-repository.opam +++ b/opam-repository.opam @@ -31,4 +31,5 @@ depends: [ "ocaml" {>= "4.08.0"} "opam-format" {= version} "dune" {>= "2.8.0"} + "re" {>= "1.10.0"} ] diff --git a/src/repository/dune b/src/repository/dune index cb622b218b4..5a94881525c 100644 --- a/src/repository/dune +++ b/src/repository/dune @@ -3,7 +3,7 @@ (public_name opam-repository) (synopsis "OCaml Package Manager remote repository handling library") ; TODO: Remove (re_export ...) when CI uses the OCaml version that includes https://github.com/ocaml/ocaml/pull/11989 - (libraries (re_export opam-format)) + (libraries (re_export opam-format) re) (flags (:standard (:include ../ocaml-flags-standard.sexp) (:include ../ocaml-flags-configure.sexp) From d166936c1cc9f534e9bc94980936e20af88b4b25 Mon Sep 17 00:00:00 2001 From: Raja Boujbel Date: Tue, 12 Nov 2024 15:24:38 +0100 Subject: [PATCH 11/12] download: parse http error code for wget too and permit to retrieve it. --- src/repository/opamDownload.ml | 58 ++++++++++++++++++++++++++++++---- 1 file changed, 52 insertions(+), 6 deletions(-) diff --git a/src/repository/opamDownload.ml b/src/repository/opamDownload.ml index 21d65c417a8..7f948d094b5 100644 --- a/src/repository/opamDownload.ml +++ b/src/repository/opamDownload.ml @@ -129,8 +129,51 @@ let download_command_t ~with_curl_mitigation ~compress ?checksum ~url ~dst c = OpamSystem.make_command ~allow_stdin:false ~stdout cmd args @@> c let tool_return redownload_command url ret = + let code_from_header std = + let re = + Re.(compile @@ seq [ + bos; rep space; + str "HTTP/"; rep1 @@ diff any space; space; + group @@ repn digit 3 (Some 3) + ]) in + List.filter_map (fun l -> + try + let r = + Re.(Group.get (exec re l) 1) + in + let code = int_of_string r in + if code < 400 then None else Some code + with Failure _ -> None + | Not_found -> None + ) std + in let open OpamProcess in match Lazy.force OpamRepositoryConfig.(!r.download_tool) with + | [(CIdent "wget"), _], `Default -> + (match ret.OpamProcess.r_code with + | 0 -> (* Http respose < 400 *) + Done `ok + | 8 -> (* Http response > 400 *) + let http_answers = + code_from_header ret.OpamProcess.r_stderr + in + (match http_answers with + | code::_ -> Done (`http_error code) + | [] -> + Done (`fail (Some "wget empty response", + Printf.sprintf "curl: empty response while downloading %s" + (OpamUrl.to_string url)))) + | _ -> (* Another error *) + (* 1 Generic error code. + 2 Parse error---for instance, when parsing command-line options + 3 File I/O error. + 4 Network failure. + 5 SSL verification failure. + 6 Username/password authentication failure. + 7 Protocol errors. *) + Done (`fail (Some "wget command error", + Printf.sprintf "wget error: %s" + (OpamProcess.result_summary ret)))) | _, `Default -> if OpamProcess.is_failure ret then Done (`fail (Some "Download command failed", @@ -157,12 +200,15 @@ let tool_return redownload_command url ret = (redownload_command ~with_curl_mitigation:true @@ function ret -> if OpamProcess.is_failure ret then if ret.r_code = 22 then - (* If this broken version of curl persists for some time, it is - relatively straightforward to parse the http response code from - the message, as it hasn't changed. *) - Done (`fail (Some "curl failed owing to a server-side issue", - Printf.sprintf "curl failed with server-side error: %s" - (OpamProcess.result_summary ret))) + match code_from_header ret.r_stdout with + | code::_ -> Done (`http_error code) + | [] -> + (* If this broken version of curl persists for some time, it is + relatively straightforward to parse the http response code from + the message, as it hasn't changed. *) + Done (`fail (Some "curl failed owing to a server-side issue", + Printf.sprintf "curl failed with server-side error: %s" + (OpamProcess.result_summary ret))) else Done (`fail (Some "curl failed", Printf.sprintf "curl failed: %s" From 8392039a166fbbb6dde46f0422570a2b81d3f9ee Mon Sep 17 00:00:00 2001 From: Raja Boujbel Date: Wed, 27 Nov 2024 18:33:48 +0100 Subject: [PATCH 12/12] fixup! download: parse http error code for wget too and permit to retrieve it. --- src/repository/opamDownload.ml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/repository/opamDownload.ml b/src/repository/opamDownload.ml index 7f948d094b5..890bc5971b3 100644 --- a/src/repository/opamDownload.ml +++ b/src/repository/opamDownload.ml @@ -37,6 +37,7 @@ let curl_args = (* --fail is as old as curl; though the assumption that it leads to exit code 22 when there's an error is probably 5.3 21-Dec-1998 (prior to that it led to exit code 21) *) + (CString "--show-headers", None) :: (CString "--fail", None) :: main_args else (CString "--write-out", None) :: @@ -53,6 +54,7 @@ let wget_args = [ CString "--header=Accept: */*", None; CString "-t", None; CIdent "retry", None; CString "-O", None; CIdent "out", None; + CString "--server-response", None; (* Get the HTTP responde to parse error code *) CString "-U", None; user_agent, None; CString "--", None; (* End list of options *) CIdent "url", None;