Skip to content

Commit 678e441

Browse files
committed
Add fetch_book.sh helper script
1 parent 72dc4b5 commit 678e441

File tree

1 file changed

+187
-0
lines changed

1 file changed

+187
-0
lines changed

fetch_book.sh

Lines changed: 187 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,187 @@
1+
#!/usr/bin/env bash
#
# Look up a book through the Google Books API, download its cover and create
# a markdown entry for it.
#
# Usage:
#   <script> "title" "author" [rating] [description]
#   <script> file.txt    # one "title|author|rating|description" entry per line
#
# Environment:
#   GOOGLE_BOOKS_API_KEY  sent as the "key" parameter of the API request
#
# External tools invoked by the script: curl, jq, bc, uuidgen and, for .docx
# input only, docx2txt.sh.

# Strict mode: abort on unhandled errors, unset variables and failures in any
# pipeline stage.
set -euo pipefail

# Absolute directory holding this script; the content/book and content/cover
# output directories are created beneath it.
origin=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) || exit
6+
7+
#######################################
# Percent-encode a string so it can be embedded in a URL query.
# Arguments: $1 - raw text to encode
# Outputs:   the encoded text on stdout (jq's @uri filter)
# Returns:   exit status of the printf | jq pipeline
#######################################
escape_url() {
  # printf (rather than a here-string) avoids feeding jq a trailing newline
  # that would otherwise be encoded as %0A.
  printf '%s' "$1" | jq -sRr @uri
}
10+
11+
#######################################
# Strip leading and trailing whitespace from every line of a string.
# Arguments: $1 - text to trim
# Outputs:   the trimmed text on stdout
# Returns:   exit status of sed
#######################################
trim() {
  sed -e 's/^[[:space:]]*//' -e 's/[[:space:]]*$//' <<< "$1"
}
14+
15+
#######################################
# Normalise a user-typed number: decimal commas become dots and surrounding
# whitespace is removed. The value is not validated as numeric.
# Arguments: $1 - raw number text (e.g. " 3,5 ")
# Outputs:   the normalised text on stdout (e.g. "3.5")
# Returns:   exit status of the pipeline
#######################################
parseNumber() {
  # printf instead of echo so option-like input (-n, -e) is passed through
  # verbatim instead of being interpreted by echo.
  printf '%s\n' "$1" \
    | tr ',' '.' \
    | sed -e 's/^[[:space:]]*//' -e 's/[[:space:]]*$//'
}
18+
19+
# Entry point: dispatch on the command-line arguments.
#   * one argument naming an existing file -> batch mode: every non-empty line
#     "title | author | rating | description" is handed to this same script
#     as separate arguments (a .docx is only converted to text, then the
#     script stops so the result can be post-processed by hand)
#   * two to four arguments                -> single-book mode
#   * anything else                        -> usage message, exit status 1
if [ $# -eq 1 ] && [ -f "$1" ]; then
  filename=$(basename "$1")
  if [ "${filename##*.}" = "docx" ]; then
    echo "Provided file is a .docx, converting to text"
    filename_b=$(basename "$1" .docx)
    echo "filename_b=$filename_b"
    # unzip -p "$1" word/document.xml | sed -e 's/<\/w:p>/ /g; s/<[^>]\{1,\}>/ /g; s/[^[:print:]]\{1,\}/ /g'
    docx2txt.sh "$1"
    file="${PWD}/${filename_b}.txt"
    # NOTE(review): presumably docx2txt.sh writes <name>.txt next to its
    # input, which is where the result is picked up from — confirm.
    converted="$(dirname "$1")/${filename_b}.txt"
    # Moving a file onto itself fails, which used to abort the script when
    # the .docx lives in the current directory.
    if ! [ "${converted}" -ef "${file}" ]; then
      mv -- "${converted}" "${file}"
    fi
    echo "--------"
    echo "STOP will most probably need post processing of resulting ${file} to follow requirements"
    echo ""
    echo "title | author | rating | description"
    echo ""
    echo " Where title & author are mandatory, rating falls back to 0 (unrated) and description is optional"
    echo " Rating can be X/Y or X, rating value is always converted to 10 upper bound (3 is then 3/10, 3.5/5 is then 7/10)"
    echo "--------"
    exit 1
  else
    file="$1"
  fi

  echo "Reading book information from file ${file}"
  pivot=0
  page_size=20
  limit=$((pivot + page_size))
  line_nb=0
  # Absolute path used to re-invoke this script: a bare file name (as when
  # started with "bash fetch_book.sh") would be searched in PATH instead.
  self="${origin}/$(basename "${BASH_SOURCE[0]}")"
  number_re='^[0-9]+([.][0-9]+)?$'

  # "|| [ -n ... ]" keeps a last line that has no trailing newline.
  while IFS= read -r line || [ -n "${line}" ]; do
    if [ -z "${line}" ]; then
      continue
    fi

    echo ""
    echo "${line_nb} ---------------"
    line_nb=$((line_nb + 1))

    # there are Google API rate limits, doing by batch can help iterating until limit is ignored
    if [ "${line_nb}" -lt "${pivot}" ]; then
      continue
    fi
    if [ "${line_nb}" -gt "${limit}" ]; then
      break
    fi

    # Without any delimiter cut returns the whole line for every field, so
    # such lines cannot be split into title and author.
    if [[ "${line}" != *'|'* ]]; then
      echo "Skipping entry ${line_nb}: expected 'title | author | rating | description'" >&2
      continue
    fi

    title=$(trim "$(cut -d'|' -f1 <<< "${line}")")
    author=$(trim "$(cut -d'|' -f2 <<< "${line}")")
    if [ -z "${title}" ] || [ -z "${author}" ]; then
      echo "Skipping entry ${line_nb}: title and author are mandatory" >&2
      continue
    fi

    rating=0
    rating_str=$(trim "$(cut -d'|' -f3 <<< "${line}")")
    if [ -n "${rating_str}" ]; then
      rating_value=$(parseNumber "${rating_str%%/*}")
      echo "rating_value=$rating_value"
      if [[ "${rating_str}" == */* ]]; then
        rating_upper_bound=$(parseNumber "$(cut -d/ -f2 <<< "${rating_str}")")
      else
        # a bare "X" is already expressed out of 10
        rating_upper_bound=10
      fi
      # we want rating to be in [0-10]
      if [[ "${rating_value}" =~ ${number_re} ]] \
        && [[ "${rating_upper_bound}" =~ ${number_re} ]] \
        && [ "$(bc <<< "${rating_upper_bound} > 0")" = "1" ]; then
        rating=$(bc <<< "${rating_value} * 10 / ${rating_upper_bound}")
      else
        echo "Ignoring invalid rating '${rating_str}', falling back to 0 (unrated)" >&2
      fi
    fi

    # the description is the last field and may itself contain '|'
    description=$(trim "$(cut -d'|' -f4- <<< "${line}")")

    # call ourselves with values instead of file; stdin is detached so the
    # child can never consume the remaining lines of the list
    "${self}" "${title}" "${author}" "${rating}" "${description}" < /dev/null
  done < "${file}"
elif [ $# -lt 2 ] || [ $# -gt 4 ]; then
  echo "Usage:"
  echo " either $0 \"title\" \"author\" [rating] [description]"
  echo " - rating is an integer in [0-10] (0 means unrated)"
  echo " or $0 file.txt"
  echo " where file.txt contains lines with the following format:"
  echo " title|author|rating|description"
  echo " rating is an integer in [0-10] (0 means unrated)"
  echo " rating can also be value/upper (e.g. 3/5) to be converted to a 10-based rating"
  exit 1
else
  # Single-book mode: $1 title, $2 author, $3 optional rating, $4 optional
  # description.
  if [ -z "${3:-}" ] || [ "${3:-0}" = "0" ]; then
    rating="0 # TBD"
  else
    rating=${3}
  fi
  description="${4:-}"

  mkdir -p "${origin}/content/book"
  mkdir -p "${origin}/content/cover"

  book_title_query=$(trim "$(tr '[:upper:]' '[:lower:]' <<< "${1}")")
  book_author_query=$(trim "$(tr '[:upper:]' '[:lower:]' <<< "${2}")")

  book_query="'${book_title_query}' by '${book_author_query}'"

  echo "Query book information for ${book_query}"

  # -S makes curl report why a request failed instead of aborting silently.
  book_data=$(curl -sS --get \
    --data "key=${GOOGLE_BOOKS_API_KEY?must be set to a Google Books API key}" \
    --data "langRestrict=fr" \
    --data-urlencode "q=${book_title_query} by ${book_author_query}" \
    'https://www.googleapis.com/books/v1/volumes' \
    -H 'Accept: application/json')

  # Anything that is not a plain count (empty body, error payload, invalid
  # JSON) is treated as "no result".
  total_items=$(jq -r '.totalItems // 0' <<< "${book_data}" 2> /dev/null) || total_items=0
  if ! [[ "${total_items}" =~ ^[0-9]+$ ]]; then
    total_items=0
  fi

  title=""
  author=""
  isbn=""
  if [ "${total_items}" -gt 0 ]; then
    # First volume carrying an ISBN 13; "[]?" tolerates volumes that have no
    # identifiers at all.
    selected_volume=$(jq -r 'first(.items[]? | select(.volumeInfo.industryIdentifiers[]? | select(.type == "ISBN_13")))' <<< "${book_data}")
    if [ -n "${selected_volume}" ]; then
      # "// empty" avoids storing the literal string "null" for absent fields
      title=$(jq -r '.volumeInfo.title // empty' <<< "${selected_volume}")
      author=$(jq -r '.volumeInfo.authors[0] // empty' <<< "${selected_volume}")
      if [ -z "${title}" ]; then
        title="${book_title_query}"
      fi
      if [ -z "${author}" ]; then
        author="${book_author_query}"
      fi
      echo "Retrieved some book information for ${book_query}: '${title}' by '${author}'"
      isbn=$(jq -r 'first(.volumeInfo.industryIdentifiers[]? | select(.type == "ISBN_13") | (.identifier // empty))' <<< "${selected_volume}")
      if [ -n "${isbn}" ]; then
        echo "Found ISBN 13 '${isbn}'"
      fi

      if [ -z "${description}" ]; then
        description=$(jq -r '.volumeInfo.description // empty' <<< "${selected_volume}")
      fi
      cover_full_url=$(jq -r '.volumeInfo.imageLinks.thumbnail // empty' <<< "${selected_volume}")
      if [ -z "${cover_full_url}" ]; then
        echo " ⚠︎ Can't find book cover for ${book_query} (ISBN: ${isbn})"
      else
        # keep the URL up to its first '&', then append fixed parameters
        cover_short_url="${cover_full_url%%&*}"
        cover_clean_url="${cover_short_url}&printsec=frontcover&img=1&source=gbs_api"
        # FIXME what about non JPEG files?
        cover_file="${origin}/content/cover/${isbn}.jpg"
        if [ -z "${isbn}" ]; then
          echo "Won't try to download cover for ${book_query}, no ISBN to store result"
        elif [ -f "${cover_file}" ]; then
          echo "Cover already downloaded for ${book_query} (ISBN: ${isbn})"
          echo "Cover available: ${cover_file}"
        else
          echo "Downloading cover for ${book_query} (${isbn})"
          echo "Cover URL: ${cover_clean_url}"
          if curl -fsS -o "${cover_file}" "${cover_clean_url}"; then
            echo "Cover available: ${cover_file}"
          else
            # do not leave a partial file that a later run would take for an
            # already downloaded cover
            rm -f -- "${cover_file}"
            echo " ⚠︎ Failed to download cover for ${book_query} (ISBN: ${isbn})" >&2
          fi
        fi
      fi
    fi

    cover_query=$(escape_url "${book_title_query} ${book_author_query}")
    echo "Not satisfied with the cover? You can try to find the cover on:"
    echo " - Google image: https://www.google.fr/search?q=${cover_query}&udm=2"
    echo " - Amazon: https://www.amazon.fr/s?k=${cover_query}&i=stripbooks"
  fi

  if [ -z "${isbn}" ]; then
    echo "Can't find any book or ISBN for ${book_query}"
    title="${book_title_query}"
    author="${book_author_query}"
    isbn="?????????????"
  fi

  # book file (markdown)
  book_file="${origin}/content/book/${isbn}.md"
  if [ ! -f "${book_file}" ]; then
    uuid=$(uuidgen | tr '[:upper:]' '[:lower:]')
    echo "Creating a new book ${book_file} for '${title}' by '${author}'"
    # Front matter, a blank line, the description, then a trailing blank line.
    # NOTE(review): title/author are written unescaped; a double quote in
    # either one ends up verbatim inside the quoted value.
    printf '%s\n' \
      "---" \
      "uuid: ${uuid}" \
      "REF: \"${book_query}\"" \
      "title: \"${title}\"" \
      "author: \"${author}\"" \
      "rating: ${rating}" \
      "read_date: $(date "+%Y-%m-%d") # TBD" \
      "isbn: \"${isbn}\"" \
      "---" \
      "" \
      "${description}" \
      "" > "${book_file}"
  else
    echo "Book ${book_file} already exists"
    # show the front matter of the existing entry
    head -n 8 "${book_file}"
  fi
fi

0 commit comments

Comments
 (0)