summaryrefslogtreecommitdiff
path: root/getbib
blob: 2efe91d30d7b071ce2929941dc8da4188dd25860 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
#!/bin/bash
# usage: getbib file.pdf
# result: prints the bibtex entry (entries) for the DOI(s) in the document
# requires: pdftotext and curl on PATH; network access to api.crossref.org
# exit status: 0 when every DOI lookup succeeded; 1 when no pdf was given,
#              text extraction failed, no DOI was found, or a lookup failed
# TODO: automatically add doi to the .bib file

set -u

# Trim characters that the extraction regex picks up from the surrounding
# prose: trailing sentence punctuation and a closing parenthesis that has no
# matching opener inside the candidate (e.g. "(doi:10.1000/abc).").
# Arguments: $1 - candidate DOI as matched by the regex
# Outputs:   the trimmed DOI on stdout; nothing if no suffix is left
trim_doi() {
  local doi=$1 opens closes
  while :; do
    case "$doi" in
      *. | *, | *\; | *:)
        doi=${doi%?}
        ;;
      *\))
        # DOI suffixes may contain balanced parentheses, so a final ")" is
        # dropped only when there are more ")" than "(" in the candidate.
        opens=${doi//[!\(]/}
        closes=${doi//[!\)]/}
        if [ "${#closes}" -gt "${#opens}" ]; then
          doi=${doi%?}
        else
          break
        fi
        ;;
      *)
        break
        ;;
    esac
  done
  # "${doi#*/}" is the part after the first slash; skip prefix-only leftovers.
  if [ -n "${doi#*/}" ]; then
    printf '%s\n' "$doi"
  fi
}

# Percent-encode every character outside the RFC 3986 unreserved set so the
# DOI can be used as a single URL path segment ("/" becomes %2F, "<" %3C, ...).
# Arguments: $1 - DOI (ASCII, as produced by the extraction regex)
# Outputs:   the encoded DOI on stdout, without a trailing newline
urlencode_doi() {
  local raw=$1 encoded='' ch hex i
  local LC_ALL=C # make the bracket ranges below plain ASCII ranges
  for ((i = 0; i < ${#raw}; i++)); do
    ch=${raw:i:1}
    case "$ch" in
      [A-Za-z0-9._~-])
        encoded+=$ch
        ;;
      *)
        # A leading quote makes printf use the character's numeric code.
        printf -v hex '%%%02X' "'$ch"
        encoded+=$hex
        ;;
    esac
  done
  printf '%s' "$encoded"
}

if [ -z "${1:-}" ]; then
  echo "Provide a pdf." >&2
  exit 1
fi

PDF="$1"

# A path starting with "-" would be parsed by pdftotext as an option;
# a bare "-" is left untouched.
case "$PDF" in
  -?*) PDF="./$PDF" ;;
esac

# 1. Convert PDF to text
if ! TEXT=$(pdftotext "$PDF" -); then
  echo "Could not extract text from: $PDF" >&2
  exit 1
fi

# 2. Extract DOIs
# grep matches line by line and the pattern cannot match whitespace, so the
# text does not need to be flattened to a single line first.
DOIS=$(
  printf '%s\n' "$TEXT" |
    grep -aoE '10\.[0-9]{4,9}/[A-Za-z0-9._;()/:<>-]+' |
    while IFS= read -r CANDIDATE; do
      trim_doi "$CANDIDATE"
    done |
    sort -u
)

if [ -z "$DOIS" ]; then
  echo "No DOI found." >&2
  exit 1
fi

# 3. Get Crossref-BibTeX for every DOI
STATUS=0
while IFS= read -r DOI; do
  echo "Found DOI: $DOI"

  ENCODED_DOI=$(urlencode_doi "$DOI")
  URL="https://api.crossref.org/works/$ENCODED_DOI/transform"

  echo
  echo "BibTeX:"
  # -f: treat HTTP errors as failures instead of printing the error body as
  # if it were BibTeX; -S: still show the reason although -s hides progress.
  # stdin is detached so curl can never consume the DOI list feeding the loop.
  if ! curl -fsSL -H "Accept: text/bibliography; style=bibtex" "$URL" </dev/null; then
    echo "Lookup failed for DOI: $DOI" >&2
    STATUS=1
  fi
done <<< "$DOIS"

exit "$STATUS"