#!/bin/bash
#
# getbib - print the BibTeX entry (entries) for the DOI(s) found in a PDF.
#
# Usage:    getbib file.pdf
# Output:   for every distinct DOI found in the document, a "Found DOI: ..."
#           line, a blank line, a "BibTeX:" line and the record returned by
#           the Crossref transform endpoint (all on stdout).
#           Diagnostics are written to stderr.
# Exit:     0 if every DOI was fetched; 1 on usage error, missing tool,
#           failed PDF conversion, no DOI found, or any failed fetch.
# Requires: pdftotext, curl, grep, sed, sort, tr.
#
# TODO: automatically add doi to the .bib file

#######################################
# Extract the distinct DOIs contained in the text read from stdin.
# Newlines are replaced by spaces first, so the whole text is scanned as a
# single line. Trailing '.', ';' and ':' are removed from each match because
# they are almost always sentence punctuation glued to the DOI; a trailing
# ')' is removed only when the match contains no '(' (i.e. it closes a
# parenthesis opened before the DOI in the surrounding text).
# Outputs: one DOI per line, byte-wise sorted, duplicates removed.
#######################################
extract_dois() {
  tr '\n' ' ' \
    | grep -aoE '10\.[0-9]{4,9}/[A-Za-z0-9._;()/:<>-]+' \
    | sed -E \
        -e 's/[.;:]+$//' \
        -e '/\(/!s/\)+$//' \
        -e 's/[.;:]+$//' \
    | LC_ALL=C sort -u
}

#######################################
# Percent-encode a DOI for use as a single URL path segment.
# Everything except RFC 3986 "unreserved" characters is encoded, so '/',
# '(', ')', ':', ';', '<' and '>' cannot alter the structure of the URL.
# Arguments: $1 - the DOI (ASCII, as produced by extract_dois)
# Outputs:   the encoded DOI on stdout, without a trailing newline
#######################################
urlencode_doi() {
  local LC_ALL=C # make the bracket ranges below match ASCII only
  local doi=$1
  local encoded=''
  local ch hex i

  for ((i = 0; i < ${#doi}; i++)); do
    ch=${doi:i:1}
    case "$ch" in
      [A-Za-z0-9._~-])
        encoded+=$ch
        ;;
      *)
        # "'c" makes printf yield the numeric code of character c.
        printf -v hex '%%%02X' "'$ch"
        encoded+=$hex
        ;;
    esac
  done

  printf '%s' "$encoded"
}

#######################################
# Entry point: convert the PDF to text, extract its DOIs and print the
# Crossref BibTeX record for each of them.
# Arguments: $1 - path to the PDF file
# Returns:   0 on success, 1 on any error (see the file header)
#######################################
main() {
  local pdf=${1:-}
  local tool text dois doi url
  local status=0

  if [ -z "$pdf" ]; then
    echo "Provide a pdf." >&2
    return 1
  fi

  for tool in pdftotext curl; do
    if ! command -v "$tool" >/dev/null 2>&1; then
      printf 'Required tool not found: %s\n' "$tool" >&2
      return 1
    fi
  done

  # Keep a path that starts with '-' from being parsed as an option.
  case "$pdf" in
    -*) pdf="./$pdf" ;;
  esac

  # 1. Convert PDF to text
  if ! text=$(pdftotext "$pdf" -); then
    printf 'Could not convert to text: %s\n' "$pdf" >&2
    return 1
  fi

  # 2./3. Flatten the text to a single line and extract the DOIs
  dois=$(printf '%s' "$text" | extract_dois)

  if [ -z "$dois" ]; then
    echo "No DOI found." >&2
    return 1
  fi

  # 4. Get Crossref-BibTeX for every DOI
  while IFS= read -r doi; do
    printf 'Found DOI: %s\n' "$doi"
    url="https://api.crossref.org/works/$(urlencode_doi "$doi")/transform"

    echo
    echo "BibTeX:"

    # -f: treat HTTP errors as failures instead of printing the error body
    # as if it were a BibTeX record; -S: still show the reason despite -s.
    if ! curl -L -s -S -f \
      -H "Accept: text/bibliography; style=bibtex" "$url"; then
      printf 'Failed to fetch BibTeX for DOI: %s\n' "$doi" >&2
      status=1
    fi
  done <<<"$dois"

  return "$status"
}

main "$@"