Spaces:
Build error
Build error
# Download open-access article PDFs listed by the PMC OA web service and
# append their metadata to metadata.txt in the current directory.
#
# Requires: bash, curl, tr, sed.

# Specify the number of articles to download
limit=10

#######################################
# Split an OA XML listing into records and extract the fields we need.
# Works whether the XML arrives on a single line or pretty-printed, because
# the input is first flattened and then cut after every </record>.
# Arguments: none (reads the XML document from stdin)
# Outputs:   one line per <record> on stdout, fields separated by the ASCII
#            unit separator (0x1F): <PMC id> <citation> <pdf href>.
#            Citation and href are empty when the record lacks them.
#######################################
parse_oa_records() {
  local chunk pmc_id citation pdf_link
  local nl=$'\n'
  # [^>]* keeps every match inside a single tag; [^"]* stops at the closing
  # quote of the attribute instead of running to the last quote on the line.
  local id_re='<record[^>]* id="(PMC[0-9]+)"'
  local citation_re='<record[^>]* citation="([^"]*)"'
  local pdf_re='<link[^>]* format="pdf"[^>]* href="([^"]*)"'

  tr '\r\n' '  ' \
    | sed "s#</record>#&\\${nl}#g" \
    | while IFS= read -r chunk || [[ -n $chunk ]]; do
        # Header/trailer fragments carry no record id; skip them.
        if ! [[ $chunk =~ $id_re ]]; then
          continue
        fi
        pmc_id=${BASH_REMATCH[1]}

        citation=''
        if [[ $chunk =~ $citation_re ]]; then
          citation=${BASH_REMATCH[1]}
        fi

        pdf_link=''
        if [[ $chunk =~ $pdf_re ]]; then
          pdf_link=${BASH_REMATCH[1]}
        fi

        # 0x1F is not IFS whitespace, so empty fields survive `read`.
        printf '%s\037%s\037%s\n' "$pmc_id" "$citation" "$pdf_link"
      done
}

#######################################
# Fetch the article listing, then download up to $limit PDFs and log them.
# Globals:   limit (read)
# Outputs:   progress on stdout, errors on stderr, PDFs and metadata.txt in
#            the current directory
# Returns:   0 on success, 1 if the listing could not be fetched
#######################################
main() {
  local response pmc_id title pdf_link
  local downloaded=0
  # NOTE(review): `limit` does not appear to be a documented OA service
  # parameter; it is kept in the URL as before, and the cap is additionally
  # enforced locally below.
  local url="https://www.ncbi.nlm.nih.gov/pmc/utils/oa/oa.fcgi?format=pdf&limit=$limit"

  # Fetch the list of articles with metadata in XML format
  if ! response=$(curl -fsS --connect-timeout 10 "$url"); then
    printf 'Error: could not fetch the article list from %s\n' "$url" >&2
    return 1
  fi
  if [[ -z $response ]]; then
    printf 'Error: the article list from %s was empty\n' "$url" >&2
    return 1
  fi

  # Process each record in the response
  while IFS=$'\037' read -r pmc_id title pdf_link; do
    if (( downloaded >= limit )); then
      break
    fi
    printf '%s\n' "Processing article ID: $pmc_id"

    # Check if we found a PDF link
    if [[ -z $pdf_link ]]; then
      printf '%s\n' "No PDF link found for article ID: $pmc_id"
      continue
    fi

    # Print metadata ("Title" is the record's citation attribute)
    printf '%s\n' "Title: $title"
    printf '%s\n' "Downloading PDF from: $pdf_link"

    # Download the PDF; only record metadata for files that actually arrived
    if curl -f -O "$pdf_link"; then
      {
        printf '%s\n' "Title: $title"
        printf '%s\n' "PDF Link: $pdf_link"
        printf '%s\n' "---------------------"
      } >> metadata.txt
      downloaded=$((downloaded + 1))
    else
      printf '%s\n' "Download failed for article ID: $pmc_id" >&2
    fi
  done < <(parse_oa_records <<<"$response")
}

main "$@"