#!/usr/bin/Rscript --vanilla --verbose
# script to convert RePEc-style rdf files (ReDIFF) to DOAJ-type xml files
# usage: oai.R [file]
# where [file] is a list of rdf files
# Jon Baron jonathanbaron7@gmail.com
#
# NOTE(review): many string literals below are empty ("") or begin with a
# stray blank. They look like slots whose XML element markup was lost before
# this copy of the script was made -- confirm against the original script and
# the DOAJ schema. They are kept exactly where they were so the markup can be
# put back in place; until then the output is not well-formed XML.

# Split line `x` of `lines` on ":" (plus any blanks after it) and return
# piece number `y`; piece 1 is the tag name.
#   x     index of the line to split
#   y     index of the piece wanted
#   lines character vector holding the record; defaults to the global `r1`
#         so existing two-argument calls keep working
# Returns NA when the requested piece does not exist.
Get <- function(x, y, lines = r1) {
  unlist(strsplit(lines[x], split = ":[ ]*"))[y]
}

# Return everything that follows the first ":" of a line (and the blanks
# right after it). Unlike Get(x, 2) this keeps values that themselves contain
# a colon, such as URLs. A line without a colon is returned unchanged.
FieldValue <- function(line) {
  sub("^[^:]*:[ ]*", "", line)
}

# Convert an English month name to a zero-padded two-digit month number,
# e.g. "March" -> "03". Surrounding blanks are ignored.
# Stops with an explicit message when the name is not a month.
Mon <- function(x) {
  idx <- match(trimws(x), month.name)
  if (anyNA(idx)) {
    stop("unrecognised month name: ",
         paste(x[is.na(idx)], collapse = ", "), call. = FALSE)
  }
  sprintf("%02d", idx)
}

Item <- ""

# Fixed journal-level text that opens every record.
# NOTE(review): in the copy under review the three separators inside this
# literal were written as backslash-space, which R appears to read as a plain
# space; they are probably what is left of backslash-newline continuations.
RecordTop <- " eng Society for Judgment and Decision Making Judgment and Decision Making 1930-2975\n"

# Construct one output record from the lines of one rdf file.
#   r1  character vector, one element per line of the rdf file
# Layout expected in r1:
#   - "Tag: value" header lines up to a line whose tag is "Abstract";
#   - the abstract text on the lines after it, up to a "Keywords" line;
#   - comma-separated keywords on the "Keywords" line and on every line
#     after it (for those later lines only the text before the first ":"
#     is used, as before).
# For repeated header tags the last occurrence wins, except "File-URL",
# where the first one is used. Missing tags yield empty fields instead of
# an error or a literal "NA".
# Returns the record as a single string.
Makeitem <- function(r1) {
  Tags <- vapply(seq_along(r1), function(k) Get(k, 1, r1), character(1))
  Tags[is.na(Tags)] <- ""
  Values <- FieldValue(r1)

  # Header = everything before the "Abstract" line (or the whole file when
  # there is no abstract).
  AbstractAt <- match("Abstract", Tags)
  HeaderEnd <- if (is.na(AbstractAt)) length(r1) else AbstractAt - 1L
  InHeader <- seq_along(r1) <= HeaderEnd

  # Value of the last header line carrying `tag`, or "" when absent.
  Field <- function(tag) {
    hits <- Values[InHeader & Tags == tag]
    if (length(hits) > 0) hits[length(hits)] else ""
  }

  Authors <- Values[InHeader & Tags == "Author-Name"]
  Title <- Field("Title")
  Volume <- Field("Volume")
  Issue <- Field("Issue")
  Year <- Field("Year")

  MonthName <- Field("Month")
  Month <- if (nzchar(MonthName)) Mon(MonthName) else ""

  # "Pages: 12-34" -> start page 12, end page 34.
  PP <- unlist(strsplit(Field("Pages"), split = "-"))
  Start <- if (length(PP) >= 1) PP[1] else ""
  End <- if (length(PP) >= 2) PP[2] else ""

  # Keep the URL as written (scheme, port and all) rather than rebuilding it
  # from colon-separated pieces with a forced "http:" prefix.
  Urls <- Values[InHeader & Tags == "File-URL"]
  Pdf <- if (length(Urls) > 0) Urls[1] else ""

  # Lines after "Abstract": abstract text up to the first "Keywords" line.
  Body <- if (is.na(AbstractAt)) {
    integer(0)
  } else {
    seq_along(r1)[-seq_len(AbstractAt)]
  }
  KwHit <- match("Keywords", Tags[Body])
  AbstractLines <- if (is.na(KwHit)) Body else Body[seq_len(KwHit - 1L)]

  # The leading "" reproduces the blank that precedes the first abstract line.
  Abstract <- paste(c("", r1[AbstractLines]), collapse = " ")
  Abstract <- gsub("\\\\&", "and", Abstract)

  Keys <- ""
  if (!is.na(KwHit)) {
    KeywordsAt <- Body[KwHit]
    Continuation <- Tags[seq_along(r1) > KeywordsAt]
    Keys <- paste0(c(Values[KeywordsAt], Continuation), collapse = "")
  }
  Keys <- unlist(strsplit(Keys, split = ","))

  # Zero authors / keywords collapse to "" rather than "NA".
  Author <- paste0("", Authors, "", collapse = "")
  Keywords <- paste0("", Keys, "", collapse = "")

  # NOTE(review): the day of the publication date is hard-coded as 26.
  Item <- paste0(
    RecordTop,
    "", Year, "-", Month, "-26\n",
    "", Volume, "",
    "", Issue, "\n",
    "", Start, "",
    "", End, "\n",
    "article\n",
    "", Title, "\n",
    ""
  )
  Item <- paste0(Item, Author, "", Abstract, " ", Pdf, "")
  paste(Item, Keywords, "", sep = "\n")
}

# Driver: read the list of rdf files named on the command line, convert each
# file and write all records, wrapped at 80 columns, to rss/oai.xml.
Args <- commandArgs(trailingOnly = TRUE)
if (length(Args) >= 1) {
  # name of file with list of rdf files
  RdfList <- scan(file = Args[1], what = "character", sep = "\n")
  for (k in RdfList) {
    r1 <- scan(file = k, what = "character", sep = "\n")
    Item <- paste(Item, Makeitem(r1), sep = "\n")
  }
  write(strwrap(paste(Item, ""), width = 80), file = "rss/oai.xml")
} else {
  message("usage: oai.R [file]   (where [file] is a list of rdf files)")
}