args = commandArgs(trailingOnly=TRUE)
#args=c('GSM210896.dat','Seed_match_count_miR-7.dat')

expr=read.table(args[1],header=T)
match=read.table(args[2],header=T)

probes_1_8mer=match$Probe[match$X6.mers==0 & match$X7.mers_t1A==0 & match$X7.mers_m8==0 & match$X8.mers==1]
probes_1_7mer_m8=match$Probe[match$X6.mers==0 & match$X7.mers_t1A==0 & match$X7.mers_m8==1 & match$X8.mers==0]
probes_1_7mer_t1A=match$Probe[match$X6.mers==0 & match$X7.mers_t1A==1 & match$X7.mers_m8==0 & match$X8.mers==0]
probes_1_6mer=match$Probe[match$X6.mers==1 & match$X7.mers_t1A==0 & match$X7.mers_m8==0 & match$X8.mers==0]
probes_no_site=match$Probe[match$X6.mers==0 & match$X7.mers_t1A==0 & match$X7.mers_m8==0 & match$X8.mers==0]

expr_1_8mer=subset(expr,is.element(expr$X.ID_REF..,probes_1_8mer))
expr_1_7mer_m8=subset(expr,is.element(expr$X.ID_REF..,probes_1_7mer_m8))
expr_1_7mer_t1A=subset(expr,is.element(expr$X.ID_REF..,probes_1_7mer_t1A))
expr_1_6mer=subset(expr,is.element(expr$X.ID_REF..,probes_1_6mer))
expr_no_site=subset(expr,is.element(expr$X.ID_REF..,probes_no_site))

F_no_site=ecdf(expr_no_site$VALUE..*log(10)/log(2))
F_1_6mer=ecdf(expr_1_6mer$VALUE..*log(10)/log(2))
F_1_7mer_t1A=ecdf(expr_1_7mer_t1A$VALUE..*log(10)/log(2))
F_1_7mer_m8=ecdf(expr_1_7mer_m8$VALUE..*log(10)/log(2))
F_1_8mer=ecdf(expr_1_8mer$VALUE..*log(10)/log(2))

name=sub('.dat$','',args[1])
if (name=='GSM210896') title='miR-7, 12h post-transfection'
if (name=='GSM210897') title='miR-7, 24h post-transfection'
if (name=='GSM210898') title='miR-9, 24h post-transfection'
if (name=='GSM210899') title='miR-9, 12h post-transfection'
if (name=='GSM210900') title='miR-122, 12h post-transfection'
if (name=='GSM210901') title='miR-122, 24h post-transfection'
if (name=='GSM210902') title='miR-128a, 12h post-transfection'
if (name=='GSM210903') title='miR-128a, 24h post-transfection'
if (name=='GSM210904') title='miR-132, 24h post-transfection'
if (name=='GSM210905') title='miR-132, 12h post-transfection'
if (name=='GSM210906') title='miR-133a, 12h post-transfection'
if (name=='GSM210907') title='miR-133a, 24h post-transfection'
if (name=='GSM210908') title='miR-142, 12h post-transfection'
if (name=='GSM210909') title='miR-142, 24h post-transfection'
if (name=='GSM210910') title='miR-148b, 12h post-transfection'
if (name=='GSM210911') title='miR-148b, 24h post-transfection'
if (name=='GSM210912') title='miR-181a, 12h post-transfection'
if (name=='GSM210913') title='miR-181a, 24h post-transfection'


pdf(paste('Cumulative_distribution_by_site_type_vs_boxplot_',name,'.pdf',sep=''),width=20,height=6)
par(mfrow=c(1,3))
plot(ecdf(expr_no_site$VALUE..*log(10)/log(2)),xlim=c(-1,1),main=title,xlab='log2(Fold-change)',ylab='Cumulative fraction')
with(environment(F_1_6mer),lines(x,y,col='purple'))
with(environment(F_1_7mer_t1A),lines(x,y,col='orange'))
with(environment(F_1_7mer_m8),lines(x,y,col='green'))
with(environment(F_1_8mer),lines(x,y,col='blue'))
legend("bottomright",c('One 8-mer site','One 7mer-m8 site','One 7mer-A1 site','One 6mer site','No site'),col=c('blue','green','orange','purple','black'),pch='_')
boxplot(expr_no_site$VALUE..*log(10)/log(2),expr_1_6mer$VALUE..*log(10)/log(2),expr_1_7mer_t1A$VALUE..*log(10)/log(2),expr_1_7mer_m8$VALUE..*log(10)/log(2),expr_1_8mer$VALUE..*log(10)/log(2),border=c('black','purple','orange','green','blue'),ylab='log2(Fold-change)')
boxplot(expr_no_site$VALUE..*log(10)/log(2),expr_1_6mer$VALUE..*log(10)/log(2),expr_1_7mer_t1A$VALUE..*log(10)/log(2),expr_1_7mer_m8$VALUE..*log(10)/log(2),expr_1_8mer$VALUE..*log(10)/log(2),border=c('black','purple','orange','green','blue'),ylab='log2(Fold-change)',ylim=c(-1,1))
dev.off()
