#3.1
x<- c(350, 200,240,290,90,370,240)
y <-c(480,130,250,310,280,1450,280)
z=y-x
zsort= sort(z)
Tplus=0
x = rank(abs(z))
for (i in 1:length(z))
{	if (z[i]>0)
	{	Tplus = Tplus + x[i]
	}
}
#p-value
#table A.4, get  P(Tplus>=24)=0.055
#at alpha = 0.05 can reject
#at alpha = 0.01 cannot reject

#3.2
x <- c(1.83,  0.50,  1.62,  2.48, 1.68, 1.88, 1.55, 3.06, 1.30)
y <- c(0.878, 0.647, 0.598, 2.05, 1.06, 1.29, 1.06, 3.14, 1.29)
wilcox.test(y - x, alternative = "less")  
wilcox.test(y - x, alternative = "less",
            exact = FALSE, correct = FALSE) # H&W large sample
                                            # approximation
x <- c(1.83,  0.50,  16.2,  2.48, 1.68, 1.88, 1.55, 3.06, 1.30)
y <- c(0.878, 0.647, 0.598, 2.05, 1.06, 1.29, 1.06, 3.14, 1.29)
wilcox.test(y - x, alternative = "less")   
wilcox.test(y - x, alternative = "less",
            exact = FALSE, correct = FALSE) # H&W large sample
                                            # approximation
#no effect on the Tplus  -> test is robust to outliers
#Example
x <- c(1.83,  0.50, 1.30)
y <- c(0.878, 0.647, 1.29)
wilcox.test(y - x, alternative = "less") 

x <- c(-58.3,  0.50, 1.30)
y <- c(0.878, 0.647, 1.29)
wilcox.test(y - x, alternative = "less")



#3.3
#set up Tplus + Tminus = sum(Rphi) + sum(R(1-phi))
#distibute and knot R~discrete uniform, Sum(R) = N(N+1)/2

#3.6
#largest Tplus when all phi=1, so N(N+1)/2
#smallest, 0

#3.7
n=14
#H0 theta=0 and H1 theta>0
#when alpha = 0.039 then t=81
qsignrank(0.039,14,FALSE)

#large sample approx
#for 
alpha=0.039
qnorm(1-alpha)
#reject Tstar>=1.76
#so reject it 
ans = n*(n+1)/4+qnorm(1-alpha)*(n*(n+1)*(2*n+1)/24)^0.5
#Tplus>=ans
#ans on A.4 for n=14, is between [0.039,0.045]
#this is larger than alpha 
#large sample approx yields larger alpha

#3.11
#class notes (9/17)
#alpha = 2^(-n), 2^(-(n+1))
#b/c Tplus = sum(ib), where b is bern(.5)
#alpha =  2^(-n) to be zero or n(n+1)/2, all b = 1/2, have n of then
#alpha =  2^(-(n+1) to be one of n(n+1)/2 -1, all b but one have to be 1 and one is zero, or all zero
#and have one b=1.  


#3.19
x <-c(350, 200, 240, 290, 90, 370, 240)
y<-c(480,30,250,310,280,1450,280)
M = length(x)*(length(x)+1)/2
z=y-x
z1=sort(z)
W=matrix(NA,length(z1),length(z1))
for (i in 1:length(z1))
{	for (j in 1:length(z1))
	{	if (i<=j)
		{	W[i,j]<-(z1[i]+z1[j])/2
		}
	}
}
W_sort= sort(W)
theta = median(W_sort)


#3.20
x <- c(1.83,  0.50,  1.62,  2.48, 1.68, 1.88, 1.55, 3.06, 1.30)
y <- c(0.878, 0.647, 0.598, 2.05, 1.06, 1.29, 1.06, 3.14, 1.29)
z=y-x
z1=sort(z)
avg = mean(z)
W=matrix(NA,length(z1),length(z1))
for (i in 1:length(z1))
{	for (j in 1:length(z1))
	{	if (i<=j)
		{	W[i,j]<-(z1[i]+z1[j])/2
		}
	}
}
W_sort= sort(W)
theta = median(W_sort)
theta
avg

#trial 2 with outlier
x <- c(1.83,  0.50,  16.2,  2.48, 1.68, 1.88, 1.55, 3.06, 1.30)
y <- c(0.878, 0.647, 0.598, 2.05, 1.06, 1.29, 1.06, 3.14, 1.29)
z=y-x
z1=sort(z)
avg = mean(z)
W=matrix(NA,length(z1),length(z1))
for (i in 1:length(z1))
{	for (j in 1:length(z1))
	{	if (i<=j)
		{	W[i,j]<-(z1[i]+z1[j])/2
		}
	}
}
W_sort= sort(W)
theta = median(W_sort)
theta
avg

#shows that theta is more robust to outliers than the average


#problem 3.23
#a) add b to z values, what happens to theta
x<-c(1.82, 0.50,1.62,2.48,1.68,1.88,1.55,3.06,1.30)
y <-c(0.878,0.647,0.598,2.05,1.06,1.29,1.06,3.14,1.29)
z <-y-x
wilcox.test(z,alternative="two.sided",paired=F,conf.int=T,conf.level=0.95)

z <-y-x +5
wilcox.test(z,alternative="two.sided",paired=F,conf.int=T,conf.level=0.95)

#new estimate is theta+b (where b is constant)

#b) what if multiply by d, each Z
z <-y-x
wilcox.test(z,alternative="two.sided",paired=F,conf.int=T,conf.level=0.95)

z <-5*z
wilcox.test(z,alternative="two.sided",paired=F,conf.int=T,conf.level=0.95)

#new estimate is theta+b (where b is constant)

#c) for k, some positive, such that n>2k
z <-y-x
wilcox.test(z,alternative="two.sided",paired=F,conf.int=T,conf.level=0.95)
z_sort = sort(z)
#let k=2  - then 9>2(2)

z_sort2 = z_sort[3:7]
wilcox.test(z_sort2,alternative="two.sided",paired=F,conf.int=T,conf.level=0.95)

#for larger n, let k=25, then 100>2(25)
x1 <-rnorm(100,0,1)
y1 <-rnorm(100,2,1)
z1 <-y1-x1
wilcox.test(z1,alternative="two.sided",paired=F,conf.int=T,conf.level=0.95)
z1_sort= sort(z1)
k<-2
z1_sort2 = z1_sort[k+1:length(z1)-k+1]
wilcox.test(z1_sort2,alternative="two.sided",paired=F,conf.int=T,conf.level=0.95)



#3.27
x=c(350,200,240,290,90,370,240)
y=c(480,130,250,310,280,1450,280)
z=y-x
z1=sort(z)
W=matrix(NA,length(z1),length(z1))
for (i in 1:length(z1))
{	for (j in 1:length(z1))
	{	if (i<=j)
		{	W[i,j]<-(z1[i]+z1[j])/2
		}
	}
}
W_sort= sort(W)
n=7
alpha = 0.023
#then
t=26
t = qsignrank(0.023,7,F)
C = n*(n+1)/2 +1-t 
W_sort[C]
W_sort[t]
#different answer because approx
wilcox.test(z,alternative="two.sided",paired=F,conf.int=T,conf.level=0.95)

#3.33
#alpha = 2/2^n
#see class notes 09/17
#P(Tplus = n(n+1)/2) = 1/2^n
#P(Tplus = 0) = 1/2^n
#then t= n(n+1)/2 so
#C = n*(n+1)/2 +1-t 
#  = n*(n+1)/2 +1 - n(n+1)/2
#C = 1 -> W^(1)  = Z^(1)
#W ^t = W^(n(n+1)/2) = Z^(n)



#3.36
x=c(350,200,240,290,90,370,240)
y=c(480,130,250,310,280,1450,280)
z=y-x
#different answer because approx
ans1=wilcox.test(z,alternative="two.sided",paired=F,conf.int=T,conf.level=0.954)
ans1$conf.int
#increase alpha
ans2=wilcox.test(z,alternative="two.sided",paired=F,conf.int=T,conf.level=0.97)
ans2$conf.int
#decreased alpha
ans3=wilcox.test(z,alternative="two.sided",paired=F,conf.int=T,conf.level=0.90)
ans3$conf.int

#how does vary alpha affect estimator in (comment 23)

x=c(350,200,240,290,90,370,240)
y=c(480,130,250,310,280,1450,280)
z=y-x
z1=sort(z)
W=matrix(NA,length(z1),length(z1))
for (i in 1:length(z1))
{	for (j in 1:length(z1))
	{	if (i<=j)
		{	W[i,j]<-(z1[i]+z1[j])/2
		}
	}
}
W_sort= sort(W)
n=7
alpha = 0.023
#then
t=26
t = qsignrank(0.023,7,F)
C = n*(n+1)/2 +1-t 
theta1 = (W_sort[(C+t)/2])

#increase alpha
t = qsignrank(0.05,7,F)
C = n*(n+1)/2 +1-t 
theta2 = (W_sort[(C+t)/2])

#decreased alpha
t = qsignrank(0.01,7,F)
C = n*(n+1)/2 +1-t 
theta3 = (W_sort[(C+t)/2])

theta = median(W_sort)
theta1
theta2
theta3


#3.37
x=c(350,200,240,290,90,370,240)
y=c(480,130,250,310,280,1450,280)
z=y-x
wilcox.test(y - x, alternative = "two.sided",exact = FALSE, correct = FALSE,conf.int = TRUE,conf.level = 0.954)
n=length(x)
alpha = 1-0.95
qnorm(1-alpha/2)
C = n*(n+1)/4 - qnorm(1-alpha/2)*(n*(n+1)*(2*n+1)/24)^0.5
C = floor(C)
z1=sort(z)
M = n*(n+1)/2
W=matrix(NA,length(z1),length(z1))
for (i in 1:length(z1))
{	for (j in 1:length(z1))
	{	if (i<=j)
		{	W[i,j]<-(z1[i]+z1[j])/2
		}
	}
}
W_sort= sort(W)
W_sort[C]
W_sort[M+1-C]

#wider then exact confidence coeffienct [-25,605]


#3.44
x1<- c(5.8,13.5,26.1,7.4,7.6,23,10.7,9.1,19.3,26.3,17.5,17.9,18.3,
14.2,55.2,15.4,30,21.3,26.8,8.1,24.3,21.3,18.2,22.5,31.1)
y1 <-c(5,21,73,25,3,77,59,13,36,46,9,25,59,38,70,36,55,46,25,30,29,
46,71,31,33)
zi =  y1-x1
B= sum(y1-x1>0)
#binom test for p
binom.test(sum(y1-x1>0),length(x1),0.5)

#p-value
pbinom(B-1,length(x1),0.5,FALSE)
#or
1-pbinom(B-1,length(x1),0.5)

#get package(BSDA)
library(BSDA)

#get estimate for theta
sign.test(y1-x1)
#estimate

#from table, A.2, n=25, p=0.5, b=18
#reject if B>=b=18
#B=21, so reject

#large sample approx
B=21
n=25
p=0.5
Bstar= (B-n*p)/(n*p*(1-p))^.5
1-pnorm(Bstar,0,1)

#change to add outlier
x2<- c(5.8,13.5,26.1,7.4,7.6,23,10.7,9.1,19.3,26.3,17.5,17.9,18.3,
14.2,55.2,15.4,30,21.3,26.8,8.1,24.3,21.3,18.2,22.5,31.1)
y2 <-c(5,21,173,25,3,77,59,13,36,46,9,25,59,38,70,36,55,46,25,30,29,
46,71,31,33)
zi2 =  y2-x2
sign.test(y1-x1)
#binom.test(sum(y1-x1>0),length(x1),0.5)
sign.test(y2-x2)
#binom.test(sum(y2-x2>0),length(x2),0.5)

#Tplus in both were same, no effect on decision, still reject


#example where made difference
#tried to do a loop and nothing
x3<- c(5.8,13.5,26.1,7.4,7.6,23,10.7,9.1,19.3,26.3,17.5,17.9,18.3,
14.2,55.2,15.4,30,21.3,26.8,8.1,24.3,21.3,18.2,22.5,31.1)
y3  <-c(5,21,73,25,3,77,59,13,36,46,9,25,59,38,70,36,55,46,25,30,29,
46,71,31,33)
sign.test(y3-x3)
for (i in 1:10000)
{	xrandnum = rnorm(1,0,500)
	y3_out <-c(5,21,xrandnum ,25,3,77,59,13,36,46,9,25,59,38,70,36,55,46,25,30,29,
	46,71,31,33)
	ans = sign.test(y3_out-x3)
	if (18 > ans$rval$statistic)
	{
		break 
	}
}
i
#above, check i after loop, 


#3.45
n=25
p=0.5
B = qbinom(0.05,n,p)
qnorm(0.0539)
Bstar1 = n/2 + qnorm(0.0539)*(n/4)^0.5
Bstar = n-Bstar1

#3.46
x<-c(270, 150, 270, 420,202,255,165,220,305,210,240,300,300,70)
y<-c(525,570,190,395,370,210,490,250,360,285,630,385,195,295)
z=y-x
sign.test(z)
binom.test(sum(y-x>0),14,0.5)
#qbinom(0.0898,14,0.5,F)


#3.60
x1<- c(5.8,13.5,26.1,7.4,7.6,23,10.7,9.1,19.3,26.3,17.5,17.9,18.3,
14.2,55.2,15.4,30,21.3,26.8,8.1,24.3,21.3,18.2,22.5,31.1)
y1 <-c(5,21,73,25,3,77,59,13,36,46,9,25,59,38,70,36,55,46,25,30,29,
46,71,31,33)
z1=y1-x1
y2 <-c(5,21,173,25,3,77,59,13,36,46,9,25,59,38,70,36,55,46,25,30,29,
46,71,31,33)
z2=y2-x1
avg1 = mean(z1)
avg2 = mean(z2)
avg1
avg2
#average increase by 4
#173-26.1 = 146.1
#73-26.1 =46.1
#increase difference by 100, since n=25 ->4

#affect median
x1<- c(5.8,13.5,26.1,7.4,7.6,23,10.7,9.1,19.3,26.3,17.5,17.9,18.3,
14.2,55.2,15.4,30,21.3,26.8,8.1,24.3,21.3,18.2,22.5,31.1)
y1 <-c(5,21,73,25,3,77,59,13,36,46,9,25,59,38,70,36,55,46,25,30,29,
46,71,31,33)
z1=y1-x1
theta1 = median(sort(z1))
y2 <-c(5,21,173,25,3,77,59,13,36,46,9,25,59,38,70,36,55,46,25,30,29,
46,71,31,33)
z2=y2-x1
theta2 = median(sort(z2))
theta1
theta2
#more robust



#3.61
x<-c(349,400,520,490,574,427,435)
y<-c(425,533,362,628,463,427,449)
#thetahat
z=y-x
n=length(x)
z1=sort(z)
M = n*(n+1)/2
W=matrix(NA,length(z1),length(z1))
for (i in 1:length(z1))
{	for (j in 1:length(z1))
	{	if (i<=j)
		{	W[i,j]<-(z1[i]+z1[j])/2
		}
	}
}
W_sort= sort(W)
thetahat = median(W_sort)
thetahat
thetatilde = z1[sum(y-x>0)]
thetatilde 
#thetahat =12.25
#thetatilde =14
#larger estimate

#3.62
x<-c(349,400,520,490,574,427,435)
y<-c(425,533,362,628,463,427,449)
#thetatilde
z=y-x
thetatilde = median(sort(z))
thetatilde 

#a)add b to each Zi
z1=y-x+5
thetatilde1 = median(sort(z1))
thetatilde1 
#thetatilde increase by b


#b) multiply d to each Zi
z2=5*(y-x)
thetatilde2 = median(sort(z2))
thetatilde2 
#thetatilde increase by multply of d

#c) discard large k and smallest k, for n>2k
x1 <-rnorm(100,0,1)
y1 <-rnorm(100,2,1)
z1 <- y1-x1
thetatilde = median(sort(z1))
thetatilde 
#sign.test(z1)
k<-49
z1_sort= sort(z1)
thetatilde2 = median(z1_sort[k+2:length(z1)-k-2])
thetatilde2 
#sign.test(z1_sort[k+2:length(z1)-k-2])

#always smaller when values are discarded
#in 3.23 the values were always bigger

#3.71
x<-c(270, 150, 270, 420,202,255,165,220,305,210,240,300,300,70)
y<-c(525,570,190,395,370,210,490,250,360,285,630,385,195,295)
z=y-x
z_sort = sort(z)
LCL = z_sort[4]
LCL 
UCL = z_sort[11]
UCL
ans = sign.test(y-x, md = 0, alternative = "two.sided", conf.level = 0.9426)
ans$rval$conf.int

#3.74
x <- c(1.83,  0.50,  1.62,  2.48, 1.68, 1.88, 1.55, 3.06, 1.30)
y <- c(0.878, 0.647, 0.598, 2.05, 1.06, 1.29, 1.06, 3.14, 1.29)
z=y-x
z_sort = sort(z)
LCL = z_sort[2]
LCL 
UCL = z_sort[8]
UCL
ans = sign.test(y-x, md = 0, alternative = "two.sided", conf.level = 0.9610)
ans$rval$conf.int

#3.76
#how does vary alpha affect CI
x <- c(1.83,  0.50,  1.62,  2.48, 1.68, 1.88, 1.55, 3.06, 1.30)
y <- c(0.878, 0.647, 0.598, 2.05, 1.06, 1.29, 1.06, 3.14, 1.29)
#alpha = 1-0.9610 = 0.039
ans = sign.test(y-x, md = 0, alternative = "two.sided", conf.level = 0.9610)

#increase alpha
#alpha = 1-0.94 = 0.6
ans1 = sign.test(y-x, md = 0, alternative = "two.sided", conf.level = 0.94)

#decrease alpha
#alpha = 1-0.98 = 0.2
ans2 = sign.test(y-x, md = 0, alternative = "two.sided", conf.level = 0.98)

ans1$rval$conf.int
ans$rval$conf.int
ans2$rval$conf.int
#CI changes width 

#change the estimate of theta based on comment 52
x<-c(270, 150, 270, 420,202,255,165,220,305,210,240,300,300,70)
y<-c(525,570,190,395,370,210,490,250,360,285,630,385,195,295)
z=y-x
#for alpha = 0.0574, n=14
z_sort = sort(z)
LCL = z_sort[4]
LCL 
UCL = z_sort[11]
UCL
thetaest = z_sort[(4+11)/2]
thetaest 
theta =median(z_sort)


#increase alpha =0.1796 n=14
LCL = z_sort[5]
LCL 
UCL = z_sort[10]
UCL
thetaest = z_sort[(5+10)/2]
thetaest 

#decrease alpha =0.013 n=14
LCL = z_sort[3]
LCL 
UCL = z_sort[12]
UCL
thetaest = z_sort[(3+12)/2]
thetaest 

#not affected

#3.91
z<-c(254,171,345,134,190,447,106,173,449,198)
zi = z-175
wilcox.test(zi,alternative="greater",paired=F)

#get theta - use z
z<-c(254,171,345,134,190,447,106,173,449,198)
W_mat=matrix(NA,length(z),length(z))
for (i in 1:length(z))
{	for (j in 1:length(z))
	{	if (i<=j)
		{	W_mat[i,j]<-(z[i]+z[j])/2
		}
	}
}
W_sort = sort(W_mat)
theta = median(W_sort)
theta

#confidence interval
wilcox.test(z,alternative="two.sided",paired=F,conf.int=T,conf.level=0.962)
LCL = W_sort[8]
LCL
UCL = W_sort[48]
UCL


#3.94
z<-c(.32,.21,.28,.15,.08,.22,.17,.35,.20,.31,.17,.11)
zi=z-0.25
B=sum(zi>0)
B
sign.test(z,md=.25,alternative = "less")
z_sort = sort(z)
theta = median(z_sort)
theta

sign.test(z, md = 0.25, alternative = "two.sided", conf.level = 0.9614)
LCL = z_sort[3]
LCL
UCL = z_sort[10]
UCL


#3.98
z<-c(339,349,387,159,579,586,519,275)
zi=z-350
B=sum(zi>0)
B
sign.test(z,md=350)
z_sort = sort(z)
theta = median(z_sort)
theta
sign.test(z, md = 350, alternative = "two.sided", conf.level = 0.9296)
LCL = z_sort[2]
LCL
UCL = z_sort[7]
UCL


#3.99
z<-c(254,171,345,134,190,447,106,173,449,198)
zi = z-175
B = sum(zi>0)
B
#P(B>=6|p=0.5) for n=10 is 0.377 from A.2
sign.test(z,md=175,alternative="greater")

#theta
z_sort = sort(z)
theta = median(z_sort)
theta
#from 3.91 theta was 226

#92.6% CI
sign.test(z,md=175,alternative="two.sided",conf.level=0.925)
wilcox.test(z,alternative="two.sided",paired=F,conf.int=T,conf.level=0.916)