Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Added Enhancements and features #36

Open
wants to merge 11 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
49 changes: 49 additions & 0 deletions CLOCKS Multiple Regression and Interaction Model.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
# Load the clock data (GFCLOCKS.txt) and take a first look at it.
#
# The file is read relative to the current working directory, so run this
# script from the folder that holds GFCLOCKS.txt. A bare `setwd()` call is not
# used: its `dir` argument has no default, so it stops the script with an error.
clocks_file <- "GFCLOCKS.txt"
if (!file.exists(clocks_file)) {
  stop(
    "Cannot find ", clocks_file, " in ", getwd(),
    "; run this script from the folder that contains it.",
    call. = FALSE
  )
}
dir()

CLOCKS <- read.table(file = clocks_file, header = TRUE)
ls()
dimnames(CLOCKS)
CLOCKS

# Response and predictors as plain vectors; the regression calls later in this
# script refer to them by these names.
Y <- CLOCKS[, "PRICE"]     # price of clock
X1 <- CLOCKS[, "AGE"]      # age of clock
X2 <- CLOCKS[, "NUMBIDS"]  # number of bidders
n <- length(Y)

print(n)

# Scatterplots of price against each predictor, on fixed axis ranges.
plot(
  X1, Y,
  xlim = c(100, 200), ylim = c(500, 2200),
  xlab = "Age of clock", ylab = "Price of clock"
)
plot(
  X2, Y,
  xlim = c(5, 20), ylim = c(500, 2200),
  xlab = "No of bidders", ylab = "Price of clock"
)

# Five-number summaries (plus mean) of each variable.
summary(X1)
summary(X2)
summary(Y)

# First-order multiple regression: price (Y) on age (X1) and number of
# bidders (X2).
model <- lm(Y ~ X1 + X2)

# Print the coefficient table and overall fit statistics.
summary(model)


# t critical values and a 95% confidence interval for B1 (age of clock).
#
# qt(p, df) returns the t quantile whose lower-tail area is p. The test is
# two-sided, so alpha / 2 goes in each tail and the two critical values differ
# only in sign.
alpha <- 0.05
df_error <- 29  # error df of the two-predictor model: n - 3 with n = 32

qt(alpha / 2, df = df_error)                      # default lower tail: negative
qt(alpha / 2, df = df_error, lower.tail = FALSE)  # upper tail: positive

# Estimate and standard error of B1, copied from the earlier regression output.
b1_hat <- 12.74
se_b1 <- 0.9047

# Use the upper-tail quantile so the critical value is positive.
t_crit <- qt(alpha / 2, df = df_error, lower.tail = FALSE)

# Both limits use the SE of B1. Using the SE of B2 (8.73) for the upper limit
# gives an interval that is not centred on the estimate.
CI95.age <- b1_hat + c(-1, 1) * t_crit * se_b1
CI95.age

# Interaction model: the first-order terms plus the X1:X2 cross-product.
IntModel <- lm(Y ~ X1 + X2 + X1:X2)

# Age on its own is not significant here (p = 0.66), but it stays in the model
# because the interaction term X1:X2 that involves it is significant
# (p = 1.35e-06).
summary(IntModel)

101 changes: 101 additions & 0 deletions EXECSAL2.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,101 @@
id Y X1 X2 X3 X4 X5 X6 X7 X8 X9 X10
1 11.4436 12 15 1 240 170 1 44 5 0 21
2 11.7753 25 14 1 510 160 1 53 9 0 28
3 11.3874 20 14 0 370 170 1 56 5 0 26
4 11.2172 3 19 1 170 170 1 26 9 0 24
5 11.6553 19 12 1 520 150 1 43 7 0 27
6 11.1619 14 13 0 420 160 1 53 9 0 27
7 11.6457 18 18 1 290 170 1 43 7 0 22
8 11.1927 2 17 1 200 180 1 31 10 0 26
9 11.5954 14 13 1 560 180 1 43 7 0 23
10 11.1360 4 16 1 230 160 1 36 10 0 25
11 11.5327 8 18 1 540 150 1 39 8 1 21
12 11.5268 19 15 1 90 180 1 47 7 1 30
13 11.9144 23 16 1 560 180 1 47 7 1 23
14 10.9526 5 15 0 470 150 1 44 7 1 21
15 11.3783 3 16 1 340 190 1 33 6 1 29
16 11.7830 22 17 1 70 200 1 50 7 1 22
17 11.4109 24 14 0 160 180 1 49 6 1 26
18 11.6579 22 16 1 160 190 1 51 9 1 25
19 11.5405 13 18 1 110 180 1 33 7 1 24
20 11.8629 21 16 1 410 180 1 59 7 0 24
21 11.4175 10 13 1 370 190 1 49 7 0 26
22 11.2037 11 12 1 180 170 1 39 5 0 25
23 11.5229 12 19 1 60 200 1 39 7 0 24
24 11.3551 10 19 1 60 180 1 37 8 0 27
25 11.8372 26 17 1 110 200 1 48 9 0 24
26 11.3181 7 15 1 280 190 1 45 6 0 23
27 11.3563 7 19 1 110 180 1 29 10 0 23
28 11.2292 10 19 0 300 170 1 41 6 0 23
29 11.3794 23 14 0 220 170 1 53 6 0 27
30 11.7527 12 15 1 570 200 1 41 9 0 26
31 11.2910 6 16 1 240 180 1 33 9 0 25
32 11.4175 15 16 0 300 150 1 38 6 0 25
33 11.6046 15 18 1 260 170 1 41 8 0 22
34 11.1662 8 13 1 150 160 1 37 5 0 29
35 11.5560 18 19 0 350 160 1 38 10 0 29
36 11.1732 2 13 1 370 190 1 26 6 0 21
37 11.3551 13 14 1 150 160 1 52 10 0 21
38 11.3998 12 17 0 480 190 1 33 9 0 20
39 11.7345 21 15 1 310 180 1 52 7 0 22
40 10.6643 3 12 0 340 150 1 23 7 0 25
41 11.7361 20 16 1 520 160 1 43 6 0 28
42 11.7134 20 19 1 200 170 1 44 9 0 26
43 11.5815 20 17 0 490 160 1 40 5 0 30
44 11.0186 1 15 0 570 180 1 30 8 0 30
45 10.9988 2 17 1 70 160 1 37 6 0 23
46 11.4690 9 17 1 300 160 1 37 10 0 20
47 11.3574 11 17 0 190 160 1 41 6 0 25
48 11.3953 21 13 0 500 160 1 47 8 0 24
49 11.8706 20 20 1 390 170 1 47 6 0 21
50 11.6009 17 16 0 520 180 0 49 9 0 24
51 11.9621 24 12 1 530 200 0 60 9 0 23
52 11.0837 2 17 0 590 190 0 28 9 0 27
53 11.5703 9 13 1 560 170 0 45 8 0 22
54 11.2159 2 18 0 600 190 0 38 6 0 23
55 11.2810 13 12 0 390 170 0 48 8 0 24
56 11.5768 14 18 1 110 170 0 53 9 0 26
57 11.5750 18 13 1 190 190 0 55 10 0 25
58 11.2567 10 14 1 110 160 0 34 9 0 30
59 11.7707 21 13 1 430 190 0 59 9 0 26
60 11.3218 11 14 0 440 150 0 31 8 0 23
61 11.7448 26 15 1 210 190 0 54 7 0 27
62 11.7110 22 18 1 320 160 0 57 5 0 22
63 11.4742 3 16 1 560 180 0 38 10 0 29
64 11.7668 17 18 1 450 190 0 53 8 0 24
65 11.1872 2 16 1 410 180 0 35 6 0 24
66 10.9819 4 18 0 70 150 0 43 6 0 20
67 11.2810 8 17 1 90 190 0 34 8 0 25
68 11.4731 13 15 1 290 160 0 33 6 0 23
69 11.4606 3 18 1 530 180 0 27 8 0 24
70 11.3964 13 16 0 420 170 0 47 8 0 25
71 11.5973 25 19 0 150 200 0 49 10 0 28
72 11.4648 11 15 1 500 190 0 44 8 0 21
73 11.1732 2 17 0 430 190 0 36 8 0 20
74 12.0634 26 17 1 570 190 0 49 6 0 29
75 11.5806 20 20 1 90 150 0 59 9 0 27
76 11.5129 19 12 1 340 160 0 54 9 0 22
77 11.5199 12 13 1 440 170 0 41 8 0 27
78 11.9369 22 18 1 500 160 0 55 7 0 24
79 11.4648 13 13 0 570 180 0 37 6 0 27
80 11.2554 2 15 1 560 190 0 34 10 0 22
81 11.3457 15 14 1 160 170 0 54 9 1 25
82 11.4360 12 13 1 390 190 0 35 6 1 24
83 11.3609 13 19 0 370 200 0 40 7 1 29
84 11.2823 5 17 1 330 160 0 40 6 1 22
85 11.2910 8 17 0 560 170 0 42 7 1 25
86 11.6448 21 20 0 590 180 0 46 8 1 27
87 11.2709 5 16 1 290 200 0 28 9 1 24
88 11.3771 9 18 0 440 180 0 37 7 1 28
89 11.5415 19 15 0 480 190 0 42 10 1 28
90 11.6639 23 19 1 130 150 0 55 9 0 24
91 10.8493 3 12 0 440 190 0 38 10 0 22
92 11.5759 13 19 1 310 150 0 51 6 0 29
93 11.5991 22 17 0 370 200 0 55 10 0 29
94 11.1065 9 12 0 180 160 0 39 7 0 22
95 11.6182 7 19 1 520 200 0 40 10 0 22
96 11.3278 10 18 0 90 180 0 34 10 0 28
97 11.9798 25 18 1 590 160 0 64 10 0 21
98 11.7159 10 19 1 480 200 0 48 5 0 26
99 11.1169 3 19 1 80 160 0 27 7 0 28
100 11.4917 16 17 0 380 160 0 50 7 0 29
21 changes: 21 additions & 0 deletions EXPRESS.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
Weight Distance Cost
5.90 47 2.6
3.20 145 3.9
4.40 202 8.0
6.60 160 9.2
0.75 280 4.4
0.70 80 1.5
6.50 240 14.5
4.50 53 1.9
0.60 100 1.0
7.50 190 14.0
5.10 240 11.0
2.40 209 5.0
0.30 160 2.0
6.20 115 6.0
2.70 45 1.1
3.50 250 8.0
4.10 95 3.3
8.10 160 12.1
7.00 260 15.5
1.10 90 1.7
50 changes: 50 additions & 0 deletions EXPRESS_nested_models.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
# Load the EXPRESS data set (columns Weight, Distance, Cost).
#
# The file is read relative to the current working directory, so run this
# script from the folder that holds EXPRESS.txt. A bare `setwd()` call is not
# used: its `dir` argument has no default, so it stops the script with an error.
express_file <- "EXPRESS.txt"
if (!file.exists(express_file)) {
  stop(
    "Cannot find ", express_file, " in ", getwd(),
    "; run this script from the folder that contains it.",
    call. = FALSE
  )
}
dir()

del <- read.table(file = express_file, header = TRUE)
ls()
dimnames(del)
del

# Response and predictors as vectors. Columns can also be picked by position,
# e.g. del[, 1] is the first column (Weight).
y <- del[, "Cost"]
x1 <- del[, "Weight"]
x2 <- del[, "Distance"]

# Sample size. R is case-sensitive: the response in this script is lower-case
# `y`; an upper-case `Y` is never defined here.
n <- length(y)
n

# Squared versions of the two predictors, needed by the complete model.
x1sq <- x1^2
x2sq <- x2^2

# Nested pair of models: the reduced one has the first-order terms and the
# interaction; the complete one adds both squared terms.
ReducedModel <- lm(y ~ x1 + x2 + x1:x2)
CompleteModel <- lm(y ~ x1 + x2 + x1sq + x2sq + x1:x2)

# Keep the summaries; their residual standard errors are reused for the SSEs.
summary_R <- summary(ReducedModel)
summary_C <- summary(CompleteModel)

# Nested-model F test for the two squared terms, worked by hand and then
# repeated with anova().

# Error degrees of freedom: n - (number of predictor terms) - 1.
n_obs <- length(y)
df_SSE_R <- n_obs - 3 - 1  # reduced model has 3 terms: x1, x2, x1:x2
df_SSE_C <- n_obs - 5 - 1  # complete model has 5 terms

# SSE recovered from the residual standard error: SSE = sigma^2 * df.
SSE_R <- summary_R$sigma^2 * df_SSE_R
SSE_C <- summary_C$sigma^2 * df_SSE_C

# F = (drop in SSE per extra term) / (mean squared error of the complete model).
n_extra_terms <- 5 - 3
F_stat <- (SSE_R - SSE_C) / n_extra_terms / (SSE_C / df_SSE_C)

# p-value: upper-tail area only, because the F test is one-sided.
pf(F_stat, df1 = n_extra_terms, df2 = df_SSE_C, lower.tail = FALSE)

# The same model comparison done directly by R.
anova(ReducedModel, CompleteModel)
60 changes: 60 additions & 0 deletions Executive_data_variable_screening.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,60 @@
# EXHAUSTIVE SEARCH VARIABLE SCREENING

#leaps does 'exhaustive search' for us
library(leaps)

# Load the executive data set (EXECSAL2.txt: id, Y and X1..X10).
#
# The file is read relative to the current working directory, so run this
# script from the folder that holds EXECSAL2.txt. No machine-specific absolute
# path is set here, so the script also runs on other computers.
exsal_file <- "EXECSAL2.txt"
if (!file.exists(exsal_file)) {
  stop(
    "Cannot find ", exsal_file, " in ", getwd(),
    "; run this script from the folder that contains it.",
    call. = FALSE
  )
}
dir()

exsal <- read.table(file = exsal_file, header = TRUE)
ls()

dimnames(exsal)

# Response and the ten candidate predictors as stand-alone vectors.
Y <- exsal[, "Y"]
X1 <- exsal[, "X1"]
X2 <- exsal[, "X2"]
X3 <- exsal[, "X3"]
X4 <- exsal[, "X4"]
X5 <- exsal[, "X5"]
X6 <- exsal[, "X6"]
X7 <- exsal[, "X7"]
X8 <- exsal[, "X8"]
X9 <- exsal[, "X9"]
X10 <- exsal[, "X10"]

# Exhaustive search over the ten predictors: keep the single best model
# (nbest = 1) of every size up to 10 (nvmax = 10), which gives 10 candidates.
CandModels <- regsubsets(
  Y ~ X1 + X2 + X3 + X4 + X5 + X6 + X7 + X8 + X9 + X10,
  data = exsal,
  nvmax = 10,
  nbest = 1
)

# R^2, adjusted R^2 and Cp for each of the 10 candidate models.
ModelSummary <- summary(CandModels)
Rsq <- ModelSummary$rsq
adjRsq <- ModelSummary$adjr2
Cp <- ModelSummary$cp

# Show the three criteria as a table (one column per model), to 2 decimals.
round(rbind(Rsq, adjRsq, Cp), 2)

# Refit the three shortlisted models (4, 5 and 6 predictors) with lm() so they
# can be compared with nested F tests.
best4pred <- lm(Y ~ X1 + X2 + X3 + X4, data = exsal)
best5pred <- lm(Y ~ X1 + X2 + X3 + X4 + X5, data = exsal)
best6pred <- lm(Y ~ X1 + X2 + X3 + X4 + X5 + X9, data = exsal)

# Step 1: 4 predictors versus 5 predictors.
anova(best4pred, best5pred)

# Step 2: the 5-predictor model came out better above, so test it against the
# 6-predictor model.
anova(best5pred, best6pred)

# The 5-predictor model is the one kept; inspect its estimated beta values.
best5pred$coefficients

# Backup comparison of 5 versus 6.
# NOTE(review): this repeats the call in step 2 exactly; if a different pair
# (for example 4 versus 6) was intended, change the arguments.
anova(best5pred, best6pred)

# Conclusion: the 5-predictor model is confirmed as the preferred one.


33 changes: 33 additions & 0 deletions GFCLOCKS.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
AGE NUMBIDS PRICE AGE-BID
127 13 1235 1651
115 12 1080 1380
127 7 845 889
150 9 1522 1350
156 6 1047 936
182 11 1979 2002
156 12 1822 1872
132 10 1253 1320
137 9 1297 1233
113 9 946 1017
137 15 1713 2055
117 11 1024 1287
137 8 1147 1096
153 6 1092 918
117 13 1152 1521
126 10 1336 1260
170 14 2131 2380
182 8 1550 1456
162 11 1884 1782
184 10 2041 1840
143 6 845 858
159 9 1483 1431
108 14 1055 1512
175 8 1545 1400
108 6 729 648
179 9 1792 1611
111 15 1175 1665
187 8 1593 1496
111 7 785 777
115 7 744 805
194 5 1356 970
168 7 1262 1176
Loading