Monday 4/24

library(readr)
# Read the movies CSV into `movies` using readr's read_csv().
# NOTE(review): the path is absolute and specific to one user's machine;
# anyone else running these notes must point it at their own copy.
movies <- read_csv(
  file = "C:/Users/wooll/OneDrive/MATH325/Data/movies.csv"
)
## Parsed with column specification:
## cols(
##   .default = col_double(),
##   title = col_character(),
##   year = col_integer(),
##   length = col_integer(),
##   budget = col_integer(),
##   votes = col_integer(),
##   mpaa = col_character(),
##   Action = col_integer(),
##   Animation = col_integer(),
##   Comedy = col_integer(),
##   Drama = col_integer(),
##   Documentary = col_integer(),
##   Romance = col_integer(),
##   Short = col_integer()
## )
## See spec(...) for full column specifications.
# Preview the first six rows of the movies data.
head(x = movies, n = 6L)
## # A tibble: 6 × 24
##                      title  year length budget rating votes    r1    r2
##                      <chr> <int>  <int>  <int>  <dbl> <int> <dbl> <dbl>
## 1                        $  1971    121     NA    6.4   348   4.5   4.5
## 2        $1000 a Touchdown  1939     71     NA    6.0    20   0.0  14.5
## 3   $21 a Day Once a Month  1941      7     NA    8.2     5   0.0   0.0
## 4                  $40,000  1996     70     NA    8.2     6  14.5   0.0
## 5 $50,000 Climax Show, The  1975     71     NA    3.4    17  24.5   4.5
## 6                    $pent  2000     91     NA    4.3    45   4.5   4.5
## # ... with 16 more variables: r3 <dbl>, r4 <dbl>, r5 <dbl>, r6 <dbl>,
## #   r7 <dbl>, r8 <dbl>, r9 <dbl>, r10 <dbl>, mpaa <chr>, Action <int>,
## #   Animation <int>, Comedy <int>, Drama <int>, Documentary <int>,
## #   Romance <int>, Short <int>

Average run time of movies

# Arithmetic mean of the `length` column of `movies`.
mean(movies$length)
## [1] 82.33788
# favstats() is not a base R function; presumably it comes from the mosaic
# package attached in a chunk that is not shown here -- TODO confirm.
# The echoed output below reports min, quartiles, median, max, mean, sd, n
# and the number of missing values for the same column.
favstats(movies$length)
##  min Q1 median  Q3  max     mean       sd     n missing
##    1 74     90 100 5220 82.33788 44.34772 58788       0

How many movies were made each year?

# Count the rows of `movies` for each value of `year` and draw one bar per
# year.
barplot(height = table(movies[["year"]]))

Permutation test: two independent samples

# Permutation test for a difference in means between two independent samples.
#
# Create the data: two samples of 30 values, both drawn from the same
# Normal(mean = 69, sd = 2.5) distribution, stacked into one data frame with
# a `group` label (1 or 2).
set.seed(1140411)
sample1 <- rnorm(30, 69, 2.5)
sample2 <- rnorm(30, 69, 2.5)
theData <- data.frame(
  values = c(sample1, sample2),
  group = rep(c(1, 2), each = 30)
)
# View() opens a data viewer window, which is only meaningful (and only
# reliably available) in an interactive session; skip it when the notes are
# run with Rscript or knitted.
if (interactive()) {
  View(theData)
}
boxplot(values ~ group, data = theData)

# Run the permutation test:
# 1. Compute the observed two-sample t statistic for the real group labels.
myTest <- t.test(values ~ group, data = theData, mu = 0)
observedTestStat <- myTest$statistic

# 2. Shuffle the group labels N times, recomputing the t statistic each time,
#    to build the distribution of the statistic under "labels do not matter".
N <- 2000
permutedTestStats <- rep(NA_real_, N)
for (i in seq_len(N)) {
  # sample() with only `x` returns a random reordering of the labels.
  permutedData <- sample(x = theData$group)
  permutedTest <- t.test(values ~ permutedData, data = theData, mu = 0)
  permutedTestStats[i] <- permutedTest$statistic
}

# 3. Compare the observed statistic with the permutation distribution.
hist(permutedTestStats)
abline(v = observedTestStat)
# Proportion of permuted statistics at least as large as the observed one
# (upper-tail p-value), then at least as small (lower-tail p-value).
sum(permutedTestStats >= observedTestStat) / N
sum(permutedTestStats <= observedTestStat) / N

Round 2: permutation test for paired samples

# Permutation test for paired samples.
#
# Create the data: 30 baseline values from Normal(mean = 185, sd = 8), and a
# second measurement for each that equals the baseline minus Normal(0, 3.5)
# noise. Rows are stacked by group, with `id` (1..30) identifying the pair.
set.seed(121)
sample1 <- rnorm(30, 185, 8)
sample2 <- sample1 - rnorm(30, 0, 3.5)
theData <- data.frame(
  values = c(sample1, sample2),
  group = rep(c(1, 2), each = 30),
  id = rep(1:30, times = 2)
)
# View() opens a data viewer window, which is only meaningful (and only
# reliably available) in an interactive session; skip it when the notes are
# run with Rscript or knitted.
if (interactive()) {
  View(theData)
}

# Within-pair differences (group 1 minus group 2). Both groups are stored in
# id order, so position k in each subset refers to the same pair. Computed
# once here because the differences do not change between permutations.
pairedDiffs <- with(theData, values[group == 1] - values[group == 2])
hist(pairedDiffs)

# Perform the permutation test:
# 1. Observed paired t statistic. The x/y interface is used because the
#    formula method of t.test() rejects `paired = TRUE` in R >= 4.4.0.
myTest <- with(
  theData,
  t.test(values[group == 1], values[group == 2], paired = TRUE, mu = 0)
)
observedTestStat <- myTest$statistic

# 2. Under the null hypothesis the sign of each within-pair difference is
#    arbitrary, so flip each sign at random N times and recompute the
#    one-sample t statistic of the sign-flipped differences.
N <- 2000
permutedTestStats <- rep(NA_real_, N)
for (i in seq_len(N)) {
  permutedData <- sample(
    x = c(1, -1),
    size = length(pairedDiffs),
    replace = TRUE
  )
  permutedTest <- t.test(permutedData * pairedDiffs, mu = 0)
  permutedTestStats[i] <- permutedTest$statistic
}

# 3. Compare the observed statistic with the permutation distribution.
hist(permutedTestStats)
abline(v = observedTestStat)
# Proportion of permuted statistics at least as large as the observed one
# (upper-tail p-value), then at least as small (lower-tail p-value).
sum(permutedTestStats >= observedTestStat) / N
sum(permutedTestStats <= observedTestStat) / N