Untitled

> str(segments)
'data.frame':   11897 obs. of  7 variables:
 $ X                : int  0 1 2 3 4 5 6 7 8 9 ...
 $ SegmentID        : int  72 73 74 75 76 77 78 79 80 81 ...
 $ Chromosome       : int  1 1 2 2 2 3 3 3 3 3 ...
 $ StartPosition    : int  754192 145260908 21494 141215321 141275624 63411 69812903 69884262 126473310 126790130 ...
 $ StopPosition     : int  145258178 249212878 141214996 141275051 243052331 69811900 69884106 126457276 126772699 197852564 ...
 $ Median.Log2.Ratio: num  -0.014 0.311 -0.003 0.059 -0.012 -0.018 -0.106 0.007 -0.171 0.001 ...
 $ FileName         : Factor w/ 95 levels "TSB02183","TSB02184",..: 1 1 1 1 1 1 1 1 1 1 ...

> segments <- read.csv("Probe_Segments_CN.csv")
> cnseg <- CNSeg(segList = segments, chromosome = "Chromosome", end = "StopPosition", start = "StartPosition", segMean = "Median.Log2.Ratio", id = "FileName")
> rdseg <- getRS(cnseg, by = "region", imput = FALSE, XY = FALSE, what = "mean")
Processing samples ... Done
> data("geneInfo")
> geneInfo <- geneInfo[sample(1:nrow(geneInfo), 2000), ]
>
> rdByGene <- getRS(cnseg, by = "gene", imput = FALSE, XY = FALSE, geneMap = geneInfo, what = "median")
>
> reducedseg <- rs(rdseg)
> f1 <- kOverA(5, 1)
>
> ffun <- filterfun(f1)
>
> filteredrs <- genefilter(rdseg, ffun)
> filteredrs <- madFilter(rdseg, 0.8)
> dist(filteredrs)
Error in as.vector(data) :
  no method for coercing this S4 class to a vector
> sessionInfo()
R version 3.4.0 (2017-04-21)
Platform: x86_64-w64-mingw32/x64 (64-bit)
Running under: Windows >= 8 x64 (build 9200)

Matrix products: default

locale:
[1] LC_COLLATE=English_United States.1252
[2] LC_CTYPE=English_United States.1252
[3] LC_MONETARY=English_United States.1252
[4] LC_NUMERIC=C
[5] LC_TIME=English_United States.1252

attached base packages:
[1] tools     stats     graphics  grDevices
[5] utils     datasets  methods   base

other attached packages:
[1] CNTools_1.34.0    genefilter_1.60.0

loaded via a namespace (and not attached):
 [1] Rcpp_0.12.17         AnnotationDbi_1.40.0
 [3] BiocGenerics_0.24.0  splines_3.4.0
 [5] IRanges_2.12.0       bit_1.1-14
 [7] lattice_0.20-35      xtable_1.8-2
 [9] blob_1.1.1           parallel_3.4.0
[11] grid_3.4.0           Biobase_2.38.0
[13] DBI_1.0.0            survival_2.41-3
[15] bit64_0.9-7          digest_0.6.15
[17] Matrix_1.2-9         S4Vectors_0.16.0
[19] bitops_1.0-6         RCurl_1.95-4.10
[21] memoise_1.1.0        RSQLite_2.1.1
[23] compiler_3.4.0       stats4_3.4.0
[25] XML_3.98-1.11        annotate_1.56.2

# Cluster samples by copy-number profile.
#
# Pipeline: read per-sample segments -> build a CNTools CNSeg object ->
# reduce the segments to a region-by-sample table -> filter regions ->
# compute Euclidean distances between samples -> hierarchical clustering.
#
# Inputs : "result_cnv.csv" in the working directory, with the columns
#          Chromosome, StartPosition, StopPosition, Median.Log2.Ratio and
#          FileName (the column names passed to CNSeg() below).
# Outputs: "CNseg_CNOut.csv" (the filtered reduced-segment table) and a
#          dendrogram drawn on the active graphics device.

# library() stops with an error when CNTools is missing; require() would only
# return FALSE and let the script fail later with a less helpful message.
library(CNTools)

segData <- read.csv("result_cnv.csv", stringsAsFactors = FALSE)
head(segData)

# Create the initial CNSeg object, mapping the CSV columns onto the fields
# CNSeg() expects.
cnseg <- CNSeg(
  segList = segData,
  chromosome = "Chromosome",
  end = "StopPosition",
  start = "StartPosition",
  segMean = "Median.Log2.Ratio",
  id = "FileName"
)
cnseg

# Create the initial reduced-segment (RS) object, summarised by region.
rdseg <- getRS(cnseg, by = "region", imput = FALSE, XY = FALSE, what = "mean")
rdseg

# Collect gene information and keep a random subset of 2000 rows.
# seq_len() is used instead of 1:nrow() so an empty table cannot produce the
# bogus index vector c(1, 0).
# NOTE(review): the draw is unseeded, so rdByGene differs between runs; call
# set.seed() beforehand if reproducibility matters.
data("geneInfo")
geneInfo <- geneInfo[sample(seq_len(nrow(geneInfo)), 2000), ]

# Create an RS object summarised by gene, using the sampled gene map.
# NOTE(review): rdByGene is not used by the clustering steps below.
rdByGene <- getRS(
  cnseg,
  by = "gene",
  imput = FALSE,
  XY = FALSE,
  geneMap = geneInfo,
  what = "median"
)

# Extract the reduced-segment table from the region-based RS object.
# NOTE(review): reducedseg is not used by the clustering steps below.
reducedseg <- rs(rdseg)

# Create a function that evaluates to TRUE if at least 5 of the argument
# elements are larger than 1.
f1 <- kOverA(5, 1)

# Create a filter based on f1.
ffun <- filterfun(f1)

# Apply the kOverA filter through the CNTools genefilter method.
# NOTE(review): this result is overwritten by madFilter() on the next
# assignment, so only the MAD-filtered data reaches the clustering step.
filteredrs <- genefilter(rdseg, ffun)

# Apply the CNTools madFilter with a cutoff of 0.8 to the unfiltered rdseg.
filteredrs <- madFilter(rdseg, 0.8)

filteredrs

# Write the filtered data to file. The RS object is an S4 object and cannot be
# passed to dist() directly (it fails with "no method for coercing this S4
# class to a vector"), so its table is exported first. The rs() accessor is
# used instead of attributes(filteredrs)[1], which depended on slot order;
# wrapping it in list(rs = ...) keeps the written columns laid out as before.
write.csv(list(rs = rs(filteredrs)), "CNseg_CNOut.csv")

# Reading the recently-written file back yields a plain data frame.
CNseg <- read.csv("CNseg_CNOut.csv")

# Keep only the columns with actual sample values.
# NOTE(review): this assumes the first four columns are the row-name column
# plus three region-coordinate columns; verify against the CSV header.
CNseg <- CNseg[5:ncol(CNseg)]

# Calculate Euclidean distance between samples. The table is transposed
# because dist() computes distances between rows.
d <- dist(t(CNseg), method = "euclidean")

# Calculate clustering.
# NOTE(review): with unsquared distances, "ward.D" does not implement Ward's
# criterion; "ward.D2" does. Confirm which one is intended.
hc1 <- hclust(d, method = "ward.D")

# The horizontal axis of a dendrogram lists the clustered samples, so it is
# labelled as such; the distance and linkage go in the subtitle.
plot(
  hc1,
  cex = 0.6,
  hang = -1,
  main = "Clusters of Copy Number Alterations",
  xlab = "Samples",
  sub = "Euclidean distance, ward.D linkage"
)