// C3 star query: six predicates, all joined on subject.
val c3AllProp = List(
  ("wsdbm", "likes"),
  ("wsdbm", "friendOf"),
  ("dc", "Location"),
  ("foaf", "age"),
  ("wsdbm", "gender"),
  ("foaf", "givenName"))
// Cache every VP involved in C3 (count forces materialization).
c3AllProp.foreach { case (ns, p) => VP2Random(getIdP(ns, p)).persist().count }
// Rank the triple patterns by increasing VP size (smallest first).
val order = c3AllProp.map { case (ns, p) =>
  val idP = getIdP(ns, p)
  (p, idP, VP2Size.get(idP).get)
}.sortBy(_._3)
order.foreach(println)
/*
(Location,73,4000805)
(age,83,5001999)
(gender,14,6000117)
(givenName,13,7000273)
(likes,15,11210407)
(friendOf,17,450067461)
*/
// This run uses the randomly-partitioned VP tables.
val VP2EXP = VP2Random
// Join order: smallest VP first (from the size ranking above).
val orderedList = List(
  ("dc", "Location"),
  ("foaf", "age"),
  ("wsdbm", "gender"),
  ("foaf", "givenName"),
  ("wsdbm", "likes"),
  ("wsdbm", "friendOf"))
// Rename each VP's object column to a unique name so the joined
// result keeps one distinct column per predicate.
val l1 = orderedList.map { case (ns, p) =>
  val idP = getIdP(ns, p)
  VP2EXP(idP).withColumnRenamed("o", s"o$idP")
}
// Left-to-right subject join across the six VPs.
val c3 = l1.reduce(_.join(_, "s"))
//c3.count
//42 845 342
queryTimeDFIter(c3, 10)
//time=13.8 s SHFR=1.7GB input=5.2GB
// Same C3 star query, second run (subject-partitioned VPs).
val c3AllProp = List(
  ("wsdbm", "likes"),
  ("wsdbm", "friendOf"),
  ("dc", "Location"),
  ("foaf", "age"),
  ("wsdbm", "gender"),
  ("foaf", "givenName"))
// Cache every VP involved in C3 (count forces materialization).
c3AllProp.foreach { case (ns, p) => VP2(getIdP(ns, p)).persist().count }
// Rank the triple patterns by increasing VP size (smallest first).
val order = c3AllProp.map { case (ns, p) =>
  val idP = getIdP(ns, p)
  (p, idP, VP2Size.get(idP).get)
}.sortBy(_._3)
/*
order.foreach(println)
(Location,73,4000805)
(age,83,5001999)
(gender,14,6000117)
(givenName,13,7000273)
(likes,15,11210407)
(friendOf,17,450067461)
*/
// This run uses the VPs partitioned by SUBJECT, so the subject joins
// below should be co-partitioned (no shuffle expected).
val VP2EXP = VP2
// Join order: smallest VP first (from the size ranking above).
val orderedList = List(
  ("dc", "Location"),
  ("foaf", "age"),
  ("wsdbm", "gender"),
  ("foaf", "givenName"),
  ("wsdbm", "likes"),
  ("wsdbm", "friendOf"))
// Rename each VP's object column to a unique name so the joined
// result keeps one distinct column per predicate.
val l1 = orderedList.map { case (ns, p) =>
  val idP = getIdP(ns, p)
  VP2EXP(idP).withColumnRenamed("o", s"o$idP")
}
// Left-to-right subject join across the six VPs.
val c3 = l1.reduce(_.join(_, "s"))
//c3.count
//42 845 342
queryTimeDFIter(c3, 10)
//time=6,4s shfr=0 input=257MB
// See: "SPARQL query processing with Apache Spark"