Table of Contents

WatDiv Query C3 plans

S2RDF plan

// (namespace prefix, local name) of every property used by WatDiv query C3.
val c3AllProp = List(
  "wsdbm" -> "likes",
  "wsdbm" -> "friendOf",
  "dc"    -> "Location",
  "foaf"  -> "age",
  "wsdbm" -> "gender",
  "foaf"  -> "givenName")

// Persist the VP of each C3 property. count() is called right after persist(),
// presumably to force the data to be materialised before the query is timed.
// The resulting list of counts is what the shell prints for this statement.
for ((ns, p) <- c3AllProp) yield VP2Random(getIdP(ns, p)).persist().count()
 
 
// Rank the C3 triple patterns by increasing size.
// Each entry is (property local name, property id, size taken from VP2Size).
val order = c3AllProp
  .map { case (ns, p) =>
    // Resolve the id once: it is needed both for the size lookup and in the result.
    val idP = getIdP(ns, p)
    // Same exception type as a bare Option.get, but the message names the property.
    val size = VP2Size.get(idP).getOrElse(
      throw new NoSuchElementException(s"VP2Size has no entry for $ns:$p (id=$idP)"))
    (p, idP, size)
  }
  .sortBy { case (_, _, size) => size }
order.foreach(println)
/*
(Location,73,4000805)
(age,83,5001999)
(gender,14,6000117)
(givenName,13,7000273)
(likes,15,11210407)
(friendOf,17,450067461)

 */
 
 
// VP collection used for this experiment (the S2RDF plan reads VP2Random).
val VP2EXP = VP2Random

// Join order: the C3 properties listed by increasing size, as reported by `order`.
val orderedList = List(
  ("dc", "Location"),
  ("foaf", "age"),
  ("wsdbm", "gender"),
  ("foaf", "givenName"),
  ("wsdbm", "likes"),
  ("wsdbm", "friendOf"))

// One table per property, in join order. The "o" column is renamed to o<id>
// so that the columns coming from different properties keep distinct names.
val l1 = orderedList.map { case (ns, p) =>
  val idP = getIdP(ns, p)
  VP2EXP(idP).withColumnRenamed("o", s"o$idP")
}

// Chain the tables left to right, joining on column "s".
// reduce walks the list in order, so this is the same plan as
// l1(0).join(l1(1),"s")...join(l1(5),"s") without hard-coding six positions.
val c3 = l1.reduce(_.join(_, "s"))
//c3.count
//42 845 342


// Time the query; the second argument is presumably the number of iterations.
queryTimeDFIter(c3, 10)
//time=13.8 s SHFR=1.7GB input=5.2GB

S2RDF+Hybrid plan

// (namespace prefix, local name) of every property used by WatDiv query C3.
val c3AllProp = List(
  "wsdbm" -> "likes",
  "wsdbm" -> "friendOf",
  "dc"    -> "Location",
  "foaf"  -> "age",
  "wsdbm" -> "gender",
  "foaf"  -> "givenName")

// Persist the VP of each C3 property. count() is called right after persist(),
// presumably to force the data to be materialised before the query is timed.
// The resulting list of counts is what the shell prints for this statement.
for ((ns, p) <- c3AllProp) yield VP2(getIdP(ns, p)).persist().count()
 
 
// Rank the C3 triple patterns by increasing size.
// Each entry is (property local name, property id, size taken from VP2Size).
val order = c3AllProp
  .map { case (ns, p) =>
    // Resolve the id once: it is needed both for the size lookup and in the result.
    val idP = getIdP(ns, p)
    // Same exception type as a bare Option.get, but the message names the property.
    val size = VP2Size.get(idP).getOrElse(
      throw new NoSuchElementException(s"VP2Size has no entry for $ns:$p (id=$idP)"))
    (p, idP, size)
  }
  .sortBy { case (_, _, size) => size }
/*
order.foreach(println)
(Location,73,4000805)
(age,83,5001999)
(gender,14,6000117)
(givenName,13,7000273)
(likes,15,11210407)
(friendOf,17,450067461)
 */
 
// VPs are partitioned by SUBJECT
val VP2EXP = VP2

// Join order: the C3 properties listed by increasing size, as reported by `order`.
val orderedList = List(
  ("dc", "Location"),
  ("foaf", "age"),
  ("wsdbm", "gender"),
  ("foaf", "givenName"),
  ("wsdbm", "likes"),
  ("wsdbm", "friendOf"))


// One table per property, in join order. The "o" column is renamed to o<id>
// so that the columns coming from different properties keep distinct names.
val l1 = orderedList.map { case (ns, p) =>
  val idP = getIdP(ns, p)
  VP2EXP(idP).withColumnRenamed("o", s"o$idP")
}

// Chain the tables left to right, joining on column "s".
// reduce walks the list in order, so this is the same plan as
// l1(0).join(l1(1),"s")...join(l1(5),"s") without hard-coding six positions.
val c3 = l1.reduce(_.join(_, "s"))
//c3.count
//42 845 342

// Time the query; the second argument is presumably the number of iterations.
queryTimeDFIter(c3, 10)
//time=6.4 s SHFR=0 input=257MB

Go to SPARQL query processing with Apache Spark