// C3 query evaluated over the VP2Random tables.
// Predicates used by C3, written as (namespace, local name) pairs.
val c3AllProp = List(
  ("wsdbm", "likes"),
  ("wsdbm", "friendOf"),
  ("dc", "Location"),
  ("foaf", "age"),
  ("wsdbm", "gender"),
  ("foaf", "givenName"))

// Persist all C3 VPs; the count is run right after persist() on each table
// (presumably to force the persisted data to be materialised — Spark persist is lazy).
c3AllProp.map { case (namespace, predicate) =>
  val vp = VP2Random(getIdP(namespace, predicate))
  vp.persist().count
}

// Order the triple patterns by increasing size: (predicate, predicate id, size).
val order = c3AllProp.map { case (namespace, predicate) =>
  (predicate, getIdP(namespace, predicate), VP2Size.get(getIdP(namespace, predicate)).get)
}.sortBy { case (_, _, size) => size }

order.foreach(println)
/* Recorded output:
(Location,73,4000805)
(age,83,5001999)
(gender,14,6000117)
(givenName,13,7000273)
(likes,15,11210407)
(friendOf,17,450067461)
*/

// Table set used for this experiment.
val VP2EXP = VP2Random

// Join order: the predicates listed by hand from smallest to largest,
// following the sizes recorded above.
val orderedList = List(
  ("dc", "Location"),
  ("foaf", "age"),
  ("wsdbm", "gender"),
  ("foaf", "givenName"),
  ("wsdbm", "likes"),
  ("wsdbm", "friendOf"))

// One table per predicate; the object column "o" is renamed to "o<id>" so the
// object columns of the different predicates stay distinct after the joins.
val l1 = orderedList.map { case (namespace, predicate) =>
  val predicateId = getIdP(namespace, predicate)
  VP2EXP(predicateId).withColumnRenamed("o", s"o${predicateId}")
}

// Left-deep chain of joins on the subject column "s", in list order:
// ((((l1(0) join l1(1)) join l1(2)) join l1(3)) join l1(4)) join l1(5).
val c3 = l1.reduceLeft(_.join(_, "s"))

//c3.count
//42 845 342

queryTimeDFIter(c3, 10)
// Recorded measurement: time=13.8 s SHFR=1.7GB input=5.2GB
// C3 query evaluated over the VP2 tables.
// Predicates used by C3, written as (namespace, local name) pairs.
val c3AllProp = List(
  ("wsdbm", "likes"),
  ("wsdbm", "friendOf"),
  ("dc", "Location"),
  ("foaf", "age"),
  ("wsdbm", "gender"),
  ("foaf", "givenName"))

// Persist all C3 VPs; the count is run right after persist() on each table
// (presumably to force the persisted data to be materialised — Spark persist is lazy).
c3AllProp.map { case (namespace, predicate) =>
  val vp = VP2(getIdP(namespace, predicate))
  vp.persist().count
}

// Sort the triple patterns by increasing size: (predicate, predicate id, size).
val order = c3AllProp.map { case (namespace, predicate) =>
  (predicate, getIdP(namespace, predicate), VP2Size.get(getIdP(namespace, predicate)).get)
}.sortBy { case (_, _, size) => size }

/* Disabled print and its recorded output:
order.foreach(println)
(Location,73,4000805)
(age,83,5001999)
(gender,14,6000117)
(givenName,13,7000273)
(likes,15,11210407)
(friendOf,17,450067461)
*/

// Author's note: these VPs are partitioned by SUBJECT.
val VP2EXP = VP2

// Join order: the predicates listed by hand from smallest to largest,
// following the sizes recorded above.
val orderedList = List(
  ("dc", "Location"),
  ("foaf", "age"),
  ("wsdbm", "gender"),
  ("foaf", "givenName"),
  ("wsdbm", "likes"),
  ("wsdbm", "friendOf"))

// One table per predicate; the object column "o" is renamed to "o<id>" so the
// object columns of the different predicates stay distinct after the joins.
val l1 = orderedList.map { case (namespace, predicate) =>
  val predicateId = getIdP(namespace, predicate)
  VP2EXP(predicateId).withColumnRenamed("o", s"o${predicateId}")
}

// Left-deep chain of joins on the subject column "s", in list order:
// ((((l1(0) join l1(1)) join l1(2)) join l1(3)) join l1(4)) join l1(5).
val c3 = l1.reduceLeft(_.join(_, "s"))

//c3.count
//42 845 342

queryTimeDFIter(c3, 10)
// Recorded measurement: time=6,4s shfr=0 input=257MB