This shows you the differences between two versions of the page.
Both sides previous revision | Previous revision | Next revision | Previous revision
en:site:recherche:logiciels:sparqlwithspark:datasetwatdiv [15/09/2016 10:06] hubert [Load VP's] |
en:site:recherche:logiciels:sparqlwithspark:datasetwatdiv [16/09/2016 23:01] (current) hubert [Load VP's] |
||
---|---|---|---|
Line 1: | Line 1: | ||
+ | {{indexmenu_n>1}} | ||
+ | |||
====== Loading WatDiv Dataset ====== | ====== Loading WatDiv Dataset ====== | ||
- | ===== Load and encode data ===== | + | ===== Data preparation: encode raw data ===== |
- | <code> | + | <code scala> |
import org.apache.spark.sql.DataFrame | import org.apache.spark.sql.DataFrame | ||
Line 86: | Line 88: | ||
Create one dataset per property. | Create one dataset per property. | ||
- | <code> | + | <code scala> |
/* | /* | ||
val df = num. | val df = num. | ||
Line 117: | Line 119: | ||
===== Load VP's ===== | ===== Load VP's ===== | ||
- | <code> | + | <code scala> |
// S2RDF VP | // S2RDF VP | ||
Line 128: | Line 130: | ||
val dir = "/user/hubert/watdiv" | val dir = "/user/hubert/watdiv" | ||
- | // 1 billion triple | + | // 1 billion triples |
val scale = "1G" | val scale = "1G" | ||
Line 145: | Line 147: | ||
//val dictSO = sqlContext.read.parquet(dictSOFile).repartition(NB_FRAGMENTS, col("so")) | //val dictSO = sqlContext.read.parquet(dictSOFile).repartition(NB_FRAGMENTS, col("so")) | ||
dictSO.persist().count | dictSO.persist().count | ||
- | //dictSO.unpersist() | ||
// VP Dataset | // VP Dataset | ||
// ------- | // ------- | ||
- | //val encodedFile = dir + "/frame" + scale | ||
val vpDir = dir + "/vp" + scale | val vpDir = dir + "/vp" + scale | ||
- | // CHRONO | + | // TIMER |
def queryTimeDFIter(q: DataFrame, nbIter: Int): Unit = { | def queryTimeDFIter(q: DataFrame, nbIter: Int): Unit = { | ||
var l = new scala.collection.mutable.ArrayBuffer[Double](nbIter) | var l = new scala.collection.mutable.ArrayBuffer[Double](nbIter) | ||
Line 170: | Line 170: | ||
- | // define VPs to be loaded | + | // Define the VPs to be loaded |
//------------------------- | //------------------------- | ||
val nbP = dictP.count.toInt | val nbP = dictP.count.toInt |