Ci-dessous, les différences entre deux révisions de la page.
Les deux révisions précédentes Révision précédente Prochaine révision | Révision précédente Prochaine révision Les deux révisions suivantes | ||
site:recherche:logiciels:rdfdist [11/05/2015 14:01] amine |
site:recherche:logiciels:rdfdist [12/05/2015 13:08] amine |
||
---|---|---|---|
Ligne 108: | Ligne 108: | ||
val t1 = java.lang.System.currentTimeMillis(); | val t1 = java.lang.System.currentTimeMillis(); | ||
+ | |||
+ | |||
+ | /** | ||
+ | * set inputData with the path to the data encoded as quadruples (see Datasets excerpts) | ||
+ | */ | ||
// loading and transforming the dataset | // loading and transforming the dataset | ||
Ligne 149: | Ligne 154: | ||
val takco : Long = 1115684864 | val takco : Long = 1115684864 | ||
- | def ajout(a : ListBuffer[(Long, Long, Long)], e: (Long, Long, Long) ) : ListBuffer[(Long, Long, Long)] = { | ||
- | a += e | ||
- | return a | ||
- | } | ||
// ----------------------------------------------------------- | // ----------------------------------------------------------- | ||
Ligne 260: | Ligne 261: | ||
====Graph partitioning-based approaches==== | ====Graph partitioning-based approaches==== | ||
+ | ===Huang Approach === | ||
+ | <code> | ||
+ | import org.apache.spark.HashPartitioner | ||
+ | import scala.collection.mutable.ListBuffer | ||
+ | |||
+ | // Run configuration: dataset folder/name, scale factor and number of partitions. | ||
+ | val folder= "lubm" //"watdiv" | ||
+ | val dataset= "univ" //"watdiv" | ||
+ | val scale="1k" | ||
+ | val part=20 //10, 20 | ||
+ | |||
+ | // Input path components; the file read here carries the ".2hop" suffix. | ||
+ | val folderName = folder +scale | ||
+ | val fileName = dataset+scale+"_encoded_unique_quads.part."+part+".2hop" | ||
+ | |||
+ | val t1 = java.lang.System.currentTimeMillis(); | ||
+ | |||
+ | // Each input line is split on "," into 4 fields "(s,p,o,i)"; the result is keyed by the | ||
+ | // 4th field: (i, (s, p, o)). | ||
+ | val quads_I_SPO = sc.textFile(s"/user/olivier/${folderName}/${fileName}").coalesce(part).map(x=>x.split(",")).map(t=>(t(3).replace(")","").toLong, (t(0).replace("(","").toLong,t(1).toLong,t(2).toLong))) | ||
+ | |||
+ | // Hash-partition the quads on their key i into `part` partitions and mark them persisted. | ||
+ | val quadsDist = quads_I_SPO.partitionBy(new HashPartitioner(part)).persist | ||
+ | |||
+ | |||
+ | |||
+ | // NOTE(review): no Spark action runs between t1 and t2; since transformations are lazy, | ||
+ | // the time printed below presumably does not include reading the file - confirm. | ||
+ | val t2 = java.lang.System.currentTimeMillis(); | ||
+ | |||
+ | print("Loading time of quads : "+(t2-t1)/1000 +" sec") | ||
+ | |||
+ | // Dictionary ids of the LUBM properties used by the queries below. | ||
+ | // Values must match the "Encoding of LUBM concepts and properties" table of this page. | ||
+ | val advisor : Long = 1233125376 | ||
+ | val worksFor : Long = 1136656384 | ||
+ | val suborg : Long = 1224736768 | ||
+ | // memberOf is 1132462080 in the encoding table (the previous value 113246208 lacked the last digit) | ||
+ | val memof : Long = 1132462080 | ||
+ | val undeg : Long = 1101004800 | ||
+ | val teaof : Long = 1199570944 | ||
+ | val takco : Long = 1115684864 | ||
+ | |||
+ | |||
+ | // ----------------------------------------------------------- | ||
+ | // Query 1 : (not part of the benchmark) | ||
+ | // Pattern: (x advisor y) (y worksFor z) (z subOrganisation t) | ||
+ | // ----------------------------------------------------------- | ||
+ | |||
+ | var t1 = java.lang.System.currentTimeMillis(); | ||
+ | |||
+ | // Chain of two joins: advisor triples keyed by their object y are joined with worksFor | ||
+ | // triples keyed by their subject y; the result is re-keyed on z and joined with the | ||
+ | // subOrganization triples keyed by their subject z. Output rows are (x, y, z, t). | ||
+ | var pataws = quadsDist.filter({case(i,(s,p,o)) => p==advisor}).map({case(i,(s,p,o)) => (o,s)}). | ||
+ | join(quadsDist.filter({case(i,(s,p,o)) => p==worksFor}).map({case(i,(s,p,o)) => (s,o)}),part). | ||
+ | map({case (y,(x,z)) => (z,(x,y))}). | ||
+ | join(quadsDist.filter({case(i,(s,p,o)) => p==suborg}).map({case(i,(s,p,o)) => (s,o)}), part). | ||
+ | map({case (z,((x,y),t)) => (x,y,z,t)}) | ||
+ | |||
+ | // Action that actually triggers the evaluation measured by t1/t2. | ||
+ | pataws.count | ||
+ | |||
+ | // A tuple is not a collection, so flatMap needs an explicit sequence of the bound ids. | ||
+ | // NOTE(review): presumably the intent is the set of distinct ids appearing in the answers - confirm. | ||
+ | var pataws2 = pataws.flatMap({case (x,y,z,t) => Seq(x,y,z,t)}).distinct | ||
+ | |||
+ | var t2= java.lang.System.currentTimeMillis(); | ||
+ | |||
+ | println("Processing Q1 "+ (t2 - t1) +" msec for "+part+" partitions"); | ||
+ | |||
+ | |||
+ | // ----------------------------------------------------------- | ||
+ | // LUBM 2 : MSU | ||
+ | // Pattern: (x memberOf y) (y subOrg z) (x UndergraduateDegreeFrom z) | ||
+ | // ----------------------------------------------------------- | ||
+ | |||
+ | var t1 = java.lang.System.currentTimeMillis(); | ||
+ | |||
+ | //var pmemof = quadsDist.filter({case(i,(s,p,o)) => p==memof}).cache() | ||
+ | |||
+ | // memberOf triples keyed by y are joined with subOrganization triples keyed by y; the | ||
+ | // (x, y, z) candidates are then checked against the undergraduateDegreeFrom triples. | ||
+ | // The check joins on the pair (x, z): a tuple key is used instead of the string x+""+z, | ||
+ | // because concatenated ids are ambiguous (e.g. 1,23 and 12,3 both give "123"). | ||
+ | // The last join also receives `part`, like the other joins of this script. | ||
+ | var patmsu = quadsDist.filter({case(i,(s,p,o)) => p==memof}).map({case(i,(s,p,o)) => (o,s)}). | ||
+ | join(quadsDist.filter({case(i,(s,p,o)) => p==suborg}).map({case(i,(s,p,o)) => (s,o)}),part). | ||
+ | map({case (y,(x,z)) => ((x,z),(x,y,z))}). | ||
+ | join(quadsDist.filter({case(i,(s,p,o)) => p==undeg}).map({case(i,(x,p,z))=> ((x,z),null)}), part) | ||
+ | |||
+ | // Action that actually triggers the evaluation measured by t1/t2. | ||
+ | patmsu.count | ||
+ | |||
+ | // A tuple is not a collection, so flatMap needs an explicit sequence of the bound ids. | ||
+ | // NOTE(review): presumably the intent is the set of distinct ids appearing in the answers - confirm. | ||
+ | var patmsu2 = patmsu.flatMap({case (_,((x,y,z),_)) => Seq(x,y,z)}).distinct | ||
+ | |||
+ | var t2= java.lang.System.currentTimeMillis(); | ||
+ | |||
+ | println("Processing Q2 "+ (t2 - t1) +" msec for "+part+" partitions"); | ||
+ | |||
+ | // ----------------------------------------------------------- | ||
+ | // LUBM 9 : ATT | ||
+ | // Pattern: (x advisor y) (y teacherOf z) (x takesCourse z) | ||
+ | // ----------------------------------------------------------- | ||
+ | |||
+ | var t1 = java.lang.System.currentTimeMillis(); | ||
+ | |||
+ | // advisor triples keyed by y are joined with teacherOf triples keyed by y; the (x, y, z) | ||
+ | // candidates are then checked against the takesCourse triples. | ||
+ | // The check joins on the pair (x, z): a tuple key is used instead of the string x+""+z, | ||
+ | // because concatenated ids are ambiguous (e.g. 1,23 and 12,3 both give "123"). | ||
+ | var patatt = quadsDist.filter({case(i,(s,p,o)) => p==advisor}).map({case(i,(s,p,o)) => (o,s)}). | ||
+ | join(quadsDist.filter({case(i,(s,p,o)) => p==teaof}).map({case(i,(s,p,o)) => (s,o)}), part). | ||
+ | map({case (y,(x,z)) => ((x,z),(x,y,z))}). | ||
+ | join(quadsDist.filter({case(i,(s,p,o)) => p==takco}).map({case(i,(s,p,o))=> ((s,o),null)}), part) | ||
+ | |||
+ | // Action that triggers the evaluation; duplicates are removed before counting. | ||
+ | patatt.distinct.count | ||
+ | |||
+ | var t2= java.lang.System.currentTimeMillis(); | ||
+ | |||
+ | println("Processing Q3 (LUBM #9) "+ (t2 - t1) +" msec for "+part+" partitions"); | ||
+ | |||
+ | |||
+ | // ----------------------------------------------------------- | ||
+ | // LUBM 12 : WS | ||
+ | // Pattern: (y worksFor z) (z subOrganisation t) | ||
+ | // ----------------------------------------------------------- | ||
+ | |||
+ | val t1 = java.lang.System.currentTimeMillis(); | ||
+ | |||
+ | // Both sides are keyed by (i, z), where i is the quad's 4th field: a worksFor triple only | ||
+ | // joins with a subOrganization triple that carries the same i. The final map keeps the | ||
+ | // worksFor subject and the subOrganization object, i.e. the pairs (y, t). | ||
+ | val patws = quadsDist.filter({case(i,(s,p,o)) => p==worksFor}).map({case(i,(s,p,o)) => ((i,o),s)}).join(quadsDist.filter({case(i,(s,p,o)) => p==suborg}).map({case(i,(s,p,o)) => ((i,s),o)}),part).map({case ((i,k),(s,o)) => (s,o)}) | ||
+ | |||
+ | // Action that triggers the evaluation; duplicates are removed before counting. | ||
+ | val ans_patws = patws.distinct.count | ||
+ | |||
+ | val t2= java.lang.System.currentTimeMillis(); | ||
+ | |||
+ | println("Processing LUBM #12 "+ (t2 - t1) +" msec for "+part+" partitions"); | ||
+ | </code> | ||
+ | |||
+ | ===Warp=== | ||
<code> | <code> | ||
// Spark implementation of WARP replication | // Spark implementation of WARP replication | ||
Ligne 289: | Ligne 404: | ||
val folderName = folder + scale | val folderName = folder + scale | ||
val fileName = dataset + scale | val fileName = dataset + scale | ||
+ | /** | ||
+ | * set inputData with the path to the data encoded as quadruples (see Datasets excerpts) | ||
+ | */ | ||
val inputData = s"/user/olivier/${folderName}/${fileName}_encoded_unique_quads.part.${machine}" | val inputData = s"/user/olivier/${folderName}/${fileName}_encoded_unique_quads.part.${machine}" | ||
Ligne 616: | Ligne 734: | ||
</code> | </code> | ||
+ | ===2-hop based approach=== | ||
+ | <code> | ||
+ | // Computes, for several partition counts, the quads extended with one more hop of | ||
+ | // replication and saves them next to the input with a ".2hop" suffix. | ||
+ | val folder= "lubm" | ||
+ | val dataset= "univ" | ||
+ | val scale="10k" | ||
+ | val folderName = folder +scale | ||
+ | val part = Array(5,10,20) | ||
+ | |||
+ | for (p <- part) | ||
+ | { | ||
+ | val fileName = dataset+scale+"_encoded_unique_quads.part."+p | ||
+ | // NOTE(review): the stray text below was not valid Scala; presumably an alternative | ||
+ | // (WatDiv) input file name - kept as a comment, confirm before use. | ||
+ | // fileName: watdiv2k_encoded_unique_quads.partNew.5 | ||
+ | val t1 = java.lang.System.currentTimeMillis(); | ||
+ | |||
+ | // Each input line "(s,p,o,i)" is parsed into a quad of Longs (s, p, o, i). | ||
+ | val quads = sc.textFile(s"/user/olivier/${folderName}/${fileName}").map(x=>x.split(",")).map(t=>(t(0).replace("(","").toLong,t(1).toLong,t(2).toLong,t(3).replace(")","").toLong)) | ||
+ | |||
+ | // For every quad with object o and 4th field i1, take the quads whose subject is o and | ||
+ | // whose 4th field i2 differs from i1, and emit a copy of them tagged with i1. | ||
+ | val addOneHop = quads.map({case(s,p,o,i)=>(o,i)}).join(quads.map({case(s,p,o,i)=>(s,(p,o,i))})).filter({case(termS,(i1,(p,o,i2)))=>i1!=i2}).distinct.map({case(termS,(i1,(p,o,i2)))=>(termS,p,o,i1)}) | ||
+ | |||
+ | val newQuads = quads.union(addOneHop).distinct | ||
+ | val newQuadsSize = newQuads.count | ||
+ | |||
+ | // t2 is taken right after the count of newQuads; the count of addOneHop below is not timed. | ||
+ | val t2 = java.lang.System.currentTimeMillis(); | ||
+ | val hopSize = addOneHop.count | ||
+ | println(s"Time to compute one more hop on $folderName for $p partitions is ${t2-t1}") | ||
+ | println(s" new size = $newQuadsSize , added $hopSize") | ||
+ | newQuads.saveAsTextFile(s"/user/olivier/${folderName}/${fileName}.2hop") | ||
+ | } | ||
+ | </code> | ||
====Datasets excerpts==== | ====Datasets excerpts==== | ||
+ | ===Encoding of LUBM concepts and properties=== | ||
+ | |||
+ | <code> | ||
+ | Properties: | ||
+ | 0 <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> | ||
+ | 603979776 <http://www.univ-mlv.fr/~ocure/lubm.owl#officeNumber> | ||
+ | 671088640 <http://www.univ-mlv.fr/~ocure/lubm.owl#name> | ||
+ | 738197504 <http://www.univ-mlv.fr/~ocure/lubm.owl#title> | ||
+ | 805306368 <http://www.univ-mlv.fr/~ocure/lubm.owl#age> | ||
+ | 872415232 <http://www.univ-mlv.fr/~ocure/lubm.owl#telephone> | ||
+ | 939524096 <http://www.univ-mlv.fr/~ocure/lubm.owl#emailAddress> | ||
+ | 1006632960 <http://www.univ-mlv.fr/~ocure/lubm.owl#researchInterest> | ||
+ | 1082130432 <http://www.univ-mlv.fr/~ocure/lubm.owl#researchProject> | ||
+ | 1090519040 <http://www.univ-mlv.fr/~ocure/lubm.owl#hasAlumnus> | ||
+ | 1098907648 <http://www.univ-mlv.fr/~ocure/lubm.owl#degreeFrom> | ||
+ | 1101004800 <http://www.univ-mlv.fr/~ocure/lubm.owl#undergraduateDegreeFrom> | ||
+ | 1103101952 <http://www.univ-mlv.fr/~ocure/lubm.owl#mastersDegreeFrom> | ||
+ | 1105199104 <http://www.univ-mlv.fr/~ocure/lubm.owl#doctoralDegreeFrom> | ||
+ | 1107296256 <http://www.univ-mlv.fr/~ocure/lubm.owl#orgPublication> | ||
+ | 1115684864 <http://www.univ-mlv.fr/~ocure/lubm.owl#takesCourse> | ||
+ | 1124073472 <http://www.univ-mlv.fr/~ocure/lubm.owl#member> | ||
+ | 1132462080 <http://www.univ-mlv.fr/~ocure/lubm.owl#memberOf> | ||
+ | 1136656384 <http://www.univ-mlv.fr/~ocure/lubm.owl#worksFor> | ||
+ | 1138753536 <http://www.univ-mlv.fr/~ocure/lubm.owl#headOf> | ||
+ | 1140850688 <http://www.univ-mlv.fr/~ocure/lubm.owl#teachingAssistantOf> | ||
+ | 1149239296 <http://www.univ-mlv.fr/~ocure/lubm.owl#listedCourse> | ||
+ | 1157627904 <http://www.univ-mlv.fr/~ocure/lubm.owl#softwareDocumentation> | ||
+ | 1166016512 <http://www.univ-mlv.fr/~ocure/lubm.owl#publicationAuthor> | ||
+ | 1174405120 <http://www.univ-mlv.fr/~ocure/lubm.owl#softwareVersion> | ||
+ | 1182793728 <http://www.univ-mlv.fr/~ocure/lubm.owl#affiliateOf> | ||
+ | 1191182336 <http://www.univ-mlv.fr/~ocure/lubm.owl#tenured> | ||
+ | 1199570944 <http://www.univ-mlv.fr/~ocure/lubm.owl#teacherOf> | ||
+ | 1207959552 <http://www.univ-mlv.fr/~ocure/lubm.owl#publicationDate> | ||
+ | 1216348160 <http://www.univ-mlv.fr/~ocure/lubm.owl#affiliatedOrganizationOf> | ||
+ | 1224736768 <http://www.univ-mlv.fr/~ocure/lubm.owl#subOrganizationOf> | ||
+ | 1233125376 <http://www.univ-mlv.fr/~ocure/lubm.owl#advisor> | ||
+ | 1241513984 <http://www.univ-mlv.fr/~ocure/lubm.owl#publicationResearch> | ||
+ | |||
+ | Concepts: | ||
+ | 0 <http://www.univ-mlv.fr/~ocure/lubm.owl#Schedule> | ||
+ | 268435456 <http://www.univ-mlv.fr/~ocure/lubm.owl#Organization> | ||
+ | 301989888 <http://www.univ-mlv.fr/~ocure/lubm.owl#College> | ||
+ | 335544320 <http://www.univ-mlv.fr/~ocure/lubm.owl#Department> | ||
+ | 369098752 <http://www.univ-mlv.fr/~ocure/lubm.owl#Institute> | ||
+ | 402653184 <http://www.univ-mlv.fr/~ocure/lubm.owl#ResearchGroup> | ||
+ | 436207616 <http://www.univ-mlv.fr/~ocure/lubm.owl#Program> | ||
+ | 469762048 <http://www.univ-mlv.fr/~ocure/lubm.owl#University> | ||
+ | 536870912 <http://www.univ-mlv.fr/~ocure/lubm.owl#Publication> | ||
+ | 570425344 <http://www.univ-mlv.fr/~ocure/lubm.owl#Software> | ||
+ | 603979776 <http://www.univ-mlv.fr/~ocure/lubm.owl#Book> | ||
+ | 637534208 <http://www.univ-mlv.fr/~ocure/lubm.owl#Specification> | ||
+ | 671088640 <http://www.univ-mlv.fr/~ocure/lubm.owl#Manual> | ||
+ | 704643072 <http://www.univ-mlv.fr/~ocure/lubm.owl#Article> | ||
+ | 713031680 <http://www.univ-mlv.fr/~ocure/lubm.owl#TechnicalReport> | ||
+ | 721420288 <http://www.univ-mlv.fr/~ocure/lubm.owl#ConferencePaper> | ||
+ | 729808896 <http://www.univ-mlv.fr/~ocure/lubm.owl#JournalArticle> | ||
+ | 738197504 <http://www.univ-mlv.fr/~ocure/lubm.owl#UnofficialPublication> | ||
+ | 805306368 <http://www.univ-mlv.fr/~ocure/lubm.owl#Person> | ||
+ | 872415232 <http://www.univ-mlv.fr/~ocure/lubm.owl#TeachingAssistant> | ||
+ | 939524096 <http://www.univ-mlv.fr/~ocure/lubm.owl#Student> | ||
+ | 956301312 <http://www.univ-mlv.fr/~ocure/lubm.owl#GraduateStudent> | ||
+ | 973078528 <http://www.univ-mlv.fr/~ocure/lubm.owl#UndergraduateStudent> | ||
+ | 1006632960 <http://www.univ-mlv.fr/~ocure/lubm.owl#Employee> | ||
+ | 1015021568 <http://www.univ-mlv.fr/~ocure/lubm.owl#ResearchAssistant> | ||
+ | 1023410176 <http://www.univ-mlv.fr/~ocure/lubm.owl#Director> | ||
+ | 1031798784 <http://www.univ-mlv.fr/~ocure/lubm.owl#AdministrativeStaff> | ||
+ | 1033895936 <http://www.univ-mlv.fr/~ocure/lubm.owl#SystemsStaff> | ||
+ | 1035993088 <http://www.univ-mlv.fr/~ocure/lubm.owl#ClericalStaff> | ||
+ | 1040187392 <http://www.univ-mlv.fr/~ocure/lubm.owl#Faculty> | ||
+ | 1042284544 <http://www.univ-mlv.fr/~ocure/lubm.owl#PostDoc> | ||
+ | 1044381696 <http://www.univ-mlv.fr/~ocure/lubm.owl#Professor> | ||
+ | 1044643840 <http://www.univ-mlv.fr/~ocure/lubm.owl#Chair> | ||
+ | 1044905984 <http://www.univ-mlv.fr/~ocure/lubm.owl#VisitingProfessor> | ||
+ | 1045168128 <http://www.univ-mlv.fr/~ocure/lubm.owl#AssociateProfessor> | ||
+ | 1045430272 <http://www.univ-mlv.fr/~ocure/lubm.owl#Dean> | ||
+ | 1045692416 <http://www.univ-mlv.fr/~ocure/lubm.owl#FullProfessor> | ||
+ | 1045954560 <http://www.univ-mlv.fr/~ocure/lubm.owl#AssistantProfessor> | ||
+ | 1046478848 <http://www.univ-mlv.fr/~ocure/lubm.owl#Lecturer> | ||
+ | 1073741824 <http://www.univ-mlv.fr/~ocure/lubm.owl#Work> | ||
+ | 1140850688 <http://www.univ-mlv.fr/~ocure/lubm.owl#Course> | ||
+ | 1174405120 <http://www.univ-mlv.fr/~ocure/lubm.owl#GraduateCourse> | ||
+ | 1207959552 <http://www.univ-mlv.fr/~ocure/lubm.owl#Research> | ||
+ | </code> | ||
+ | |||
+ | ===LUBM Univ1 === | ||
+ | -[[http://webia.lip6.fr/~baazizi/research/iswc2015eval/sources/univ1_encoded_unique.id|encoded triples]](2.1MB) | ||
+ | |||
+ | -[[http://webia.lip6.fr/~baazizi/research/iswc2015eval/sources/quads_plus_replicas.id|encoded quadruples with replication]](2.3MB) | ||
+ | |||
+ | -[[http://webia.lip6.fr/~baazizi/research/iswc2015eval/sources/quads.id|replicated quadruples]](0.3MB) | ||
+ | |||
+ | |||
+ | -[[http://webia.lip6.fr/~baazizi/research/iswc2015eval/sources/univ1.nt|univ1.nt]] (16.3MB) |