Ci-dessous, les différences entre deux révisions de la page.
Les deux révisions précédentes Révision précédente Prochaine révision | Révision précédente | ||
site:enseignement:master:bdle:annales [21/11/2018 22:03] amine |
site:enseignement:master:bdle:annales [20/11/2019 07:57] (Version actuelle) amine |
||
---|---|---|---|
Ligne 1: | Ligne 1: | ||
====== Recueil d'examens ====== | ====== Recueil d'examens ====== | ||
===== Partie MR et Spark ===== | ===== Partie MR et Spark ===== | ||
+ | ==== Novembre 2019 ==== | ||
+ | {{ :site:enseignement:master:bdle:exam_bdle_nov2018.pdf |Sujet}} | ||
+ | ==== Septembre 2018 ==== | ||
+ | {{ :site:enseignement:master:bdle:ratt_bdle_sep2018.pdf |Sujet }} | ||
==== Novembre 2017 ==== | ==== Novembre 2017 ==== | ||
+ | === Exercice 1 === | ||
+ | == Données == | ||
+ | <code bash> | ||
+ | nom:mcgill,prenom:ben,age:22 | ||
+ | nom:smith,prenom:lara,niveau:4 | ||
+ | nom:snod,prenom:rick,age:27,niveau:5 | ||
+ | nom:kirch,prenom:lars,pays:russia | ||
+ | </code> | ||
+ | == Programme scala == | ||
+ | <code scala> | ||
+ | val data = sc.textFile(path+"users.txt") | ||
+ | |||
+ | case class attribut(cle:String,valeur:String) | ||
+ | |||
+ | def parseElem(in: String): attribut = { | ||
+ | val tmp = in.split(":") | ||
+ | attribut(tmp(0),tmp(1)) | ||
+ | } | ||
+ | |||
+ | def parseTuple(in:List[String]): List[attribut] = in.map(x=>parseElem(x)) //identique à la définition donnée plus bas | ||
+ | |||
+ | val parsed = data.map(x=>x.split(",").toList).map(x=>parseTuple(x).sortBy(f=>f.cle)) | ||
+ | |||
+ | parsed.collect.foreach(println) | ||
+ | |||
+ | case class attribut(cle:String,pres:Boolean) | ||
+ | |||
+ | def parseElem(in: String): attribut = { | ||
+ | val tmp = in.split(":") | ||
+ | attribut(tmp(0),true) | ||
+ | } | ||
+ | |||
+ | def parseTuple(in:List[String]): List[attribut] = in.map(x=>parseElem(x)) | ||
+ | |||
+ | |||
+ | |||
+ | val parsed = data.map(x=>x.split(",").toList).map(x=>parseTuple(x).sortBy(f=>f.cle)) | ||
+ | |||
+ | /*non posée*/ | ||
+ | def mergeListAttributes(lat1: List[attribut], lat2: List[attribut]): List[attribut] ={ | ||
+ | ... | ||
+ | } | ||
+ | |||
+ | |||
+ | val synthese = parsed.reduce(mergeListAttributes) | ||
+ | |||
+ | |||
+ | scala> parsed.collect.foreach(println) | ||
+ | List(attribut(age,true), attribut(nom,true), attribut(prenom,true)) | ||
+ | List(attribut(niveau,true), attribut(nom,true), attribut(prenom,true)) | ||
+ | List(attribut(age,true), attribut(niveau,true), attribut(nom,true), attribut(prenom,true)) | ||
+ | List(attribut(nom,true), attribut(pays,true), attribut(prenom,true)) | ||
+ | |||
+ | |||
+ | </code> | ||
=== Exercice 2 : Algèbre Dataset de Spark === | === Exercice 2 : Algèbre Dataset de Spark === | ||
== Données == | == Données == | ||
Ligne 15: | Ligne 74: | ||
m2,isa,tt3 | m2,isa,tt3 | ||
</code> | </code> | ||
+ | == Requêtes == | ||
- | <code scala> | + | <code> |
- | /*preparation*/ | + | //preparation |
case class Triple(sujet: String, prop: String, objet: String) | case class Triple(sujet: String, prop: String, objet: String) | ||
val triples = sc.textFile(someFile). | val triples = sc.textFile(someFile). | ||
Ligne 26: | Ligne 86: | ||
val t1 = triples.withColumnRenamed("sujet","x").withColumnRenamed("objet","y").select("x","y") | val t1 = triples.withColumnRenamed("sujet","x").withColumnRenamed("objet","y").select("x","y") | ||
- | |||
val t2 = triples.withColumnRenamed("sujet","y").withColumnRenamed("objet","z").select("y","z") | val t2 = triples.withColumnRenamed("sujet","y").withColumnRenamed("objet","z").select("y","z") | ||
- | |||
val t3 = triples.withColumnRenamed("sujet","z").withColumnRenamed("objet","x1").select("z","x1") | val t3 = triples.withColumnRenamed("sujet","z").withColumnRenamed("objet","x1").select("z","x1") | ||
- | |||
val res = t1.join(t2, "y").join(t3, "z").where("x=x1") | val res = t1.join(t2, "y").join(t3, "z").where("x=x1") | ||
Ligne 52: | Ligne 109: | ||
join(t_types.withColumnRenamed("sujet","mp"), "mp"). | join(t_types.withColumnRenamed("sujet","mp"), "mp"). | ||
join(t_types.withColumnRenamed("sujet","np").withColumnRenamed("objet", "o"),"np").where("objet!=o") | join(t_types.withColumnRenamed("sujet","np").withColumnRenamed("objet", "o"),"np").where("objet!=o") | ||
- | </scala> | + | </code> |