Created
March 10, 2017 20:20
-
-
Save jcdavis/1cc1b15f60ad7819f5368e8928452025 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import com.twitter.scalding.Job | |
import com.twitter.scalding.typed.{CoGroupable, Grouped, TypedPipe, UnsortedGrouped} | |
import scala.reflect.runtime.universe | |
/**
 * Best-effort discovery of the Thrift classes referenced by a scalding job.
 */
object ThriftClassFinder {
  /** Scalding container types whose type arguments are worth inspecting. */
  private val structures: Set[Class[_]] = Set(
    classOf[TypedPipe[_]],
    classOf[Grouped[_, _]],
    classOf[UnsortedGrouped[_, _]],
    classOf[CoGroupable[_, _]])

  /**
   * Reflect over a scalding job to try and identify thrift types it uses so they can be tokenized by cascading.
   * For some reason scala reflection is broken with the Hadoop InterfaceAudience annotation (see
   * https://issues.scala-lang.org/browse/SI-10129), meaning we can't use scalaType.members, so we instead use java
   * reflection to iterate over fields to find the ones we care about, and then look those up in scala reflection to
   * find the full un-erased type signatures, and try to find Thrift types from those.
   *
   * A field is inspected when its erased type is one of the known scalding structures or a subtype of one.
   *
   * Note: this is certainly not guaranteed to find every used type. It can't find types used in a step that isn't
   * referred to in a field, in addition to whatever bugs may exist.
   *
   * @param jobClazz the job class whose declared fields are inspected
   * @return the distinct Thrift classes found, in order of first appearance
   */
  def findUsedThriftClasses(jobClazz: Class[_ <: Job]): Seq[Class[_]] = {
    // Use the job's own class loader for both the mirror and the class lookups so they agree.
    val loader = jobClazz.getClassLoader
    val mirror = universe.runtimeMirror(loader)
    val scalaType = mirror.classSymbol(jobClazz).toType

    val found = for {
      field <- jobClazz.getDeclaredFields.toSeq
      // Subtype check rather than exact match, so fields declared with a more specific type are not skipped.
      if structures.exists(_.isAssignableFrom(field.getType))
      scalaSignature = scalaType.member(universe.TermName(field.getName)).typeSignature
      clazz <- getThriftClassesForType(scalaSignature, loader)
    } yield clazz

    // The same Thrift type is often referenced by several fields; report each class once.
    found.distinct
  }

  /**
   * Collect the Thrift classes appearing anywhere in the type arguments of the given type.
   *
   * Every type argument is checked itself and then searched recursively, so Thrift types nested inside tuples or
   * other generic wrappers are found as well.
   *
   * @param typeSignature the (possibly method) type whose result type arguments are searched
   * @param loader        class loader used to resolve candidate class names
   */
  private def getThriftClassesForType(typeSignature: universe.Type, loader: ClassLoader): Seq[Class[_]] = {
    // dealias first: typeArgs does not look through type aliases on its own.
    typeSignature.resultType.dealias.typeArgs.flatMap { generic =>
      getThriftClassOpt(generic.typeSymbol.fullName, loader).toList ++ getThriftClassesForType(generic, loader)
    }
  }

  /**
   * Resolve a class by name and return it only if it is a Thrift `TBase`.
   *
   * The class is loaded without being initialized, since only its type hierarchy is examined here.
   *
   * @param name   fully qualified class name to look up
   * @param loader class loader used for the lookup
   * @return the class if it could be loaded and is assignable to `TBase`, otherwise `None`
   */
  private def getThriftClassOpt(name: String, loader: ClassLoader): Option[Class[_]] = {
    try {
      val clazz: Class[_] = Class.forName(name, false, loader)
      if (classOf[org.apache.thrift.TBase[_, _]].isAssignableFrom(clazz))
        Some(clazz)
      else None
    } catch {
      // Names that don't map to a loadable class (type parameters, missing dependencies, ...) are simply
      // not Thrift classes as far as this best-effort scan is concerned.
      case _: ClassNotFoundException | _: NoClassDefFoundError => None
    }
  }
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment