Spock Specification Framework – My Study Notes

import spock.lang.Specification

/**
 * Study-notes specification that walks through the building blocks of a Spock
 * spec: the four fixture methods, the block labels available inside a feature
 * method (setup/given, when, then, expect, cleanup, where, and), data-driven
 * features, and mock interactions.
 */
class HelloWorldSpec extends Specification {

    /**
     * The four fixture methods. Each one only prints a marker line.
     */
    def setup() { println '  setup' } // runs before every feature method
    def cleanup() { println '  cleanup' } // runs after every feature method
    def setupSpec() { println 'setupSpec' } // runs once, before the first feature method
    def cleanupSpec() { println 'cleanupSpec' } // runs once, after the last feature method

    // Feature method names are string literals, so give them meaningful names.
    def "Example 1: expect-where model"() {
        expect: "stimulus. Use block descriptions like this to describe a block"
        a == b
        where: "response expected"
        // data table: the first row names the data variables, each further row is one iteration
        a | b
        1 | 1
    }

    // Feature methods should have at least one of the six blocks:
    // setup, expect, where, when, then, cleanup
    def "Example 2: expect-where model with explicit setup block"() {
        setup: "explicit set up block. should be 1st block in a feature. setup: label is optional."
        def stack = new Stack() // not referenced by the condition below; only shows where setup code goes
        expect: a == b
        where: "where block is always the last block and not repeated"
        // header row and data row of the table written on one line, separated by ';'
        a | b; 1 | 1;
    }

    def "Example 3: expect-where model with implicit setup block"() {
        // Statements placed before the first label form the implicit setup block
        // (the setup: label is optional).
        println 'implicit set up'
        expect: a == b
        cleanup: 'custom clean up method'
        where: a | b; 1 | 1;
    }

    def "Example 4: when-then model"(){ // when: and then: always appear together
        setup: "create new stack"
        def stack = new Stack()
        when: "stimulus"
        // may contain arbitrary code
        stack.pop()

        then: "response - only conditions, exception conditions, interactions, and variable definitions are allowed here"
        true != false // condition: evaluated according to Groovy truth
        thrown(EmptyStackException) // exception condition - only one is allowed per then: block
        stack.empty // other conditions can follow the exception condition

        cleanup: "custom clean up - should be the last block in when-then model."
    }

    def "Example 5: when-then model continued"(){
        setup:
        def stack = new Stack()
        when: "when and then always appear together"
        stack.pop()

        then: "Alternative exception condition. Another way is using notThrown(exception)"
        // thrown() without an argument: the expected exception type is taken from
        // the declared type of the variable, which can then be inspected further
        EmptyStackException e = thrown()
        e.cause == null
    }

    def "Example 6: given-when-then model "(){ // when: and then: always appear together
        given: "BDD stories are well described in given-when-then model. given: is just an alias for setup:"
        def stack = new Stack()
        and: "and: is used to describe individual parts of a block"
        when:
        stack.push(1)
        and: "another and: block inside when:"
        println 'hello'

        then: "Another way of exception condition is using notThrown(exception)"
        notThrown(NullPointerException)
    }

    def "Example 7: expect only model"(){
        expect: "more limited compared to 'then' block. may only contain conditions and variable definitions."
        12 > 10 // good for single-line expressions; use this model to describe purely functional methods
    }

    // Notes only: this method has no labelled blocks and no statements.
    def "Directives"(){
        /*
        @Timeout    - Sets a timeout for execution of a feature or fixture method.
        @Ignore     - Ignores a feature method.
        @IgnoreRest - Ignores all feature methods not carrying this annotation.
                      Useful for quickly running just a single method.
        @FailsWith  - Expects a feature method to complete abruptly.
                      It has two use cases:
                      First, to document known bugs that cannot be resolved immediately.
                      Second, to replace exception conditions in certain corner cases
                      where the latter cannot be used (like specifying the behavior
                      of exception conditions).
                      In all other cases, exception conditions are preferable.
        */
    }

    def "Example 8: Simple Parameterization"(){
        expect: a + b == c
        where: "triggers 3 iterations with element (i) from each list"
        // data pipes: iteration i takes the i-th element of every list,
        // i.e. 1 + 2 == 3, 2 + 3 == 5, 3 + 4 == 7
        a << [1,2,3]
        b << [2,3,4]
        c << [3,5,7]
    }

    def "Example: Interactions example - events are published to all subscribers"() {
        setup: "Interactions outside of then: are global whose scope is within the feature method"
        def subscriber1 = Mock(Subscriber) // mock type passed explicitly; the variable itself is untyped
        Subscriber subscriber2 = Mock() // mock type inferred from the declared type of the variable
        def publisher = new PublisherImpl()
        publisher.add(subscriber1)
        publisher.add(subscriber2)
        subscriber1.isAlive() >> true // stubbed return value, declared outside then:

        when:
        publisher.send("event")
        subscriber1.isAlive()

        then: "Optional and required interactions"

        // a cardinality is mandatory for required interactions
        // n * subscriber.receive(event)      // exactly n times
        // (n.._) * subscriber.receive(event) // at least n times
        // (_..n) * subscriber.receive(event) // at most n times
        1 * subscriber1.receive("event") // asserts the required interaction happened exactly once
        // '_' as the target matches receive("event") on any mock object. Per Spock's
        // declared-order matching, subscriber1's call is consumed by the line above,
        // so this line accounts for the call on subscriber2.
        1 * _.receive("event")

        subscriber1.isAlive() >> true // optional interaction (no cardinality, so nothing is asserted)
    }
}

/**
 * Event source used by the interactions example. PublisherImpl, the
 * implementation in these notes, passes every sent event to each added subscriber.
 */
interface Publisher{
    /** Registers a subscriber with this publisher. */
    void add(Subscriber subscriber)
    /** Sends an event; the parameter is untyped, so any object can be passed. */
    void send(event)
}

/**
 * Receiver side of the interactions example; mocked in the spec rather than
 * implemented.
 */
interface Subscriber{
    /** Called with the event being delivered; the parameter is untyped. */
    void receive(event)
    /** Boolean status query; the spec stubs it to return true. */
    boolean isAlive()
}

/**
 * List-backed Publisher: keeps every subscriber handed to add() and passes
 * each sent event to all of them, in the order they were added.
 */
class PublisherImpl implements Publisher{
    // untyped property holding the registered subscribers
    def subscribers = []

    /** Appends the subscriber to the list of receivers. */
    @Override
    void add(Subscriber subscriber) {
        subscribers << subscriber
    }

    /** Calls receive(event) on every registered subscriber. */
    @Override
    void send(Object event) {
        for (listener in subscribers) {
            listener.receive(event)
        }
    }
}

Groovy – My Study Notes – Part 1

Overview

  • Use Groovy for flexibility and readability. Use Java for performance
  • Runs on JVM – Groovy is nothing but a new way of creating Java classes – Java code can be called from Groovy and vice-versa
  • Every Groovy type is a subtype of java.lang.Object – Every Groovy object is an instance of a type in the normal way
  • Groovy class IS A Java class
  • Groovy supports dynamic typing
  • To compile a Groovy script – { groovyc -d classes Foo.groovy }
  • To run a compiled Groovy class in Java – { java -cp $GROOVY_HOME/embeddable/groovy-all-1.0.jar:classes Foo }
  • To run a Groovy script – {groovy Foo.groovy}
  • Behind the scenes it compiles to a Java class and executes
  • Any Groovy code can be executed this way as long as it can be run; that is, it is either a script, a class with a main method, a Runnable, or a GroovyTestCase.
  • Groovy is purely object-oriented
    • everything is an object. E.g 2*3 //though they look like primitives, they are actually java.lang.Integer objects
    • every operator is a method call. E.g. a+b //logic for the + operator is implemented in method plus() on the object
  • Groovy automatically imports following packages: groovy.lang.*, groovy.util.*, java.lang.*, java.util.*, java.net.*, and java.io.* as well as the classes java.math.BigInteger and BigDecimal.
  • Say there is a Groovy class called Foo, we can use Foo objects without explicitly compiling the Foo class as long as Foo.groovy is on the classpath.
  • A Groovy script can also have class definitions inside them. 

Control Structure

Boolean Evaluation

Groovy’s == maps to Java’s equals() only if the class does not implement the Comparable interface; if it does, == maps to the class’s compareTo() method. Reference comparison is done via the is() method. Custom truth conventions can be added by implementing an asBoolean() method.
str = 'Hello'
if(str) println str + 'World' //Groovy checks if the object reference is null
list = [1]
if(list) println list //Groovy checks if list is not-null and not empty

Safe-navigation operator (?.) 

Eliminates the mundane null check. If the receiver is null, the expression evaluates to null instead of throwing a NullPointerException (NPE).
def foo(str) { if (str != null) { str.reverse() } } //Before
def foo(str) { str?.reverse() } //After

Looping methods

Using Ranges: for(i in 0..5){println i}  //prints 0,1,2,3,4,5 (the range is inclusive; use 0..<5 to stop at 4)
Using times function: 5.times { println "$it" } //prints 0,1,2,3,4
Using upto function: 0.upto(5) { println "$it" } //prints 0,1,2,3,4,5 (the upper bound is inclusive)
Using step function: 0.step(5, 2) { println "$it" } //prints 0,2,4

Static imports

import static Math.random as rand 
double value = rand() // alias name is used here to avoid confusion among static imports

OOPS

  • All methods and classes are public by default.
  • Getters and setters are automatically created by Groovy. No setters created for final fields. To prevent non-final fields from modification, implement setter method manually and throw an error.
  • Use “hello”.class.name instead of “hello”.getClass().getName(). Caveat: the class property has a special meaning in Maps and Builders, so on those objects it won’t work; call getClass() there instead.
  • We can use ‘this’ within static methods to refer to the Class object.

Basics

Optional Parameters

  • With Default value
def log(x, base=10) { Math.log(x) / Math.log(base) }
log(1024) //default base 10 is used
log(1024, 2)
  • Trailing array parameter as optional. Much like Java varargs.
def task(name, String[] details) { println "$name - $details" }
task 'name1'
task 'name2', 'blah..'
task 'name3', 'blah..blah..'

Named arguments in method calls

  • Class with no-argument constructor
class Robot { def type, height, width }
robot = new Robot(type: 'arm', width: 10, height: 40)
println "$robot.type, $robot.height, $robot.width"
  • Excess Parameters as Map – If a call passes more arguments than the method has parameters, and the excess arguments are name-value pairs, then Groovy collects the name-value pairs into a single Map and passes it as the first argument.
class Robot { 
  // When a call contains named arguments, Groovy gathers them into one Map and
  // passes it as the FIRST parameter (location); the positional arguments then
  // fill weight and fragile in order.
  def access(location, weight, fragile) {
    println "Received fragile? $fragile, weight: $weight, loc: $location"
  }
}
// location = [x:30, y:20, z:10], weight = 50, fragile = true
new Robot().access(x: 30, y: 20, z: 10, 50, true)
//You can change the order: the named arguments still end up in the first parameter
// location = [x:30, y:20, z:10, a:5], weight = 50, fragile = true
new Robot().access(50, true, x: 30, y: 20, z: 10, a:5)

Multiple Assignments

  • Method returning an array is assigned to multiple variables 
def splitName(fullName) { fullName.split(' ') }
def (firstName, lastName) = splitName('James Bond')
println "$lastName, $firstName $lastName"
  • Swapping two variables without a temporary variable using above technique
def (first, last) = ["James", "Bond"]
(first, last) = [last, first]
println "$first $last"

Implementing Interface

Block of code morphed as the implementation of an interface

interface Greeting { void greet(greeting) }
interface WellWisher { void wish(wish) }
void greeter(Greeting greeting){ greeting.greet()}
void wellwisher(WellWisher wellwisher){ wellwisher.wish()}

greeter(new Greeting(){ void greet(greeting){println 'Java style'}}) 
groovyStyle = {println 'Groovy style'}
greeter(groovyStyle as Greeting) 
//block of code is morphed into an implementation of the
// interface via 'as' operator
wellwisher(groovyStyle as WellWisher)
  • Groovy does not force us to implement all the methods in an interface. Very useful while mocking for unit testing.
  • Implementation of multi-method interface as a Map
interface Greeting { void greet(greeting); void wish(wish); void regard(regard); }
void callMe(Greeting greeting){ greeting.greet(); greeting.wish()}
//method name as key, implementation as value. Not all methods are implemented
greetingsMap = [ greet: {println 'Greet Hello World'}, wish: {println 'Wish Hello World'} ] 
callMe(greetingsMap as Greeting)

Operator Overloading

Each operator has a standard mapping to methods.

== equals
+ plus
- minus
++ next
.. next (for-each syntax)
-- previous
<< leftShift
<=> compareTo
  • Example 1: for (ch in 'a'..'c') { println ch }
  • Example 2: lst = ['hello']; lst << 'there'; println lst
  • Example 3: Custom class and operator overriding
// Wrapper around a single name value that supports the '+' operator.
class Name{
  def name

  // 'a + b' on two Name objects is dispatched to a.plus(b); the result is a
  // new Name whose value is both names joined by "~~"
  def plus(other){
    def joined = name + "~~" + other.name
    return new Name(name: joined)
  }

  // printable form used by println
  String toString() {
    return "name: " + name
  }
}
def name1 = new Name(name: "Hello")
def name2 = new Name(name: "World")
println name1 + name2

Annotations

  • groovyc ignores @Override
  • @Canonical – auto-generates toString() implementation as comma-separated field values
import groovy.transform.*
@Canonical(excludes="age, password")
class Person {
  String firstName, lastName, password
  int age
}
def sara = new Person(firstName: "Sara", lastName: "Walker", age: 49, password: "passw0rd")
println sara
  • @Delegate
import groovy.transform.*
class Worker {
  def work() { println 'get work done' }
  def analyze() { println 'analyze...' }
  def writeReport() { println 'get report written' }
}
class Expert {
  def analyze() { println "expert analysis..." }
}
class Manager {
  //At compile time, Groovy examines the Manager class and brings 
  // in methods from the delegated classes only if those methods 
  // don’t already exist
  @Delegate Expert expert = new Expert() 
  //only work() and writeReport() methods are brought here
  @Delegate Worker worker = new Worker()
}
def bernie = new Manager()
bernie.analyze()      //invokes Expert.analyze()
bernie.work()         //invokes Worker.work()
bernie.writeReport()  //invokes Worker.writeReport
  • @Immutable – Groovy adds the hashCode(), equals(), and toString() methods
import groovy.transform.*
@Immutable
class CreditCard { String cardNumber; int creditLimit }
println new CreditCard("4000-1111-2222-3333", 1000)
  • @Lazy – provides a painless way to implement the virtual proxy pattern with thread safety as a bonus
// Demonstrates @Lazy fields. Heavy is not declared in this snippet; it stands
// for any object that is expensive to create.
class AsNeeded {
def value
  // heavy1 and heavy2 are not created when AsNeeded is constructed; @Lazy
  // defers each initializer until the property is first accessed
  @Lazy Heavy heavy1 = new Heavy()
  // closure-call form of the initializer, here reading the instance's value property
  @Lazy Heavy heavy2 = { new Heavy(size: value) }()
  AsNeeded() { println "Created AsNeeded" }
}
  • @Newify – Create objects via Ruby-like and Python-like constructors without using the ‘new Foo()’ style. Comes in handy in DSL creation.
// Creates one object in each of the two @Newify styles and prints it.
@Newify([CreditCard, Person]) //specify here the list of types that may be created Python-style
def fluentCreate() {
  println CreditCard("1234-5678-1234-5678", 2000) //Python-like constructor invocation without the new keyword
  println Person.new("John", "Doe") //Ruby-like constructor invocation where new() is a method
}
fluentCreate()
  • @Singleton
// lazy = true: the single instance is created on first access of
// TheUnique.instance rather than when the class is loaded.
// NOTE(review): newer Groovy releases may reject an explicit constructor on a
// @Singleton class unless strict = false is set - confirm for the version in use.
@Singleton(lazy = true)
class TheUnique {
  private TheUnique() { println 'Instance created' }
  def hello() { println 'hello' }
}
TheUnique.instance.hello()
TheUnique.instance.hello() // same instance again, so the constructor is not expected to run a second time
new TheUnique().hello() //Caveat: since Groovy does not enforce private access, the private constructor does not stop clients from doing this.
  • @InheritConstructors
// Base class with two hand-written constructors.
@Canonical
class Car {
  def make, model, year
  // two-argument form: year is fixed to 2000
  Car(make, model){ this.make = make; this.model = model; this.year = 2000; }
  // three-argument form: year is supplied by the caller
  Car(make, model, year){ this.make = make; this.model = model; this.year = year; }
}
// @InheritConstructors gives Honda a matching constructor for each Car constructor.
@InheritConstructors
class Honda extends Car{
  //no need to explicitly override all the constructors here
}
//instantiate the subclass: Honda declares no constructor of its own, so this
//call only works because of @InheritConstructors
println new Honda("Honda", "Accord")

How to host a static web site in Amazon S3?

As you all know, Amazon Web Services (AWS) provides cloud storage via its S3 service. You could not only store raw data files, but also host static web sites (HTML, CSS, JS) on it. Here is how you do it.

Step 1: Create account in AWS

Create your free account in AWS and sign up for S3 service.

Step 2: Create a bucket

In order to store and organize your files, you need to first create a bucket in S3. Go to the AWS Console (https://console.aws.amazon.com/) and create a bucket. Let’s call it ‘hello-world’.

Step 3: Configure bucket permissions

Add permission to make the bucket content publicly available. For static web hosting, the content has to be publicly available.
Web Console -> Properties -> Permissions -> Add/Edit Bucket Policy -> (enter below content) -> Save

{
  "Version":"2012-10-17",
  "Statement":[{
    "Sid":"PublicReadForGetBucketObjects",
    "Effect":"Allow",
    "Principal": {
     "AWS": "*"
    },
    "Action":["s3:GetObject"],
    "Resource":["arn:aws:s3:::hello-world/*"
    ]
    }
  ]
}

Step 4: Enable bucket for Static Web Hosting

Web Console -> Properties -> Static Web Hosting -> Enable website hosting -> Save
Provide Index document (e.g., index.html) and Error Document (e.g., 404.html)

Note down the Endpoint address which would look something like this: hello-world.s3-website-us-east-1.amazonaws.com

If you want to configure your own domain, follow the details in this link .

Step 5: Add files to the bucket

You could add folders and files under the bucket via the web console. However, for bulk uploads, using an S3 client program such as S3 Browser is more efficient. To use a client program, you need to create an access key as follows.

AWS Console -> Security Credentials -> Access Keys -> Create new access key
Access Key: ?????????????????
Secret Key: ?????????????????????????????????????

And, now your web site should be available at http://hello-world.s3-website-us-east-1.amazonaws.com for public access.

Groovy Tip: Running Groovy from Sublime Text Editor

I just came across this cool little trick to compile and run Groovy scripts from Sublime Text Editor.

  • Open the editor and go to menu Tools->Build System->New Build System. This will open a new file named ‘untitled.sublime-build‘.
  • Add the path to your Groovy installation directory as shown below.
{
 "cmd": ["C:/apps/Groovy/groovy-2.1.9/bin/groovy.bat", "$file"]
}
  • Now ‘save’ the file as ‘groovy.sublime-build‘.
  • Open your groovy script and in the menu select ‘Tools->Build System->Groovy’
  • To compile and execute, ‘Tools->Build’ or F7 or Ctrl+B.

 

Hadoop Ecosystem

Hadoop Ecosystem

Apache Hadoop is the talk of the town pretty much all over the Big Data world. For beginners in Big Data and Hadoop, there are quite a few terms, frameworks, libraries, etc. to digest to get a feel for the Hadoop Ecosystem.

In the process of learning them myself, I came across a wonderful article written by Edd Dumbill. I have excerpted a few notes from his article and formatted them in tabular form in this blog entry.

Apache HadoopHadoop Logo

  • an open-source software framework for storage and large scale processing of data-sets on clusters of commodity hardware.
  • It is a Batch-oriented system.
  • Components
    • MapReduce – Framework for parallel computation on server clusters, based on the programming model introduced by Google.

    • HDFS (Hadoop Distributed File System) – Distributed redundant file system for storing unstructured and schemaless data in Hadoop.
    • YARN (Yet Another Resource Negotiator) – a framework for job scheduling and cluster resource management.

HadoopEcosystem

Programmability

Pig

Pig

  • High-level programming language that simplifies the common tasks of working with Hadoop: loading data, expressing transformations on the data, and storing the final results.
  • Pig’s built-in operations can make sense of semi-structured data, such as log files.
  • Main advantage is to drastically cut the amount of code needed compared to direct use of Hadoop’s Java APIs.

Hive

Hive

  • Enables Hadoop to operate as a data warehouse with SQL-like access. Easily integratable via JDBC/ODBC.
  • It superimposes structure on data in HDFS, and then permits queries over the data using a familiar SQL-like syntax.
  • More suitable for data warehousing tasks.

Data Collection

Sqoop

Sqoop

  • a tool to import data from relational databases into Hadoop: either directly into HDFS, or into Hive

Flume

Logo

  • a tool to import streaming flows of log and event data directly into HDFS
  • Efficient service for collecting, aggregating, and moving large amounts of log data.

Chukwa

Chukwa

  • open source data collection system for monitoring large distributed systems
  • built on top of HDFS and MapReduce

Data Serialization

Avro

Avro

  • Data-serialization framework
  • Primarily used in Hadoop for both a serialization format for persistent data, and a wire format for communication between Hadoop nodes, and from client programs to the Hadoop services.

Configuration and Coordination

Zookeeper

  • a tool for configuration management and coordination of computing nodes in a cluster

Workflow

Oozie

Oozie

  • Orchestration and workflow management tool to manage the workflow and dependencies, removing the need for developers to code custom solutions.

Deployment, Monitoring and Administration

Ambari

ApacheAmbari

  • Tool to help system administrators deploy and configure Hadoop, upgrade clusters, and monitor services. Through an API it may be integrated with other system management tools.
  • Developed by HortonWorks

Whirr

  • Tool for cloud-agnostic deployment of clusters, offers a way of running services, including Hadoop, on cloud platforms.
  • Currently supports the Amazon EC2 and Rackspace services.

Machine Learning

Mahout

ApacheMahout

  • Library of machine learning and data mining algorithms.
  • Use cases include user collaborative filtering, user recommendations, clustering and classification.

Databases

HBaseApacheHBase
  • a column-oriented database scaling to billions of rows
  • Runs on top of HDFS for rapid data access.
  • MapReduce can use HBase as both a source and a destination for its computations.
  • Hive and Pig can be used in combination with HBase.