Monthly Archives: April 2015

log4j

log4j.properties

log4j.rootLogger=DEBUG, file, stdout

# Direct log messages to a log file
log4j.appender.file=org.apache.log4j.RollingFileAppender
log4j.appender.file.File=soap3.log
log4j.appender.file.MaxFileSize=10MB
log4j.appender.file.MaxBackupIndex=10
log4j.appender.file.layout=org.apache.log4j.PatternLayout
log4j.appender.file.layout.ConversionPattern=%d{yyyy-MM-dd HH:mm:ss} %-5p %c{1}:%L - %m%n

# Direct log messages to stdout
log4j.appender.stdout=org.apache.log4j.ConsoleAppender
log4j.appender.stdout.Target=System.out
log4j.appender.stdout.layout=org.apache.log4j.PatternLayout
log4j.appender.stdout.layout.ConversionPattern=%d{yyyy-MM-dd HH:mm:ss} %-5p %c{1}:%L - %m%n

log4j.xml

<?xml version="1.0" encoding="UTF-8" ?>
<!DOCTYPE log4j:configuration SYSTEM "log4j.dtd">
<log4j:configuration xmlns:log4j="http://jakarta.apache.org/log4j/" debug="true">

    <appender name="console" class="org.apache.log4j.ConsoleAppender">
        <layout class="org.apache.log4j.PatternLayout">
            <param name="ConversionPattern"
                   value="%d{yyyy-MM-dd HH:mm:ss} %-5p %c{1}:%L - %m%n" />
        </layout>
    </appender>

    <appender name="file" class="org.apache.log4j.RollingFileAppender">
        <param name="append" value="true" />
        <param name="maxFileSize" value="1MB" />
        <param name="maxBackupIndex" value="10" />
        <param name="file" value="test.log" />
        <layout class="org.apache.log4j.PatternLayout">
            <param name="ConversionPattern"
                   value="%d{yyyy-MM-dd HH:mm:ss} %-5p %c{1}:%L - %m%n" />
        </layout>
    </appender>

    <appender name="mailAppender" class="org.apache.log4j.net.SMTPAppender">
        <param name="BufferSize" value="512" />
        <!--nowcom mail server configuration start-->
        <param name="SMTPHost" value="mail.xxx.com" />
        <param name="SMTPPort" value="25" />
        <param name="From" value="donotreply_log4j@xxx.com" />
        <param name="To" value="pli@xxx.com" />
        <!--nowcom mail server configuration end-->
        <!--gmail configuration start-->
        <!--<param name="SMTPProtocol" value="smtps" />-->
        <!--<param name="SMTPHost" value="smtp.gmail.com" />-->
        <!--<param name="SMTPPort" value="465" />-->
        <!--<param name="SMTPUsername" value="username" />-->
        <!--<param name="SMTPPassword" value="password" />-->
        <!--<param name="From" value="allen.lipeng47@gmail.com" />-->
        <!--<param name="To" value="allen.lipeng47@gmail.com" />-->
        <!--gmail configuration end-->
        <param name="Subject" value="Testing Log4j mail notification" />
        <layout class="org.apache.log4j.PatternLayout">
            <param name="ConversionPattern" value="%d{yyyy-MM-dd HH:mm:ss} %-5p %c{1}:%L - %m%n" />
        </layout>
        <filter class="org.apache.log4j.varia.LevelRangeFilter">
            <param name="LevelMin" value="warn" />
            <param name="LevelMax" value="fatal" />
        </filter>
    </appender>

    <root>
        <level value="DEBUG" />
        <appender-ref ref="console" />
        <appender-ref ref="file" />
        <appender-ref ref="mailAppender" />
    </root>

</log4j:configuration>

Another LevelRangeFilter option could be:

<filter class="org.apache.log4j.varia.LevelRangeFilter">
    <param name="LevelMin" value="error" />
    <param name="LevelMax" value="fatal" />
</filter>
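For reference, writing to these appenders from code is the usual log4j 1.x pattern. A minimal sketch (assuming log4j 1.x itself, e.g. the log4j:log4j:1.2.17 Maven artifact, is on the classpath along with one of the configs above):

import org.apache.log4j.Logger;

public class LogDemo {

    // one logger per class, named after the class
    private static final Logger logger = Logger.getLogger(LogDemo.class);

    public static void main(String[] args) {
        logger.debug("goes to the file and console appenders");
        // with the XML config, warn..fatal levels also reach the SMTPAppender
        logger.warn("this one would be emailed as well");
    }
}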

A good article explaining slf4j and its bindings (log4j, logback, JDK logging, simple, NOP): http://www.slf4j.org/manual.html

MySQL Million rows test: insertion

These days I have started to use MySQL to query tables with millions of rows. To get to know MySQL's performance better, I did this test. Basically, I created several tables with different types/numbers of indices and tried to insert as many rows as possible within a fixed time, then compared the results.

Environment:
Windows 8.1
Memory 12GB
MySQL 5.1.73-community
CPU AMD A10, 2.1 GHz

index table1 (no index)
CREATE TABLE `itable1` (
`Id1` int(11) DEFAULT NULL,
`id2` int(11) DEFAULT NULL
) ENGINE=InnoDB DEFAULT CHARSET=utf8 COMMENT='insertion test, no index'

index table2 (1 int index)
CREATE TABLE `itable2` (
`Id1` int(11) DEFAULT NULL,
`id2` int(11) DEFAULT NULL,
KEY `idx1` (`Id1`)
) ENGINE=InnoDB DEFAULT CHARSET=utf8 COMMENT='insertion test, 1 int index'

index table3 (2 int indices)
CREATE TABLE `itable3` (
`Id1` int(11) NOT NULL DEFAULT '0',
`id2` int(11) DEFAULT NULL,
KEY `idx1` (`Id1`),
KEY `idx2` (`id2`)
) ENGINE=InnoDB DEFAULT CHARSET=utf8 COMMENT='insertion test, 2 int indices'

index table4 (10 int indices)
CREATE TABLE `itable4` (
`Id1` int(11) DEFAULT NULL,
`id2` int(11) DEFAULT NULL,
`id3` int(11) DEFAULT NULL,
`id4` int(11) DEFAULT NULL,
`id5` int(11) DEFAULT NULL,
`id6` int(11) DEFAULT NULL,
`id7` int(11) DEFAULT NULL,
`id8` int(11) DEFAULT NULL,
`id9` int(11) DEFAULT NULL,
`id10` int(11) DEFAULT NULL,
KEY `idx1` (`Id1`),
KEY `idx2` (`id2`),
KEY `idx3` (`id3`),
KEY `idx4` (`id4`),
KEY `idx5` (`id5`),
KEY `idx6` (`id6`),
KEY `idx7` (`id7`),
KEY `idx8` (`id8`),
KEY `idx9` (`id9`),
KEY `idx10` (`id10`)
) ENGINE=InnoDB DEFAULT CHARSET=utf8 COMMENT='insertion test, 10 int indices'

index table5 (2 varchar indices)
CREATE TABLE `itable5` (
`Id1` varchar(255) DEFAULT NULL,
`id2` varchar(255) DEFAULT NULL,
KEY `idx1` (`Id1`),
KEY `idx2` (`id2`)
) ENGINE=InnoDB DEFAULT CHARSET=utf8 COMMENT='insertion test, 2 varchar indices'

I ran each test for 170 seconds, and the results are below:

Conclusion:
1. The more indices a table has, the slower insertion becomes.
2. The more indices a table has, the more space it takes up.
*3. Insertion speed stays roughly constant as the number of rows increases.

Point 3 was beyond my expectation. MySQL uses a B+ tree, so one row insertion takes log N time. Continuously inserting N rows should take log 1 + log 2 + … + log N = log(N!) time, and log(N!) is approximately N log N. So I expected the growth to be a curve, not a straight line. Weird…

For my java code, please click here: https://www.github.com/allenlipeng47/MillionTableTest
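In case the link is unavailable, the core of the test is just a timed insert loop over JDBC. A rough sketch, not the exact benchmark code (the connection settings and table name here are assumptions):

import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.PreparedStatement;
import java.util.Random;

public class InsertTest {

    public static void main(String[] args) throws Exception {
        // assumed local MySQL instance and credentials; adjust to your environment
        Connection conn = DriverManager.getConnection(
                "jdbc:mysql://localhost:3306/test", "root", "password");
        PreparedStatement ps = conn.prepareStatement(
                "INSERT INTO itable1 (Id1, id2) VALUES (?, ?)");
        Random rand = new Random();
        long deadline = System.currentTimeMillis() + 170 * 1000L; // run for 170 seconds
        long rows = 0;
        while (System.currentTimeMillis() < deadline) {
            ps.setInt(1, rand.nextInt());
            ps.setInt(2, rand.nextInt());
            ps.executeUpdate();
            rows++;
        }
        System.out.println("inserted rows: " + rows);
        ps.close();
        conn.close();
    }
}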

I think the 2 tables below can generally give us a feeling for the index time/space trade-off.

MySQL Million rows test: select, join

These days I have started to use MySQL to query tables with millions of rows. To get to know MySQL's performance better, I did this test.
Basically, I have 2 big tables and 1 small table. Each big table has 100M rows; the small table has 1M rows. I tested select and join operations among them, on indexed and non-indexed fields.

Environment:
Windows 8.1
Memory 12GB
MySQL 5.1.73-community
CPU AMD A10, 2.1 GHz

big table1:
CREATE TABLE `btable1` (
`indexId1` int(11) NOT NULL AUTO_INCREMENT,
`indexId2` int(11) DEFAULT NULL,
`unIndexVarchar` varchar(255) DEFAULT NULL,
PRIMARY KEY (`indexId1`),
KEY `indexId2` (`indexId2`)
) ENGINE=InnoDB AUTO_INCREMENT=1 DEFAULT CHARSET=utf8 COMMENT='100 million rows, 2 indexed fields'

big table 2:
CREATE TABLE `btable2` (
`indexId` int(11) NOT NULL AUTO_INCREMENT COMMENT 'indexed primary key',
`unIndexId` int(11) DEFAULT NULL,
`unIndexVarchar` blob,
PRIMARY KEY (`indexId`)
) ENGINE=InnoDB AUTO_INCREMENT=1 DEFAULT CHARSET=utf8 COMMENT='100 million rows, 1 indexed field'

small table:
CREATE TABLE `stable` (
`indexId` int(11) NOT NULL AUTO_INCREMENT,
`unIndexId` int(11) DEFAULT NULL,
`unIndexVarchar` blob,
PRIMARY KEY (`indexId`)
) ENGINE=InnoDB AUTO_INCREMENT=1 DEFAULT CHARSET=utf8 COMMENT='small table, has 1 million items'

Then, I did the following test among these tables:
1. SmallTable count
select count(*) from stable
result: 1M; time: 2s

2. BigTable count
select count(*) from btable1
result: 100M; time: 110s

3. BigTable join SmallTable. 
select count(*) from btable1 join stable on btable1.indexId1 = stable.unIndexId
result: 1M; time: 9.11s

4. BigTable1 join SmallTable join BigTable2
select count(*) from btable1 join stable on btable1.indexId1 = stable.indexId join btable2 on btable2.unIndexId = stable.indexId
result: 1M; time: 14s

5. BigTable1 join BigTable2 join SmallTable
select count(*) from btable1 join btable2 on btable1.indexId1 = btable2.indexId join stable on btable1.indexId1 = stable.indexId
result: 1M; time: 8.8s

6. SmallTable count, non-indexed condition
select count(*) from stable where stable.unIndexVarchar='AK'
result: 1451, time: 2.9s

7. BigTable join SmallTable, non-indexed condition
select count(*) from btable1 join stable on btable1.indexId1 = stable.indexId where stable.unIndexVarchar='KA'
result: 1471, time: 3s

8. BigTable join SmallTable, non-indexed condition
select count(*) from btable1 join stable on btable1.indexId1 = stable.indexId where btable1.unIndexVarchar='IO'
result: 1543, time: 4.3s

9. BigTable join SmallTable, non-indexed join condition
select count(*) from btable1 join stable on btable1.indexId1 = stable.unIndexId where stable.unIndexVarchar='AK'

Conclusion:
2 and 3 show that joining a big table with a small table is more efficient than counting the big table alone.
3 and 4 show that when a small table joins in and the result set is small, it helps to break down the big table.
4 and 5 show that join order doesn't matter; MySQL can optimize the SQL query.
1 and 6 show no big difference between those 2 queries; both take O(n) time.
7 and 8 show that putting the condition on the small table is more efficient than putting it on the big table.
9 takes longer than 3, which shows that a condition on a non-indexed field is inefficient.
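For anyone reproducing the timings, a trivial JDBC stopwatch is enough. A sketch (the connection settings are assumptions; swap in whichever query from the list above you want to time):

import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.ResultSet;
import java.sql.Statement;

public class QueryTimer {

    public static void main(String[] args) throws Exception {
        // assumed local MySQL instance and credentials; adjust to your environment
        Connection conn = DriverManager.getConnection(
                "jdbc:mysql://localhost:3306/test", "root", "password");
        Statement st = conn.createStatement();
        String sql = "select count(*) from btable1 join stable"
                + " on btable1.indexId1 = stable.unIndexId";
        long start = System.currentTimeMillis();
        ResultSet rs = st.executeQuery(sql);
        rs.next();
        System.out.println("count = " + rs.getLong(1)
                + ", took " + (System.currentTimeMillis() - start) + " ms");
        conn.close();
    }
}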

Javascript debug in chrome

These days I've been working on Google OAuth 2. I found Chrome is a really good tool for debugging JavaScript. The following pictures give a summary of its use.

1. Take an example that we want to debug the ajax function on this page.
http://allenlipeng47.com/PersonalPage/dict/toDict

2. Press Ctrl + Shift + I, or go to Chrome Settings -> More tools -> Developer tools, to pop up the debug window.

3. Go to Network, click the button on the dictionary page, and see what happens. We captured this ajax request.

4. Check header

5. Check preview

6. Check response

7. Go to Sources and set a breakpoint in the JavaScript program.

8. The breakpoint stops the program.

9. In Console, we can inspect the variables and debug.

Google Oauth 2.0

Finally got my Google OAuth 2.0 working. It took me 2 weeks of research. One suggestion: don't use the provided Google packages. I tried so many Java examples, but they always had this or that problem. In the end, I turned back to plain GET, POST, and ajax to test it. And I made it!

1. Register at the Google developer console. Go to developer.google.com/console.

1). Create a Client ID. We only pay attention to the "Authorized redirect URIs". This is the URL Google will redirect the browser to after the user grants access to the web application. It needs to be identical to the one we configure in our application later.

2). Create an API key. To my understanding, the API key lets Google know which application is accessing it, and collect statistics. For testing, we leave the restrictions empty, which allows everyone from everywhere to call the API.

3). Enable the Google+ API. We are just going to read the user's name and email, so we need to enable the Google+ API for our API key. Go and enable it.

2. Explanation of the process.
The first step is to redirect to the Google page that lets the user grant our application access to their Google account.
Be careful that redirect_uri must be the same as the one we set in the Google console.
grant_type shouldn't be omitted; we just set it as shown (even though I don't know what it is used for).
state is a parameter we can set to whatever we want; it will be delivered back to the server after authorization.
scope is what information we want to read from the Google account. For example, another scope could be "https://www.googleapis.com/auth/analytics".
If we want more than one scope, we just add a space between them, like this:
"&scope=https://www.googleapis.com/auth/plus.profile.emails.read https://www.googleapis.com/auth/analytics"

var end_point = "https://accounts.google.com/o/oauth2/auth?";
var response_type = "response_type=code";
var client_id = "&client_id=272634450431-8svpdb48br706qoo8huv6698gud2e558.apps.googleusercontent.com";
var client_secret = "&client_secret=xxxxxxxxxxxxxxxx";
var redirect_uri = "&redirect_uri=http://www.allenlipeng47.com/PersonalPage/node/oauth.jsp";
var scope = "&scope=https://www.googleapis.com/auth/plus.profile.emails.read";
var state = "&state=oauthtest";
var grant_type = "&grant_type=authorization_code";

function getCode(){
   var url = end_point + response_type + client_id + redirect_uri + scope + state;
   window.location.href = url;
}

After the user allows access, Google redirects to the URI we set.
We get a code with which to request an access_token. So let's use the following function to get the access_token.

/**
 * Get token
 */
function getToken(){
   var code = "code=" + $("#code").val();
   var endpoint =  "https://www.googleapis.com/oauth2/v3/token?";
   var url = endpoint + code + redirect_uri + client_id + client_secret + grant_type;
   $.ajax({
      type: "POST",
      url: url,
      success: function(res){
         console.log(res);
         $("#accessToken").val(res.access_token);
      },
      error: function(err){
         console.log(err);
      }
   });
}

Finally, we have the token. Let's get the user information now. Here we need the API key we applied for before; put it in the request.

/**
 * Get user information
 */
function getUserInfo(){
   var endpoint = "https://www.googleapis.com/plus/v1/people/me?";
   var token = "access_token=" + $("#accessToken").val();
   var key = "&key=XXXXXXXXXXXXXXXXXXXXXXXXXXXXXX";
   var url = endpoint + token + key;
   $.ajax({
      type: "GET",
      url: url,
      success: function(res){
         $("#name").val(res.displayName);
         $("#email").val(res.emails[0].value);
         console.log(res);
      },
      error: function(err){
         console.log(err);
      }
   });
}

Done!

The following 2 errors are very similar, and they gave me a lot of headache. Be careful with them!
"Insufficient Permission" means the scope is not enough. We need to add the specific scope in the get-code phase.
"Invalid Credentials" means the access_token parameter doesn't exist or is not right.

I think an HTML/JavaScript Google OAuth flow is clearer and easier to understand, which is why this is written as a tutorial. Once we understand the process, we can move to servlets or Spring MVC and handle the authorization on the server side.

Feel free to test my oauth and the source code: link

Read JSON

I have the following JSON file and folder structure.
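The file is the client_secret.json downloaded from the Google Developers Console, placed in the resources folder. Its relevant shape (values elided), as assumed by the code below, is roughly:

{
  "web": {
    "client_id": "...",
    "client_secret": "..."
  }
}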

The Maven dependency I need is the following:

<dependency>
    <groupId>com.googlecode.json-simple</groupId>
    <artifactId>json-simple</artifactId>
    <version>1.1.1</version>
</dependency>

With the following code, I read the JSON file:

package com.pli.project.test;

import org.json.simple.JSONObject;
import org.json.simple.parser.JSONParser;
import java.io.*;
import java.util.HashMap;

/**
 * Created by lipeng on 2015/4/4.
 */
public class App {

    public static void main(String[] args) {
        JSONParser parser = new JSONParser();
        try{
            //find json file in resources folder
            ClassLoader classLoader = App.class.getClassLoader();
            InputStream is = classLoader.getResourceAsStream("client_secret.json");
            BufferedReader br = new BufferedReader((new InputStreamReader(is)));
            Object obj = parser.parse(br);
            //read json file
            JSONObject jsonObject = (JSONObject)obj;
            HashMap<String, String> web = (HashMap<String, String>)jsonObject.get("web");
            String client_secret = web.get("client_secret");
            System.out.println(client_secret);
        }catch (Exception e){
            e.printStackTrace();
        }
    }
}

Read Spring Batch parameters into a bean

After we add parameters to a job and run it, how do we get the parameters inside a class?

In config.xml, we add the following code:

<bean id="beanName" class="com.pli.project.sba" scope="step">
   <property name="emailFlag" value="#{jobParameters[FAILURE_EMAIL]}"/>
</bean>

<bean class="org.springframework.batch.core.scope.StepScope" />

The bean must be declared with scope="step" so that the #{jobParameters[...]} expression can be late-bound when the step runs; the StepScope bean registers that scope.
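On the Java side, the bean just needs a matching setter for Spring to inject into. A minimal sketch (the class name here is hypothetical):

package com.pli.project.sba;

public class JobConfigBean {

    private String emailFlag;

    // injected by Spring from jobParameters[FAILURE_EMAIL] at step scope
    public void setEmailFlag(String emailFlag) {
        this.emailFlag = emailFlag;
    }

    public String getEmailFlag() {
        return emailFlag;
    }
}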

Set command line parameters in IntelliJ
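The class below simply scans args for the flags; in IntelliJ, the equivalent of running java App -e -f is to put the flags under Run -> Edit Configurations -> Program arguments.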

package com.pli.project.test;

/**
 * Created by lipeng on 2015/4/4.
 */
public class App {

    public static void main(String[] args) {
        boolean eFlag = false;
        boolean fFlag = false;
        for(String para : args){
            if(para.equals("-e")){
                eFlag = true;
            }
            else if(para.equals("-f")){
                fFlag = true;
            }
        }
        if(eFlag){
            System.out.println("with -e parameter");
        }
        if(fFlag){
            System.out.println("with -f parameter");
        }

    }
}

Run class in jar file

Suppose we have a Maven project structured as below.

After we build it with Maven, a test-1.0.jar file is generated in the target folder. How do we run App.class inside the jar?

We can run it with the command below:
java -cp test-1.0.jar com.pli.project.test.App

In another case, if more than one jar file is needed, we can put several jars on the classpath. For example, below is how I run a batch job from the command line:

Windows:
java -cp "dependency-jars/*;SpringBatchLearn-1.0-SNAPSHOT.jar" org.springframework.batch.core.launch.support.CommandLineJobRunner springbatch-config.xml pliJob

Linux:
java -cp "dependency-jars/*:SpringBatchLearn-1.0-SNAPSHOT.jar" org.springframework.batch.core.launch.support.CommandLineJobRunner springbatch-config.xml pliJob

(Note that Windows uses ; as the classpath separator, while Linux uses :.)

Cuckoo hashmap

Cuckoo hashmap is an improved hashmap, named after the cuckoo bird's habit of kicking other birds' eggs out of the nest when it finds them.

Accordingly, we define 2 hashtables; an entry can be put in either table1 or table2. Assume we want to put (key1, value1) in table1, but the hash-calculated position for it in table1 is not empty: it originally holds (key2, value2). We still put (key1, value1) at that position in table1, and instead move (key2, value2) to table2. If there is another collision in table2, we do the same thing there as we did in table1. If we bounce entries around too many times, say more than a threshold, we call the resize method.

Compared to a classical hashmap, a cuckoo hashmap guarantees that lookup (the getKey() function here) runs in O(1) time. Another thing to notice is that the resize method seems to take O(n) time, but this still gives amortized O(1) time for insertion (refer here for more explanation: link).

import java.util.Map;

/**
 * Created by lipeng on 2015/4/2.
 */
public class CuckooHashMap<K, V> {
    private int size = 2;
    private static final int MAX_COMPARE = 16;
    private int entryNum;
    private float FACTOR = 0.75f;
    Entry[][] table = new Entry[2][size];

    CuckooHashMap(){}

    public boolean put(K key, V value){
        for(int i = 0, selectedBucket = 0; i < MAX_COMPARE; i++, selectedBucket = i % 2){
            // mask the sign bit so a negative hashCode can't produce a negative index
            int hashPos = ((key.hashCode() & 0x7fffffff) + selectedBucket * (size / 2)) % size;
            if(table[selectedBucket][hashPos]==null){  //the hash position is null, insert.
                table[selectedBucket][hashPos] = new Entry(key, value);
                if(++entryNum >= size * 2 * FACTOR){
                    resize();
                }
                return true;
            }
            else if(key.equals((K)table[selectedBucket][hashPos].getKey())){
                //Overwrite the value if key are the same
                table[selectedBucket][hashPos].setValue(value);
                return true;
            }
            else {
                //an Entry is in the position, do cuckoo move.
                K tmpKey = (K) table[selectedBucket][hashPos].getKey();
                V tmpValue = (V) table[selectedBucket][hashPos].getValue();
                table[selectedBucket][hashPos].setKey(key);
                table[selectedBucket][hashPos].setValue(value);
                key = tmpKey;
                value = tmpValue;
            }
        }
        //looped for 16 times, hasn't found right position yet. So do resize.
        resize();
        put(key, value);
        return true;
    }

    private boolean resize(){
        entryNum = 0;
        size = size << 1;
        Entry[][] oldTable = table;
        table = new Entry[2][size];
        for(int i=0; i<size / 2; i++){
            if(oldTable[0][i]!=null){
                put((K)oldTable[0][i].getKey(), (V)oldTable[0][i].getValue());
            }
            if(oldTable[1][i]!=null){
                put((K)oldTable[1][i].getKey(), (V)oldTable[1][i].getValue());
            }
        }
        return true;
    }

    public V getKey(K key){
        if(key==null){
            return null;
        }
        for(int selectedBucket = 0; selectedBucket < 2; selectedBucket++){
            int hashPos = ((key.hashCode() & 0x7fffffff) + selectedBucket * (size / 2)) % size;
            if(table[selectedBucket][hashPos]==null){
                continue;
            }
            else if(key.equals((K)table[selectedBucket][hashPos].getKey())){
                return (V)table[selectedBucket][hashPos].getValue();
            }
        }
        return null;
    }

    public boolean delete(K key){
        if(key==null){
            return false;
        }
        for(int selectedBucket = 0; selectedBucket < 2; selectedBucket++){
            int hashPos = ((key.hashCode() & 0x7fffffff) + selectedBucket * (size / 2)) % size;
            if(table[selectedBucket][hashPos]==null){
                continue;
            }
            else if(key.equals((K)table[selectedBucket][hashPos].getKey())){
                table[selectedBucket][hashPos] = null;
                return true;
            }
        }
        return false;
    }

    static class Entry<K, V> implements Map.Entry<K, V> {
        private K k;
        private V v;

        Entry(){}

        Entry(K k, V v){
            this.k = k;
            this.v = v;
        }

        @Override
        public K getKey() {
            return k;
        }

        @Override
        public V getValue() {
            return v;
        }

        @Override
        public V setValue(V value) {
            this.v = value;
            return v;
        }

        public K setKey(K key) {
            this.k = key;
            return k;
        }
    }
}
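
A quick usage sketch (note that the lookup method in this implementation is named getKey, even though it returns the value):

public class CuckooDemo {

    public static void main(String[] args) {
        CuckooHashMap<String, Integer> map = new CuckooHashMap<String, Integer>();
        map.put("a", 1);
        map.put("b", 2);
        map.put("a", 3);                      // same key: overwrites the old value
        System.out.println(map.getKey("a"));  // 3
        System.out.println(map.getKey("b"));  // 2
        map.delete("b");
        System.out.println(map.getKey("b"));  // null
    }
}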