メモ。HiveServer用に定義されている各メソッドをThrift RPCで呼んだときに何が返ってくるのよ、と。順序的には、基本的には execute をまずクエリつきで呼んで、それに対してクエリプラン・結果データのスキーマ・結果データそのものを取得する、というかたち。"getClusterStatus"だけはクライアントからいつでも発行できる。(のかな?)
コードはこんな感じ。
// Calls each HiveServer Thrift RPC method in turn and prints what it returns:
// getClusterStatus -> execute -> getQueryPlan -> getSchema -> getThriftSchema -> fetchAll.
var thrift = require('thrift'),
    ttransport = require('thrift/transport'),
    ThriftHive = require('gen-nodejs/ThriftHive');

var connection = thrift.createConnection("localhost", 10000, {transport: ttransport.TBufferedTransport, timeout: 600*1000}),
    client = thrift.createClient(ThriftHive, connection);

/**
 * Reports a failed RPC and terminates the process with exit status 1.
 * Does nothing when `err` is falsy.
 *
 * @param {string} method - Name of the RPC method that was called.
 * @param {*} err - Error value handed to the RPC callback, if any.
 */
function exitOnError(method, err) {
  if (err) {
    console.error("error on " + method + "(): " + err);
    process.exit(1);
  }
}

connection.on('error', function(err) {
  console.error(err);
});

connection.addListener("connect", function() {
  client.getClusterStatus(function(err, status) {
    exitOnError("getClusterStatus", err);
    console.log("getClusterStatus:", status);

    client.execute('select x, count(*) as cnt from p group by x sort by cnt limit 10', function(err) {
      exitOnError("execute", err);

      client.getQueryPlan(function(err, plan) {
        // Without this check a failed call would surface as a TypeError on plan.queries below.
        exitOnError("getQueryPlan", err);
        console.log("getQueryPlan:", plan);

        // The indices below are fixed for the plan of the query above (3 stages, 2 tasks each).
        var query = plan.queries[0];
        console.log("queryplan queryAttributes:", query.queryAttributes);
        console.log("queryplan stageGraph:", query.stageGraph);
        console.log("queryplan stageGraph adjacencyList children:", query.stageGraph.adjacencyList[0].children);
        console.log("queryplan stageGraph adjacencyList children:", query.stageGraph.adjacencyList[1].children);
        console.log("queryplan stageList:", query.stageList);
        console.log("queryplan stageList taskList:", query.stageList[0].taskList[0]);
        console.log("queryplan stageList taskList operatorGraph adjacencyList:", query.stageList[0].taskList[0].operatorGraph.adjacencyList);
        console.log("queryplan stageList taskList:", query.stageList[0].taskList[1]);
        console.log("queryplan stageList taskList:", query.stageList[1].taskList[0]);
        console.log("queryplan stageList taskList:", query.stageList[1].taskList[1]);
        console.log("queryplan stageList taskList:", query.stageList[2].taskList[0]);
        console.log("queryplan stageList taskList:", query.stageList[2].taskList[1]);

        client.getSchema(function(err, schema) {
          exitOnError("getSchema", err);
          console.log("getSchema:", schema);

          client.getThriftSchema(function(err, thriftSchema) {
            exitOnError("getThriftSchema", err);
            console.log("getThriftSchema:", thriftSchema);

            client.fetchAll(function(err, rows) {
              exitOnError("fetchAll", err);
              console.log("fetchAll:", rows);
              connection.end();
              process.exit(0);
            });
          });
        });
      });
    });
  });
});
結果。
getClusterStatus: { taskTrackers: 1, mapTasks: 0, reduceTasks: 0, maxMapTasks: 2, maxReduceTasks: 2, state: 2 } getQueryPlan: { queries: [ { queryId: 'hadoop_20110408164747_f3ab8cdc-4064-40b9-81c5-928d81f4b1ce', queryType: null, queryAttributes: [Object], queryCounters: null, stageGraph: [Object], stageList: [Object], done: true, started: true } ], done: false, started: false } queryplan queryAttributes: { queryString: 'select x, count(*) as cnt from p group by x sort by cnt limit 10' } queryplan stageGraph: { nodeType: 1, roots: null, adjacencyList: [ { node: 'Stage-1', children: [Object], adjacencyType: 0 }, { node: 'Stage-2', children: [Object], adjacencyType: 0 } ] } queryplan stageGraph adjacencyList children: [ 'Stage-2' ] queryplan stageGraph adjacencyList children: [ 'Stage-3' ] queryplan stageList: [ { stageId: 'Stage-1', stageType: 3, stageAttributes: null, stageCounters: { 'CNTR_NAME_Stage-1_REDUCE_PROGRESS': 100, 'CNTR_NAME_Stage-1_MAP_PROGRESS': 100 }, taskList: [ [Object], [Object] ], done: true, started: true }, { stageId: 'Stage-2', stageType: 3, stageAttributes: null, stageCounters: { 'CNTR_NAME_Stage-2_MAP_PROGRESS': 100, 'CNTR_NAME_Stage-2_REDUCE_PROGRESS': 100 }, taskList: [ [Object], [Object] ], done: true, started: true }, { stageId: 'Stage-3', stageType: 3, stageAttributes: null, stageCounters: { 'CNTR_NAME_Stage-3_REDUCE_PROGRESS': 100, 'CNTR_NAME_Stage-3_MAP_PROGRESS': 100 }, taskList: [ [Object], [Object] ], done: true, started: true } ] queryplan stageList taskList: { taskId: 'Stage-1_MAP', taskType: 0, taskAttributes: null, taskCounters: null, operatorGraph: { nodeType: 0, roots: null, adjacencyList: [ [Object], [Object], [Object] ] }, operatorList: [ { operatorId: 'TS_124', operatorType: 9, operatorAttributes: null, operatorCounters: {}, done: true, started: true }, { operatorId: 'SEL_125', operatorType: 8, operatorAttributes: null, operatorCounters: null, done: true, started: true }, { operatorId: 'GBY_126', operatorType: 5, 
operatorAttributes: null, operatorCounters: null, done: true, started: true }, { operatorId: 'RS_127', operatorType: 11, operatorAttributes: null, operatorCounters: null, done: true, started: true } ], done: true, started: true } queryplan stageList taskList operatorGraph adjacencyList: [ { node: 'TS_158', children: [ 'SEL_159' ], adjacencyType: 0 }, { node: 'SEL_159', children: [ 'GBY_160' ], adjacencyType: 0 }, { node: 'GBY_160', children: [ 'RS_161' ], adjacencyType: 0 } ] queryplan stageList taskList: { taskId: 'Stage-1_REDUCE', taskType: 1, taskAttributes: null, taskCounters: null, operatorGraph: { nodeType: 0, roots: null, adjacencyList: [ [Object], [Object] ] }, operatorList: [ { operatorId: 'GBY_128', operatorType: 5, operatorAttributes: null, operatorCounters: {}, done: true, started: true }, { operatorId: 'SEL_129', operatorType: 8, operatorAttributes: null, operatorCounters: null, done: true, started: true }, { operatorId: 'FS_137', operatorType: 10, operatorAttributes: null, operatorCounters: null, done: true, started: true } ], done: true, started: true } queryplan stageList taskList: { taskId: 'Stage-2_MAP', taskType: 0, taskAttributes: null, taskCounters: null, operatorGraph: { nodeType: 0, roots: null, adjacencyList: [ [Object] ] }, operatorList: [ { operatorId: 'TS_138', operatorType: 9, operatorAttributes: null, operatorCounters: {}, done: true, started: true }, { operatorId: 'RS_130', operatorType: 11, operatorAttributes: null, operatorCounters: null, done: true, started: true } ], done: true, started: true } queryplan stageList taskList: { taskId: 'Stage-2_REDUCE', taskType: 1, taskAttributes: null, taskCounters: null, operatorGraph: { nodeType: 0, roots: null, adjacencyList: [ [Object], [Object] ] }, operatorList: [ { operatorId: 'OP_131', operatorType: 2, operatorAttributes: null, operatorCounters: {}, done: true, started: true }, { operatorId: 'LIM_132', operatorType: 6, operatorAttributes: null, operatorCounters: null, done: true, started: 
true }, { operatorId: 'FS_139', operatorType: 10, operatorAttributes: null, operatorCounters: null, done: true, started: true } ], done: true, started: true } queryplan stageList taskList: { taskId: 'Stage-3_MAP', taskType: 0, taskAttributes: null, taskCounters: null, operatorGraph: { nodeType: 0, roots: null, adjacencyList: [ [Object] ] }, operatorList: [ { operatorId: 'TS_140', operatorType: 9, operatorAttributes: null, operatorCounters: {}, done: true, started: true }, { operatorId: 'RS_133', operatorType: 11, operatorAttributes: null, operatorCounters: null, done: true, started: true } ], done: true, started: true } queryplan stageList taskList: { taskId: 'Stage-3_REDUCE', taskType: 1, taskAttributes: null, taskCounters: null, operatorGraph: { nodeType: 0, roots: null, adjacencyList: [ [Object], [Object] ] }, operatorList: [ { operatorId: 'OP_134', operatorType: 2, operatorAttributes: null, operatorCounters: {}, done: true, started: true }, { operatorId: 'LIM_135', operatorType: 6, operatorAttributes: null, operatorCounters: null, done: true, started: true }, { operatorId: 'FS_136', operatorType: 10, operatorAttributes: null, operatorCounters: null, done: true, started: true } ], done: true, started: true } getSchema: { fieldSchemas: [ { name: 'x', type: 'string', comment: null }, { name: 'cnt', type: 'bigint', comment: null } ], properties: null } getThriftSchema: { fieldSchemas: [ { name: 'x', type: 'string', comment: null }, { name: 'cnt', type: 'i64', comment: null } ], properties: null } fetchAll: [ 'README.md\t1', 'bin\t1', 'derby.log\t1', 'extlib\t1', 'hql\t1', 'js\t1', 'lib\t1', 'metastore_db\t1', 't\t1' ]
しかし node.js の console.log() や console.error() はオブジェクトを渡すと適当にシリアライズしてくれるので実にラクでいいね!