Elasticsearch 和 PIG:如何获得使用字段名称的工作模式?

Elasticsearch and PIG: how to get a working schema to use field names?

我有一个由 logstash 生成并加载到 elasticsearch 中的索引。现在我想在 pig 中执行一些操作并将结果返回给 ES。 问题是我可以通过索引访问字段,但不能通过字段名访问。

以下作品:

A = LOAD 'logstash-2015.01.29/logs' USING org.elasticsearch.hadoop.pig.EsStorage();
B = foreach A generate [=10=];
DUMP B;

虽然这不是:

A = LOAD 'logstash-2015.01.29/logs' USING org.elasticsearch.hadoop.pig.EsStorage();
B = foreach A generate clientId;
DUMP B;

如何使用字段名称?

索引映射如下:

{
  "logstash-2015.01.21" : {
    "mappings" : {
      "_default_" : {
        "dynamic_templates" : [ {
          "string_fields" : {
            "mapping" : {
              "index" : "analyzed",
              "omit_norms" : true,
              "type" : "string",
              "fields" : {
                "raw" : {
                  "ignore_above" : 256,
                  "index" : "not_analyzed",
                  "type" : "string"
                }
              }
            },
            "match" : "*",
            "match_mapping_type" : "string"
          }
        } ],
        "_all" : {
          "enabled" : true
        },
        "properties" : {
          "@version" : {
            "type" : "string",
            "index" : "not_analyzed"
          },
          "geoip" : {
            "dynamic" : "true",
            "properties" : {
              "location" : {
                "type" : "geo_point"
              }
            }
          }
        }
      },
      "logs" : {
        "dynamic_templates" : [ {
          "string_fields" : {
            "mapping" : {
              "index" : "analyzed",
              "omit_norms" : true,
              "type" : "string",
              "fields" : {
                "raw" : {
                  "ignore_above" : 256,
                  "index" : "not_analyzed",
                  "type" : "string"
                }
              }
            },
            "match" : "*",
            "match_mapping_type" : "string"
          }
        } ],
        "_all" : {
          "enabled" : true
        },
        "properties" : {
          "@timestamp" : {
            "type" : "date",
            "format" : "dateOptionalTime"
          },
          "@version" : {
            "type" : "string",
            "index" : "not_analyzed"
          },
          "__unam" : {
            "type" : "string",
            "norms" : {
              "enabled" : false
            },
            "fields" : {
              "raw" : {
                "type" : "string",
                "index" : "not_analyzed",
                "ignore_above" : 256
              }
            }
          },
          "__utma" : {
            "type" : "string",
            "norms" : {
              "enabled" : false
            },
            "fields" : {
              "raw" : {
                "type" : "string",
                "index" : "not_analyzed",
                "ignore_above" : 256
              }
            }
          },
          "__utmz" : {
            "type" : "string",
            "norms" : {
              "enabled" : false
            },
            "fields" : {
              "raw" : {
                "type" : "string",
                "index" : "not_analyzed",
                "ignore_above" : 256
              }
            }
          },
          "_ga" : {
            "type" : "string",
            "norms" : {
              "enabled" : false
            },
            "fields" : {
              "raw" : {
                "type" : "string",
                "index" : "not_analyzed",
                "ignore_above" : 256
              }
            }
          },
          "_gat" : {
            "type" : "string",
            "norms" : {
              "enabled" : false
            },
            "fields" : {
              "raw" : {
                "type" : "string",
                "index" : "not_analyzed",
                "ignore_above" : 256
              }
            }
          },
          "_r" : {
            "type" : "string",
            "norms" : {
              "enabled" : false
            },
            "fields" : {
              "raw" : {
                "type" : "string",
                "index" : "not_analyzed",
                "ignore_above" : 256
              }
            }
          },
          "_s" : {
            "type" : "string",
            "norms" : {
              "enabled" : false
            },
            "fields" : {
              "raw" : {
                "type" : "string",
                "index" : "not_analyzed",
                "ignore_above" : 256
              }
            }
          },
          "_u" : {
            "type" : "string",
            "norms" : {
              "enabled" : false
            },
            "fields" : {
              "raw" : {
                "type" : "string",
                "index" : "not_analyzed",
                "ignore_above" : 256
              }
            }
          },
          "_utma" : {
            "type" : "string",
            "norms" : {
              "enabled" : false
            },
            "fields" : {
              "raw" : {
                "type" : "string",
                "index" : "not_analyzed",
                "ignore_above" : 256
              }
            }
          },
          "_utmht" : {
            "type" : "string",
            "norms" : {
              "enabled" : false
            },
            "fields" : {
              "raw" : {
                "type" : "string",
                "index" : "not_analyzed",
                "ignore_above" : 256
              }
            }
          },
          "_utmz" : {
            "type" : "string",
            "norms" : {
              "enabled" : false
            },
            "fields" : {
              "raw" : {
                "type" : "string",
                "index" : "not_analyzed",
                "ignore_above" : 256
              }
            }
          },
          "adSenseId" : {
            "type" : "string",
            "norms" : {
              "enabled" : false
            },
            "fields" : {
              "raw" : {
                "type" : "string",
                "index" : "not_analyzed",
                "ignore_above" : 256
              }
            }
          },
          "agent" : {
            "type" : "string",
            "norms" : {
              "enabled" : false
            },
            "fields" : {
              "raw" : {
                "type" : "string",
                "index" : "not_analyzed",
                "ignore_above" : 256
              }
            }
          },
          "apiVersion" : {
            "type" : "string",
            "norms" : {
              "enabled" : false
            },
            "fields" : {
              "raw" : {
                "type" : "string",
                "index" : "not_analyzed",
                "ignore_above" : 256
              }
            }
          },
          "auth" : {
            "type" : "string",
            "norms" : {
              "enabled" : false
            },
            "fields" : {
              "raw" : {
                "type" : "string",
                "index" : "not_analyzed",
                "ignore_above" : 256
              }
            }
          },
          "bytes" : {
            "type" : "string",
            "norms" : {
              "enabled" : false
            },
            "fields" : {
              "raw" : {
                "type" : "string",
                "index" : "not_analyzed",
                "ignore_above" : 256
              }
            }
          },
          "clientId" : {
            "type" : "string",
            "norms" : {
              "enabled" : false
            },
            "fields" : {
              "raw" : {
                "type" : "string",
                "index" : "not_analyzed",
                "ignore_above" : 256
              }
            }
          },
          "clientVersion" : {
            "type" : "string",
            "norms" : {
              "enabled" : false
            },
            "fields" : {
              "raw" : {
                "type" : "string",
                "index" : "not_analyzed",
                "ignore_above" : 256
              }
            }
          },
          "clientip" : {
            "type" : "string",
            "norms" : {
              "enabled" : false
            },
            "fields" : {
              "raw" : {
                "type" : "string",
                "index" : "not_analyzed",
                "ignore_above" : 256
              }
            }
          },
          "cookies" : {
            "type" : "string",
            "norms" : {
              "enabled" : false
            },
            "fields" : {
              "raw" : {
                "type" : "string",
                "index" : "not_analyzed",
                "ignore_above" : 256
              }
            }
          },
          "csrftoken" : {
            "type" : "string",
            "norms" : {
              "enabled" : false
            },
            "fields" : {
              "raw" : {
                "type" : "string",
                "index" : "not_analyzed",
                "ignore_above" : 256
              }
            }
          },
          "encoding" : {
            "type" : "string",
            "norms" : {
              "enabled" : false
            },
            "fields" : {
              "raw" : {
                "type" : "string",
                "index" : "not_analyzed",
                "ignore_above" : 256
              }
            }
          },
          "eventAction" : {
            "type" : "string",
            "norms" : {
              "enabled" : false
            },
            "fields" : {
              "raw" : {
                "type" : "string",
                "index" : "not_analyzed",
                "ignore_above" : 256
              }
            }
          },
          "eventCategory" : {
            "type" : "string",
            "norms" : {
              "enabled" : false
            },
            "fields" : {
              "raw" : {
                "type" : "string",
                "index" : "not_analyzed",
                "ignore_above" : 256
              }
            }
          },
          "eventLabel" : {
            "type" : "string",
            "norms" : {
              "enabled" : false
            },
            "fields" : {
              "raw" : {
                "type" : "string",
                "index" : "not_analyzed",
                "ignore_above" : 256
              }
            }
          },
          "eventValue" : {
            "type" : "string",
            "norms" : {
              "enabled" : false
            },
            "fields" : {
              "raw" : {
                "type" : "string",
                "index" : "not_analyzed",
                "ignore_above" : 256
              }
            }
          },
          "fbm_1517496788499776" : {
            "type" : "string",
            "norms" : {
              "enabled" : false
            },
            "fields" : {
              "raw" : {
                "type" : "string",
                "index" : "not_analyzed",
                "ignore_above" : 256
              }
            }
          },
          "flashVersion" : {
            "type" : "string",
            "norms" : {
              "enabled" : false
            },
            "fields" : {
              "raw" : {
                "type" : "string",
                "index" : "not_analyzed",
                "ignore_above" : 256
              }
            }
          },
          "geoip" : {
            "dynamic" : "true",
            "properties" : {
              "location" : {
                "type" : "geo_point"
              }
            }
          },
          "hitType" : {
            "type" : "string",
            "norms" : {
              "enabled" : false
            },
            "fields" : {
              "raw" : {
                "type" : "string",
                "index" : "not_analyzed",
                "ignore_above" : 256
              }
            }
          },
          "host" : {
            "type" : "string",
            "norms" : {
              "enabled" : false
            },
            "fields" : {
              "raw" : {
                "type" : "string",
                "index" : "not_analyzed",
                "ignore_above" : 256
              }
            }
          },
          "httpversion" : {
            "type" : "string",
            "norms" : {
              "enabled" : false
            },
            "fields" : {
              "raw" : {
                "type" : "string",
                "index" : "not_analyzed",
                "ignore_above" : 256
              }
            }
          },
          "ident" : {
            "type" : "string",
            "norms" : {
              "enabled" : false
            },
            "fields" : {
              "raw" : {
                "type" : "string",
                "index" : "not_analyzed",
                "ignore_above" : 256
              }
            }
          },
          "javaEnabled" : {
            "type" : "string",
            "norms" : {
              "enabled" : false
            },
            "fields" : {
              "raw" : {
                "type" : "string",
                "index" : "not_analyzed",
                "ignore_above" : 256
              }
            }
          },
          "jid" : {
            "type" : "string",
            "norms" : {
              "enabled" : false
            },
            "fields" : {
              "raw" : {
                "type" : "string",
                "index" : "not_analyzed",
                "ignore_above" : 256
              }
            }
          },
          "language" : {
            "type" : "string",
            "norms" : {
              "enabled" : false
            },
            "fields" : {
              "raw" : {
                "type" : "string",
                "index" : "not_analyzed",
                "ignore_above" : 256
              }
            }
          },
          "location" : {
            "type" : "string",
            "norms" : {
              "enabled" : false
            },
            "fields" : {
              "raw" : {
                "type" : "string",
                "index" : "not_analyzed",
                "ignore_above" : 256
              }
            }
          },
          "message" : {
            "type" : "string",
            "norms" : {
              "enabled" : false
            },
            "fields" : {
              "raw" : {
                "type" : "string",
                "index" : "not_analyzed",
                "ignore_above" : 256
              }
            }
          },
          "messages" : {
            "type" : "string",
            "norms" : {
              "enabled" : false
            },
            "fields" : {
              "raw" : {
                "type" : "string",
                "index" : "not_analyzed",
                "ignore_above" : 256
              }
            }
          },
          "mindsparktb_206720000" : {
            "type" : "string",
            "norms" : {
              "enabled" : false
            },
            "fields" : {
              "raw" : {
                "type" : "string",
                "index" : "not_analyzed",
                "ignore_above" : 256
              }
            }
          },
          "mindsparktbsupport_206720000" : {
            "type" : "string",
            "norms" : {
              "enabled" : false
            },
            "fields" : {
              "raw" : {
                "type" : "string",
                "index" : "not_analyzed",
                "ignore_above" : 256
              }
            }
          },
          "nonInteraction" : {
            "type" : "string",
            "norms" : {
              "enabled" : false
            },
            "fields" : {
              "raw" : {
                "type" : "string",
                "index" : "not_analyzed",
                "ignore_above" : 256
              }
            }
          },
          "page" : {
            "type" : "string",
            "norms" : {
              "enabled" : false
            },
            "fields" : {
              "raw" : {
                "type" : "string",
                "index" : "not_analyzed",
                "ignore_above" : 256
              }
            }
          },
          "path" : {
            "type" : "string",
            "norms" : {
              "enabled" : false
            },
            "fields" : {
              "raw" : {
                "type" : "string",
                "index" : "not_analyzed",
                "ignore_above" : 256
              }
            }
          },
          "referrer" : {
            "type" : "string",
            "norms" : {
              "enabled" : false
            },
            "fields" : {
              "raw" : {
                "type" : "string",
                "index" : "not_analyzed",
                "ignore_above" : 256
              }
            }
          },
          "request" : {
            "type" : "string",
            "norms" : {
              "enabled" : false
            },
            "fields" : {
              "raw" : {
                "type" : "string",
                "index" : "not_analyzed",
                "ignore_above" : 256
              }
            }
          },
          "response" : {
            "type" : "string",
            "norms" : {
              "enabled" : false
            },
            "fields" : {
              "raw" : {
                "type" : "string",
                "index" : "not_analyzed",
                "ignore_above" : 256
              }
            }
          },
          "screenColors" : {
            "type" : "string",
            "norms" : {
              "enabled" : false
            },
            "fields" : {
              "raw" : {
                "type" : "string",
                "index" : "not_analyzed",
                "ignore_above" : 256
              }
            }
          },
          "screenResolution" : {
            "type" : "string",
            "norms" : {
              "enabled" : false
            },
            "fields" : {
              "raw" : {
                "type" : "string",
                "index" : "not_analyzed",
                "ignore_above" : 256
              }
            }
          },
          "sessionid" : {
            "type" : "string",
            "norms" : {
              "enabled" : false
            },
            "fields" : {
              "raw" : {
                "type" : "string",
                "index" : "not_analyzed",
                "ignore_above" : 256
              }
            }
          },
          "timestamp" : {
            "type" : "string",
            "norms" : {
              "enabled" : false
            },
            "fields" : {
              "raw" : {
                "type" : "string",
                "index" : "not_analyzed",
                "ignore_above" : 256
              }
            }
          },
          "timingCategory" : {
            "type" : "string",
            "norms" : {
              "enabled" : false
            },
            "fields" : {
              "raw" : {
                "type" : "string",
                "index" : "not_analyzed",
                "ignore_above" : 256
              }
            }
          },
          "timingLabel" : {
            "type" : "string",
            "norms" : {
              "enabled" : false
            },
            "fields" : {
              "raw" : {
                "type" : "string",
                "index" : "not_analyzed",
                "ignore_above" : 256
              }
            }
          },
          "timingValue" : {
            "type" : "string",
            "norms" : {
              "enabled" : false
            },
            "fields" : {
              "raw" : {
                "type" : "string",
                "index" : "not_analyzed",
                "ignore_above" : 256
              }
            }
          },
          "timingVar" : {
            "type" : "string",
            "norms" : {
              "enabled" : false
            },
            "fields" : {
              "raw" : {
                "type" : "string",
                "index" : "not_analyzed",
                "ignore_above" : 256
              }
            }
          },
          "title" : {
            "type" : "string",
            "norms" : {
              "enabled" : false
            },
            "fields" : {
              "raw" : {
                "type" : "string",
                "index" : "not_analyzed",
                "ignore_above" : 256
              }
            }
          },
          "trackingId" : {
            "type" : "string",
            "norms" : {
              "enabled" : false
            },
            "fields" : {
              "raw" : {
                "type" : "string",
                "index" : "not_analyzed",
                "ignore_above" : 256
              }
            }
          },
          "userId" : {
            "type" : "string",
            "norms" : {
              "enabled" : false
            },
            "fields" : {
              "raw" : {
                "type" : "string",
                "index" : "not_analyzed",
                "ignore_above" : 256
              }
            }
          },
          "verb" : {
            "type" : "string",
            "norms" : {
              "enabled" : false
            },
            "fields" : {
              "raw" : {
                "type" : "string",
                "index" : "not_analyzed",
                "ignore_above" : 256
              }
            }
          },
          "viewportSize" : {
            "type" : "string",
            "norms" : {
              "enabled" : false
            },
            "fields" : {
              "raw" : {
                "type" : "string",
                "index" : "not_analyzed",
                "ignore_above" : 256
              }
            }
          },
          "z" : {
            "type" : "string",
            "norms" : {
              "enabled" : false
            },
            "fields" : {
              "raw" : {
                "type" : "string",
                "index" : "not_analyzed",
                "ignore_above" : 256
              }
            }
          }
        }
      }
    }
  }
}

最后我写了一个 python 脚本,它生成一个 es.mapping.names 设置和一个 AS 子句。现在我可以使用字段名称了...

import json;
from urllib2 import urlopen
jsonres= json.loads(urlopen('http://localhost:9200/logstash-*/_mapping').read())
allkeysbuttime=set()
for key in jsonres:
    allkeysbuttime.update(jsonres[key]['mappings']['logs']['properties'].keys())
allkeysbuttime.remove('@timestamp')
f=open('pigschema.txt','w+')

#Pig field name : {es} field name
f.write("'es.mapping.names=xtimestamp:@timestamp")
for k in allkeysbuttime:
    f.write(', '+k.replace('@','x').replace('_','x')+':'+k)
f.write("'\n\n")

f.write('AS (xtimestamp:datetime')
for k in allkeysbuttime:
    f.write(', '+k.replace('@','x').replace('_','x')+':chararray')
f.write(')')
f.close()