CQL COPY 命令向 Cassandra 表导入数据不起作用
CQL copy to Cassandra table does not work
我有一个 tsv 文件,其中包含如下记录:
"county_id" "county_desc" "voter_reg_num" "status_cd" "voter_status_desc" "reason_cd" "voter_status_reason_desc" "absent_ind" "name_prefx_cd" "last_name" "first_name" "middle_name" "name_suffix_lbl" "res_street_address" "res_city_desc" "state_cd" "zip_code" "mail_addr1" "mail_addr2" "mail_addr3" "mail_addr4" "mail_city" "mail_state" "mail_zipcode" "full_phone_number" "race_code" "ethnic_code" "party_cd" "gender_code" "birth_age" "birth_state" "drivers_lic" "registr_dt" "precinct_abbrv" "precinct_desc" "municipality_abbrv" "municipality_desc" "ward_abbrv" "ward_desc" "cong_dist_abbrv" "super_court_abbrv" "judic_dist_abbrv" "nc_senate_abbrv" "nc_house_abbrv" "county_commiss_abbrv" "county_commiss_desc" "township_abbrv" "township_desc" "school_dist_abbrv" "school_dist_desc" "fire_dist_abbrv" "fire_dist_desc" "water_dist_abbrv" "water_dist_desc" "sewer_dist_abbrv" "sewer_dist_desc" "sanit_dist_abbrv" "sanit_dist_desc" "rescue_dist_abbrv" "rescue_dist_desc" "munic_dist_abbrv" "munic_dist_desc" "dist_1_abbrv" "dist_1_desc" "dist_2_abbrv" "dist_2_desc" "confidential_ind" "age" "ncid" "vtd_abbrv" "vtd_desc"
"1" "ALAMANCE" "000009005990" "A" "ACTIVE" "AV" "VERIFIED" " " " " "AABEL" "EVELYN" "LARSEN" "" "4430 E GREENSBORO-CHAPEL HILL RD " "GRAHAM" "NC" "27253" "4430 E GREENSBORO-CHAPEL HILL RD" "" "" "" "GRAHAM" "NC" "27253" "0000000" "W" "NL" "UNA" "F" "80" "NY" "N" "10/01/1984" "08N" "NORTH NEWLIN" "" "" "" "" "06" "15A" "15A" "24" "064" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "15A" "15A PROSECUTORIAL" " " " " "N" "Age Over 66" "AA56273" "08N" "08N"
"1" "ALAMANCE" "000009048723" "A" "ACTIVE" "AV" "VERIFIED" " " " " "AARON" "CHRISTINA" "CASTAGNA" "" "421 WHITT AVE " "BURLINGTON" "NC" "27215" "421 WHITT AVE" "" "" "" "BURLINGTON" "NC" "27215" "3362291110" "W" "UN" "UNA" "F" "40" "NC" "Y" "03/26/1996" "03S" "SOUTH BOONE" "BUR" "BURLINGTON" "" "" "06" "15A" "15A" "24" "064" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "BUR" "BURLINGTON" "15A" "15A PROSECUTORIAL" " " " " "N" "Age 26 - 40" "AA98377" "03S" "03S"
"1" "ALAMANCE" "000009019674" "A" "ACTIVE" "AV" "VERIFIED" " " " " "AARON" "CLAUDIA" "HAYDEN" "" "1013 EDITH ST " "BURLINGTON" "NC" "27215" "1013 EDITH ST" "" "" "" "BURLINGTON" "NC" "27215" "2228834" "W" "NL" "UNA" "F" "71" "VA" "Y" "08/15/1989" "124" "BURLINGTON 4" "BUR" "BURLINGTON" "" "" "06" "15A" "15A" "24" "063" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "BUR" "BURLINGTON" "15A" "15A PROSECUTORIAL" " " " " "N" "Age Over 66" "AA69747" "124" "124"
"1" "ALAMANCE" "000009129589" "A" "ACTIVE" "AV" "VERIFIED" " " " " "AARON" "JAMES" "MICHAEL" "" "5608 OLD CHEROKEE LN " "GRAHAM" "NC" "27253" "PO BOX 98" "" "" "" "SAXAPAHAW" "NC" "27340" "2027443411" "W" "UN" "DEM" "M" "68" "MA" "N" "03/07/2012" "08N" "NORTH NEWLIN" "" "" "" "" "06" "15A" "15A" "24" "064" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "15A" "15A PROSECUTORIAL" " " " " "N" "Age Over 66" "AA170513" "08N" "08N"
以及如下的 keyspace 和表结构(schema):
-- Single-replica keyspace named "test" using SimpleStrategy.
CREATE KEYSPACE test
    WITH replication = {'class': 'SimpleStrategy', 'replication_factor': 1};

-- Voter registration table: 71 text columns, keyed by voter_reg_num alone.
-- Column names follow the TSV header except midl_name, name_sufx_cd and
-- birth_place (header: middle_name, name_suffix_lbl, birth_state).
-- NOTE(review): no USE statement is visible here, so the table is created in
-- whichever keyspace is current when this runs — confirm that it is "test".
CREATE TABLE voters (
    county_id                 text,
    county_desc               text,
    voter_reg_num             text PRIMARY KEY,
    status_cd                 text,
    voter_status_desc         text,
    reason_cd                 text,
    voter_status_reason_desc  text,
    absent_ind                text,
    name_prefx_cd             text,
    last_name                 text,
    first_name                text,
    midl_name                 text,
    name_sufx_cd              text,
    res_street_address        text,
    res_city_desc             text,
    state_cd                  text,
    zip_code                  text,
    mail_addr1                text,
    mail_addr2                text,
    mail_addr3                text,
    mail_addr4                text,
    mail_city                 text,
    mail_state                text,
    mail_zipcode              text,
    full_phone_number         text,
    race_code                 text,
    ethnic_code               text,
    party_cd                  text,
    gender_code               text,
    birth_age                 text,
    birth_place               text,
    drivers_lic               text,
    registr_dt                text,
    precinct_abbrv            text,
    precinct_desc             text,
    municipality_abbrv        text,
    municipality_desc         text,
    ward_abbrv                text,
    ward_desc                 text,
    cong_dist_abbrv           text,
    super_court_abbrv         text,
    judic_dist_abbrv          text,
    nc_senate_abbrv           text,
    nc_house_abbrv            text,
    county_commiss_abbrv      text,
    county_commiss_desc       text,
    township_abbrv            text,
    township_desc             text,
    school_dist_abbrv         text,
    school_dist_desc          text,
    fire_dist_abbrv           text,
    fire_dist_desc            text,
    water_dist_abbrv          text,
    water_dist_desc           text,
    sewer_dist_abbrv          text,
    sewer_dist_desc           text,
    sanit_dist_abbrv          text,
    sanit_dist_desc           text,
    rescue_dist_abbrv         text,
    rescue_dist_desc          text,
    munic_dist_abbrv          text,
    munic_dist_desc           text,
    dist_1_abbrv              text,
    dist_1_desc               text,
    dist_2_abbrv              text,
    dist_2_desc               text,
    confidential_ind          text,
    age                       text,
    ncid                      text,
    vtd_abbrv                 text,
    vtd_desc                  text
);
当我使用以下命令尝试把数据导入 Cassandra 表时:
-- Import sample.tsv into voters, splitting fields on tab characters.
-- No column list and no HEADER option are given, so nothing marks the file's
-- first line (the column names) as a header.
-- NOTE(review): without a column list, cqlsh presumably maps fields using the
-- table's own column order rather than the file header — confirm against the
-- cqlsh COPY documentation.
COPY voters FROM 'sample.tsv' WITH DELIMITER='\t';
我得到:
Processed: 1000 rows; Rate: 1230 rows/s; Avg. rate: 1945 rows/s
1000 rows imported from 1 files in 0.514 seconds (0 skipped).
但是,当我对该表执行 count 查询时,发现表中只有一条记录。
我做错了什么?提前致谢。
如果您的 tsv 文件顶部有列名称,那么不要忘记将 WITH HEADER = TRUE 添加到您的 COPY 命令中。我还会删除双引号并验证字段是否由制表符而不是空格分隔。要进行测试,只需从原始文件创建一个包含 50 行左右的新 tsv 文件,然后在其中进行更改。让我知道进展如何。
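在 COPY 命令中显式指定列名,并使其顺序与 tsv 文件中的字段顺序一致。如果不指定列名,cqlsh 会按表元数据中的列顺序(主键列在前,其余列按字母顺序)而不是文件中的顺序来映射字段,因此每一行的主键都可能取到同一个值(例如 county_id 的 "1")并相互覆盖,最终只剩一条记录: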
-- Load sample.tsv into voters, naming the target column for every file field
-- in file order. HEADER = true makes cqlsh skip the first line of the file
-- (the column-name row) instead of importing it as data.
-- The list must use the table's column names, which differ from the file
-- header in three places: midl_name (header: middle_name), name_sufx_cd
-- (header: name_suffix_lbl) and birth_place (header: birth_state).
COPY voters (
    county_id, county_desc, voter_reg_num, status_cd, voter_status_desc,
    reason_cd, voter_status_reason_desc, absent_ind, name_prefx_cd,
    last_name, first_name, midl_name, name_sufx_cd,
    res_street_address, res_city_desc, state_cd, zip_code,
    mail_addr1, mail_addr2, mail_addr3, mail_addr4,
    mail_city, mail_state, mail_zipcode, full_phone_number,
    race_code, ethnic_code, party_cd, gender_code,
    birth_age, birth_place, drivers_lic, registr_dt,
    precinct_abbrv, precinct_desc, municipality_abbrv, municipality_desc,
    ward_abbrv, ward_desc, cong_dist_abbrv, super_court_abbrv,
    judic_dist_abbrv, nc_senate_abbrv, nc_house_abbrv,
    county_commiss_abbrv, county_commiss_desc, township_abbrv, township_desc,
    school_dist_abbrv, school_dist_desc, fire_dist_abbrv, fire_dist_desc,
    water_dist_abbrv, water_dist_desc, sewer_dist_abbrv, sewer_dist_desc,
    sanit_dist_abbrv, sanit_dist_desc, rescue_dist_abbrv, rescue_dist_desc,
    munic_dist_abbrv, munic_dist_desc, dist_1_abbrv, dist_1_desc,
    dist_2_abbrv, dist_2_desc, confidential_ind, age, ncid,
    vtd_abbrv, vtd_desc
)
FROM 'sample.tsv'
WITH DELIMITER = '\t' AND HEADER = true;
我有一个 tsv 文件,其中包含如下记录:
"county_id" "county_desc" "voter_reg_num" "status_cd" "voter_status_desc" "reason_cd" "voter_status_reason_desc" "absent_ind" "name_prefx_cd" "last_name" "first_name" "middle_name" "name_suffix_lbl" "res_street_address" "res_city_desc" "state_cd" "zip_code" "mail_addr1" "mail_addr2" "mail_addr3" "mail_addr4" "mail_city" "mail_state" "mail_zipcode" "full_phone_number" "race_code" "ethnic_code" "party_cd" "gender_code" "birth_age" "birth_state" "drivers_lic" "registr_dt" "precinct_abbrv" "precinct_desc" "municipality_abbrv" "municipality_desc" "ward_abbrv" "ward_desc" "cong_dist_abbrv" "super_court_abbrv" "judic_dist_abbrv" "nc_senate_abbrv" "nc_house_abbrv" "county_commiss_abbrv" "county_commiss_desc" "township_abbrv" "township_desc" "school_dist_abbrv" "school_dist_desc" "fire_dist_abbrv" "fire_dist_desc" "water_dist_abbrv" "water_dist_desc" "sewer_dist_abbrv" "sewer_dist_desc" "sanit_dist_abbrv" "sanit_dist_desc" "rescue_dist_abbrv" "rescue_dist_desc" "munic_dist_abbrv" "munic_dist_desc" "dist_1_abbrv" "dist_1_desc" "dist_2_abbrv" "dist_2_desc" "confidential_ind" "age" "ncid" "vtd_abbrv" "vtd_desc"
"1" "ALAMANCE" "000009005990" "A" "ACTIVE" "AV" "VERIFIED" " " " " "AABEL" "EVELYN" "LARSEN" "" "4430 E GREENSBORO-CHAPEL HILL RD " "GRAHAM" "NC" "27253" "4430 E GREENSBORO-CHAPEL HILL RD" "" "" "" "GRAHAM" "NC" "27253" "0000000" "W" "NL" "UNA" "F" "80" "NY" "N" "10/01/1984" "08N" "NORTH NEWLIN" "" "" "" "" "06" "15A" "15A" "24" "064" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "15A" "15A PROSECUTORIAL" " " " " "N" "Age Over 66" "AA56273" "08N" "08N"
"1" "ALAMANCE" "000009048723" "A" "ACTIVE" "AV" "VERIFIED" " " " " "AARON" "CHRISTINA" "CASTAGNA" "" "421 WHITT AVE " "BURLINGTON" "NC" "27215" "421 WHITT AVE" "" "" "" "BURLINGTON" "NC" "27215" "3362291110" "W" "UN" "UNA" "F" "40" "NC" "Y" "03/26/1996" "03S" "SOUTH BOONE" "BUR" "BURLINGTON" "" "" "06" "15A" "15A" "24" "064" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "BUR" "BURLINGTON" "15A" "15A PROSECUTORIAL" " " " " "N" "Age 26 - 40" "AA98377" "03S" "03S"
"1" "ALAMANCE" "000009019674" "A" "ACTIVE" "AV" "VERIFIED" " " " " "AARON" "CLAUDIA" "HAYDEN" "" "1013 EDITH ST " "BURLINGTON" "NC" "27215" "1013 EDITH ST" "" "" "" "BURLINGTON" "NC" "27215" "2228834" "W" "NL" "UNA" "F" "71" "VA" "Y" "08/15/1989" "124" "BURLINGTON 4" "BUR" "BURLINGTON" "" "" "06" "15A" "15A" "24" "063" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "BUR" "BURLINGTON" "15A" "15A PROSECUTORIAL" " " " " "N" "Age Over 66" "AA69747" "124" "124"
"1" "ALAMANCE" "000009129589" "A" "ACTIVE" "AV" "VERIFIED" " " " " "AARON" "JAMES" "MICHAEL" "" "5608 OLD CHEROKEE LN " "GRAHAM" "NC" "27253" "PO BOX 98" "" "" "" "SAXAPAHAW" "NC" "27340" "2027443411" "W" "UN" "DEM" "M" "68" "MA" "N" "03/07/2012" "08N" "NORTH NEWLIN" "" "" "" "" "06" "15A" "15A" "24" "064" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "15A" "15A PROSECUTORIAL" " " " " "N" "Age Over 66" "AA170513" "08N" "08N"
以及如下的 keyspace 和表结构(schema):
-- Single-replica keyspace named "test" using SimpleStrategy.
CREATE KEYSPACE test
    WITH replication = {'class': 'SimpleStrategy', 'replication_factor': 1};

-- Voter registration table: 71 text columns, keyed by voter_reg_num alone.
-- Column names follow the TSV header except midl_name, name_sufx_cd and
-- birth_place (header: middle_name, name_suffix_lbl, birth_state).
-- NOTE(review): no USE statement is visible here, so the table is created in
-- whichever keyspace is current when this runs — confirm that it is "test".
CREATE TABLE voters (
    county_id                 text,
    county_desc               text,
    voter_reg_num             text PRIMARY KEY,
    status_cd                 text,
    voter_status_desc         text,
    reason_cd                 text,
    voter_status_reason_desc  text,
    absent_ind                text,
    name_prefx_cd             text,
    last_name                 text,
    first_name                text,
    midl_name                 text,
    name_sufx_cd              text,
    res_street_address        text,
    res_city_desc             text,
    state_cd                  text,
    zip_code                  text,
    mail_addr1                text,
    mail_addr2                text,
    mail_addr3                text,
    mail_addr4                text,
    mail_city                 text,
    mail_state                text,
    mail_zipcode              text,
    full_phone_number         text,
    race_code                 text,
    ethnic_code               text,
    party_cd                  text,
    gender_code               text,
    birth_age                 text,
    birth_place               text,
    drivers_lic               text,
    registr_dt                text,
    precinct_abbrv            text,
    precinct_desc             text,
    municipality_abbrv        text,
    municipality_desc         text,
    ward_abbrv                text,
    ward_desc                 text,
    cong_dist_abbrv           text,
    super_court_abbrv         text,
    judic_dist_abbrv          text,
    nc_senate_abbrv           text,
    nc_house_abbrv            text,
    county_commiss_abbrv      text,
    county_commiss_desc       text,
    township_abbrv            text,
    township_desc             text,
    school_dist_abbrv         text,
    school_dist_desc          text,
    fire_dist_abbrv           text,
    fire_dist_desc            text,
    water_dist_abbrv          text,
    water_dist_desc           text,
    sewer_dist_abbrv          text,
    sewer_dist_desc           text,
    sanit_dist_abbrv          text,
    sanit_dist_desc           text,
    rescue_dist_abbrv         text,
    rescue_dist_desc          text,
    munic_dist_abbrv          text,
    munic_dist_desc           text,
    dist_1_abbrv              text,
    dist_1_desc               text,
    dist_2_abbrv              text,
    dist_2_desc               text,
    confidential_ind          text,
    age                       text,
    ncid                      text,
    vtd_abbrv                 text,
    vtd_desc                  text
);
当我使用以下命令尝试把数据导入 Cassandra 表时:
-- Import sample.tsv into voters, splitting fields on tab characters.
-- No column list and no HEADER option are given, so nothing marks the file's
-- first line (the column names) as a header.
-- NOTE(review): without a column list, cqlsh presumably maps fields using the
-- table's own column order rather than the file header — confirm against the
-- cqlsh COPY documentation.
COPY voters FROM 'sample.tsv' WITH DELIMITER='\t';
我得到:
Processed: 1000 rows; Rate: 1230 rows/s; Avg. rate: 1945 rows/s
1000 rows imported from 1 files in 0.514 seconds (0 skipped).
但是,当我对该表执行 count 查询时,发现表中只有一条记录。
我做错了什么?提前致谢。
如果您的 tsv 文件顶部有列名称,那么不要忘记将 WITH HEADER = TRUE 添加到您的 COPY 命令中。我还会删除双引号并验证字段是否由制表符而不是空格分隔。要进行测试,只需从原始文件创建一个包含 50 行左右的新 tsv 文件,然后在其中进行更改。让我知道进展如何。
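在 COPY 命令中显式指定列名,并使其顺序与 tsv 文件中的字段顺序一致。如果不指定列名,cqlsh 会按表元数据中的列顺序(主键列在前,其余列按字母顺序)而不是文件中的顺序来映射字段,因此每一行的主键都可能取到同一个值(例如 county_id 的 "1")并相互覆盖,最终只剩一条记录: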
-- Load sample.tsv into voters, naming the target column for every file field
-- in file order. HEADER = true makes cqlsh skip the first line of the file
-- (the column-name row) instead of importing it as data.
-- The list must use the table's column names, which differ from the file
-- header in three places: midl_name (header: middle_name), name_sufx_cd
-- (header: name_suffix_lbl) and birth_place (header: birth_state).
COPY voters (
    county_id, county_desc, voter_reg_num, status_cd, voter_status_desc,
    reason_cd, voter_status_reason_desc, absent_ind, name_prefx_cd,
    last_name, first_name, midl_name, name_sufx_cd,
    res_street_address, res_city_desc, state_cd, zip_code,
    mail_addr1, mail_addr2, mail_addr3, mail_addr4,
    mail_city, mail_state, mail_zipcode, full_phone_number,
    race_code, ethnic_code, party_cd, gender_code,
    birth_age, birth_place, drivers_lic, registr_dt,
    precinct_abbrv, precinct_desc, municipality_abbrv, municipality_desc,
    ward_abbrv, ward_desc, cong_dist_abbrv, super_court_abbrv,
    judic_dist_abbrv, nc_senate_abbrv, nc_house_abbrv,
    county_commiss_abbrv, county_commiss_desc, township_abbrv, township_desc,
    school_dist_abbrv, school_dist_desc, fire_dist_abbrv, fire_dist_desc,
    water_dist_abbrv, water_dist_desc, sewer_dist_abbrv, sewer_dist_desc,
    sanit_dist_abbrv, sanit_dist_desc, rescue_dist_abbrv, rescue_dist_desc,
    munic_dist_abbrv, munic_dist_desc, dist_1_abbrv, dist_1_desc,
    dist_2_abbrv, dist_2_desc, confidential_ind, age, ncid,
    vtd_abbrv, vtd_desc
)
FROM 'sample.tsv'
WITH DELIMITER = '\t' AND HEADER = true;