反序列化包含简单引用实体的 xml 文件时出现异常
Exception when deserializing an xml file containing a simple referenced entity
我遇到异常
XmlException: Unexpected node type EntityReference. ReadElementString method can only be called on elements with simple or empty content.
使用这段简单的代码时:
// Opens the file as UTF-8 text and asks XmlSerializer to read the document as an entry[] array.
using (StreamReader reader = new StreamReader(filePath, Encoding.UTF8))
{
// The root element name is overridden to "JMdict" so the array type can be used as the document root.
XmlSerializer serializer = new XmlSerializer(typeof(entry[]), new XmlRootAttribute("JMdict"));
// NOTE(review): this is the call reported to fail with the XmlException quoted above
// once the reader reaches an entity reference such as "&n;".
return (entry[])serializer.Deserialize(reader);
}
我使用 xsd.exe 从 xsd 文件生成了我的 C# 类。(注意:由于正文长度限制,代码已被截断,但我认为解决问题所需的关键部分都在这里):
//------------------------------------------------------------------------------
// <auto-generated>
// This code was generated by a tool.
// Runtime Version:4.0.30319.42000
//
// Changes to this file may cause incorrect behavior and will be lost if
// the code is regenerated.
// </auto-generated>
//------------------------------------------------------------------------------
using System.Xml.Serialization;
//
// This source code was auto-generated by xsd, Version=4.7.2046.0.
//
/// <summary>
/// Document root type bound to the <c>JMdict</c> XML element; holds the dictionary entries.
/// </summary>
[System.CodeDom.Compiler.GeneratedCode("xsd", "4.7.2046.0")]
[System.Serializable]
[System.ComponentModel.DesignerCategory("code")]
[System.Xml.Serialization.XmlRoot("JMdict", IsNullable = false)]
public partial class JMdict
{
    private entry[] entryField;

    /// <summary>Entries read from / written as repeated <c>entry</c> child elements.</summary>
    [System.Xml.Serialization.XmlElement("entry")]
    public entry[] entry
    {
        get { return entryField; }
        set { entryField = value; }
    }
}
/// <summary>
/// A single <c>entry</c> element: a sequence id plus its <c>k_ele</c>, <c>r_ele</c> and <c>sense</c> children.
/// </summary>
[System.CodeDom.Compiler.GeneratedCode("xsd", "4.7.2046.0")]
[System.Serializable]
[System.ComponentModel.DesignerCategory("code")]
public partial class entry
{
    private string ent_seqField;

    /// <summary>Text content of the <c>ent_seq</c> child element.</summary>
    public string ent_seq
    {
        get { return ent_seqField; }
        set { ent_seqField = value; }
    }

    private k_ele[] k_eleField;

    /// <summary>Repeated <c>k_ele</c> child elements.</summary>
    [System.Xml.Serialization.XmlElement("k_ele")]
    public k_ele[] k_ele
    {
        get { return k_eleField; }
        set { k_eleField = value; }
    }

    private r_ele[] r_eleField;

    /// <summary>Repeated <c>r_ele</c> child elements.</summary>
    [System.Xml.Serialization.XmlElement("r_ele")]
    public r_ele[] r_ele
    {
        get { return r_eleField; }
        set { r_eleField = value; }
    }

    private sense[] senseField;

    /// <summary>Repeated <c>sense</c> child elements.</summary>
    [System.Xml.Serialization.XmlElement("sense")]
    public sense[] sense
    {
        get { return senseField; }
        set { senseField = value; }
    }
}
/// <summary>
/// A <c>k_ele</c> element: one <c>keb</c> value with optional repeated
/// <c>ke_inf</c> and <c>ke_pri</c> children.
/// </summary>
[System.CodeDom.Compiler.GeneratedCode("xsd", "4.7.2046.0")]
[System.Serializable]
[System.ComponentModel.DesignerCategory("code")]
public partial class k_ele
{
    private string kebField;

    /// <summary>Text content of the <c>keb</c> child element.</summary>
    public string keb
    {
        get { return kebField; }
        set { kebField = value; }
    }

    private string[] ke_infField;

    /// <summary>Repeated <c>ke_inf</c> child elements.</summary>
    [System.Xml.Serialization.XmlElement("ke_inf")]
    public string[] ke_inf
    {
        get { return ke_infField; }
        set { ke_infField = value; }
    }

    private string[] ke_priField;

    /// <summary>Repeated <c>ke_pri</c> child elements.</summary>
    [System.Xml.Serialization.XmlElement("ke_pri")]
    public string[] ke_pri
    {
        get { return ke_priField; }
        set { ke_priField = value; }
    }
}
/// <summary>
/// An <c>r_ele</c> element: one <c>reb</c> value, an optional <c>re_nokanji</c> value and
/// repeated <c>re_restr</c>, <c>re_inf</c> and <c>re_pri</c> children.
/// </summary>
[System.CodeDom.Compiler.GeneratedCode("xsd", "4.7.2046.0")]
[System.Serializable]
[System.ComponentModel.DesignerCategory("code")]
public partial class r_ele
{
    private string rebField;

    /// <summary>Text content of the <c>reb</c> child element.</summary>
    public string reb
    {
        get { return rebField; }
        set { rebField = value; }
    }

    private string re_nokanjiField;

    /// <summary>Text content of the <c>re_nokanji</c> child element.</summary>
    public string re_nokanji
    {
        get { return re_nokanjiField; }
        set { re_nokanjiField = value; }
    }

    private string[] re_restrField;

    /// <summary>Repeated <c>re_restr</c> child elements.</summary>
    [System.Xml.Serialization.XmlElement("re_restr")]
    public string[] re_restr
    {
        get { return re_restrField; }
        set { re_restrField = value; }
    }

    private string[] re_infField;

    /// <summary>Repeated <c>re_inf</c> child elements.</summary>
    [System.Xml.Serialization.XmlElement("re_inf")]
    public string[] re_inf
    {
        get { return re_infField; }
        set { re_infField = value; }
    }

    private string[] re_priField;

    /// <summary>Repeated <c>re_pri</c> child elements.</summary>
    [System.Xml.Serialization.XmlElement("re_pri")]
    public string[] re_pri
    {
        get { return re_priField; }
        set { re_priField = value; }
    }
}
/// <summary>
/// A <c>sense</c> element. Every member is an array bound to a repeated child element
/// of the same name.
/// </summary>
[System.CodeDom.Compiler.GeneratedCode("xsd", "4.7.2046.0")]
[System.Serializable]
[System.ComponentModel.DesignerCategory("code")]
public partial class sense
{
    private string[] stagkField;

    /// <summary>Repeated <c>stagk</c> child elements.</summary>
    [System.Xml.Serialization.XmlElement("stagk")]
    public string[] stagk
    {
        get { return stagkField; }
        set { stagkField = value; }
    }

    private string[] stagrField;

    /// <summary>Repeated <c>stagr</c> child elements.</summary>
    [System.Xml.Serialization.XmlElement("stagr")]
    public string[] stagr
    {
        get { return stagrField; }
        set { stagrField = value; }
    }

    private string[] posField;

    /// <summary>Repeated <c>pos</c> child elements.</summary>
    [System.Xml.Serialization.XmlElement("pos")]
    public string[] pos
    {
        get { return posField; }
        set { posField = value; }
    }

    private string[] xrefField;

    /// <summary>Repeated <c>xref</c> child elements.</summary>
    [System.Xml.Serialization.XmlElement("xref")]
    public string[] xref
    {
        get { return xrefField; }
        set { xrefField = value; }
    }

    private string[] antField;

    /// <summary>Repeated <c>ant</c> child elements.</summary>
    [System.Xml.Serialization.XmlElement("ant")]
    public string[] ant
    {
        get { return antField; }
        set { antField = value; }
    }

    private string[] fieldField;

    /// <summary>Repeated <c>field</c> child elements.</summary>
    [System.Xml.Serialization.XmlElement("field")]
    public string[] field
    {
        get { return fieldField; }
        set { fieldField = value; }
    }

    private string[] miscField;

    /// <summary>Repeated <c>misc</c> child elements.</summary>
    [System.Xml.Serialization.XmlElement("misc")]
    public string[] misc
    {
        get { return miscField; }
        set { miscField = value; }
    }

    private string[] s_infField;

    /// <summary>Repeated <c>s_inf</c> child elements.</summary>
    [System.Xml.Serialization.XmlElement("s_inf")]
    public string[] s_inf
    {
        get { return s_infField; }
        set { s_infField = value; }
    }

    private lsource[] lsourceField;

    /// <summary>Repeated <c>lsource</c> child elements.</summary>
    [System.Xml.Serialization.XmlElement("lsource")]
    public lsource[] lsource
    {
        get { return lsourceField; }
        set { lsourceField = value; }
    }

    private string[] dialField;

    /// <summary>Repeated <c>dial</c> child elements.</summary>
    [System.Xml.Serialization.XmlElement("dial")]
    public string[] dial
    {
        get { return dialField; }
        set { dialField = value; }
    }

    private gloss[] glossField;

    /// <summary>Repeated <c>gloss</c> child elements.</summary>
    [System.Xml.Serialization.XmlElement("gloss")]
    public gloss[] gloss
    {
        get { return glossField; }
        set { glossField = value; }
    }
}
这是我要反序列化的 xml 文件的开头:
<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE JMdict [
<!ELEMENT JMdict (entry*)>
<!ELEMENT entry (ent_seq,k_ele*,r_ele+,sense+)>
<!ELEMENT ent_seq (#PCDATA)>
<!ELEMENT k_ele (keb,ke_inf*,ke_pri*)>
<!ELEMENT keb (#PCDATA)>
<!ELEMENT ke_inf (#PCDATA)>
<!ELEMENT ke_pri (#PCDATA)>
<!ELEMENT r_ele (reb,re_nokanji?,re_restr*,re_inf*,re_pri*)>
<!ELEMENT reb (#PCDATA)>
<!ELEMENT re_nokanji (#PCDATA)>
<!ELEMENT re_restr (#PCDATA)>
<!ELEMENT re_inf (#PCDATA)>
<!ELEMENT re_pri (#PCDATA)>
<!ELEMENT sense (stagk*,stagr*,pos*,xref*,ant*,field*,misc*,s_inf*,lsource*,dial*,gloss*)>
<!ELEMENT stagk (#PCDATA)>
<!ELEMENT stagr (#PCDATA)>
<!ELEMENT xref (#PCDATA)*>
<!ELEMENT ant (#PCDATA)*>
<!ELEMENT pos (#PCDATA)>
<!ELEMENT field (#PCDATA)>
<!ELEMENT misc (#PCDATA)>
<!ELEMENT lsource (#PCDATA)>
<!ATTLIST lsource xml:lang CDATA "eng">
<!ATTLIST lsource ls_type CDATA #IMPLIED>
<!ATTLIST lsource ls_wasei CDATA #IMPLIED>
<!ELEMENT dial (#PCDATA)>
<!ELEMENT gloss (#PCDATA|pri)*>
<!ATTLIST gloss xml:lang CDATA "eng">
<!ATTLIST gloss g_gend CDATA #IMPLIED>
<!ELEMENT pri (#PCDATA)>
<!ELEMENT s_inf (#PCDATA)>
<!ENTITY MA "martial arts term">
<!ENTITY X "rude or X-rated term (not displayed in educational software)">
<!ENTITY abbr "abbreviation">
<!ENTITY adj-i "adjective (keiyoushi)">
<!ENTITY adj-ix "adjective (keiyoushi) - yoi/ii class">
<!ENTITY adj-na "adjectival nouns or quasi-adjectives (keiyodoshi)">
<!ENTITY adj-no "nouns which may take the genitive case particle `no'">
<!ENTITY adj-pn "pre-noun adjectival (rentaishi)">
<!ENTITY adj-t "`taru' adjective">
<!ENTITY adj-f "noun or verb acting prenominally">
<!ENTITY adv "adverb (fukushi)">
<!ENTITY adv-to "adverb taking the `to' particle">
<!ENTITY arch "archaism">
<!ENTITY ateji "ateji (phonetic) reading">
<!ENTITY aux "auxiliary">
<!ENTITY aux-v "auxiliary verb">
<!ENTITY aux-adj "auxiliary adjective">
<!ENTITY Buddh "Buddhist term">
<!ENTITY chem "chemistry term">
<!ENTITY chn "children's language">
<!ENTITY col "colloquialism">
<!ENTITY comp "computer terminology">
<!ENTITY conj "conjunction">
<!ENTITY cop-da "copula">
<!ENTITY ctr "counter">
<!ENTITY derog "derogatory">
<!ENTITY eK "exclusively kanji">
<!ENTITY ek "exclusively kana">
<!ENTITY exp "expressions (phrases, clauses, etc.)">
<!ENTITY fam "familiar language">
<!ENTITY fem "female term or language">
<!ENTITY food "food term">
<!ENTITY geom "geometry term">
<!ENTITY gikun "gikun (meaning as reading) or jukujikun (special kanji reading)">
<!ENTITY hon "honorific or respectful (sonkeigo) language">
<!ENTITY hum "humble (kenjougo) language">
<!ENTITY iK "word containing irregular kanji usage">
<!ENTITY id "idiomatic expression">
<!ENTITY ik "word containing irregular kana usage">
<!ENTITY int "interjection (kandoushi)">
<!ENTITY io "irregular okurigana usage">
<!ENTITY iv "irregular verb">
<!ENTITY ling "linguistics terminology">
<!ENTITY m-sl "manga slang">
<!ENTITY male "male term or language">
<!ENTITY male-sl "male slang">
<!ENTITY math "mathematics">
<!ENTITY mil "military">
<!ENTITY n "noun (common) (futsuumeishi)">
<!ENTITY n-adv "adverbial noun (fukushitekimeishi)">
<!ENTITY n-suf "noun, used as a suffix">
<!ENTITY n-pref "noun, used as a prefix">
<!ENTITY n-t "noun (temporal) (jisoumeishi)">
<!ENTITY num "numeric">
<!ENTITY oK "word containing out-dated kanji">
<!ENTITY obs "obsolete term">
<!ENTITY obsc "obscure term">
<!ENTITY ok "out-dated or obsolete kana usage">
<!ENTITY oik "old or irregular kana form">
<!ENTITY on-mim "onomatopoeic or mimetic word">
<!ENTITY pn "pronoun">
<!ENTITY poet "poetical term">
<!ENTITY pol "polite (teineigo) language">
<!ENTITY pref "prefix">
<!ENTITY proverb "proverb">
<!ENTITY prt "particle">
<!ENTITY physics "physics terminology">
<!ENTITY rare "rare">
<!ENTITY sens "sensitive">
<!ENTITY sl "slang">
<!ENTITY suf "suffix">
<!ENTITY uK "word usually written using kanji alone">
<!ENTITY uk "word usually written using kana alone">
<!ENTITY unc "unclassified">
<!ENTITY yoji "yojijukugo">
<!ENTITY v1 "Ichidan verb">
<!ENTITY v1-s "Ichidan verb - kureru special class">
<!ENTITY v2a-s "Nidan verb with 'u' ending (archaic)">
<!ENTITY v4h "Yodan verb with `hu/fu' ending (archaic)">
<!ENTITY v4r "Yodan verb with `ru' ending (archaic)">
<!ENTITY v5aru "Godan verb - -aru special class">
<!ENTITY v5b "Godan verb with `bu' ending">
<!ENTITY v5g "Godan verb with `gu' ending">
<!ENTITY v5k "Godan verb with `ku' ending">
<!ENTITY v5k-s "Godan verb - Iku/Yuku special class">
<!ENTITY v5m "Godan verb with `mu' ending">
<!ENTITY v5n "Godan verb with `nu' ending">
<!ENTITY v5r "Godan verb with `ru' ending">
<!ENTITY v5r-i "Godan verb with `ru' ending (irregular verb)">
<!ENTITY v5s "Godan verb with `su' ending">
<!ENTITY v5t "Godan verb with `tsu' ending">
<!ENTITY v5u "Godan verb with `u' ending">
<!ENTITY v5u-s "Godan verb with `u' ending (special class)">
<!ENTITY v5uru "Godan verb - Uru old class verb (old form of Eru)">
<!ENTITY vz "Ichidan verb - zuru verb (alternative form of -jiru verbs)">
<!ENTITY vi "intransitive verb">
<!ENTITY vk "Kuru verb - special class">
<!ENTITY vn "irregular nu verb">
<!ENTITY vr "irregular ru verb, plain form ends with -ri">
<!ENTITY vs "noun or participle which takes the aux. verb suru">
<!ENTITY vs-c "su verb - precursor to the modern suru">
<!ENTITY vs-s "suru verb - special class">
<!ENTITY vs-i "suru verb - irregular">
<!ENTITY kyb "Kyoto-ben">
<!ENTITY osb "Osaka-ben">
<!ENTITY ksb "Kansai-ben">
<!ENTITY ktb "Kantou-ben">
<!ENTITY tsb "Tosa-ben">
<!ENTITY thb "Touhoku-ben">
<!ENTITY tsug "Tsugaru-ben">
<!ENTITY kyu "Kyuushuu-ben">
<!ENTITY rkb "Ryuukyuu-ben">
<!ENTITY nab "Nagano-ben">
<!ENTITY hob "Hokkaido-ben">
<!ENTITY vt "transitive verb">
<!ENTITY vulg "vulgar expression or word">
<!ENTITY adj-kari "`kari' adjective (archaic)">
<!ENTITY adj-ku "`ku' adjective (archaic)">
<!ENTITY adj-shiku "`shiku' adjective (archaic)">
<!ENTITY adj-nari "archaic/formal form of na-adjective">
<!ENTITY n-pr "proper noun">
<!ENTITY v-unspec "verb unspecified">
<!ENTITY v4k "Yodan verb with `ku' ending (archaic)">
<!ENTITY v4g "Yodan verb with `gu' ending (archaic)">
<!ENTITY v4s "Yodan verb with `su' ending (archaic)">
<!ENTITY v4t "Yodan verb with `tsu' ending (archaic)">
<!ENTITY v4n "Yodan verb with `nu' ending (archaic)">
<!ENTITY v4b "Yodan verb with `bu' ending (archaic)">
<!ENTITY v4m "Yodan verb with `mu' ending (archaic)">
<!ENTITY v2k-k "Nidan verb (upper class) with `ku' ending (archaic)">
<!ENTITY v2g-k "Nidan verb (upper class) with `gu' ending (archaic)">
<!ENTITY v2t-k "Nidan verb (upper class) with `tsu' ending (archaic)">
<!ENTITY v2d-k "Nidan verb (upper class) with `dzu' ending (archaic)">
<!ENTITY v2h-k "Nidan verb (upper class) with `hu/fu' ending (archaic)">
<!ENTITY v2b-k "Nidan verb (upper class) with `bu' ending (archaic)">
<!ENTITY v2m-k "Nidan verb (upper class) with `mu' ending (archaic)">
<!ENTITY v2y-k "Nidan verb (upper class) with `yu' ending (archaic)">
<!ENTITY v2r-k "Nidan verb (upper class) with `ru' ending (archaic)">
<!ENTITY v2k-s "Nidan verb (lower class) with `ku' ending (archaic)">
<!ENTITY v2g-s "Nidan verb (lower class) with `gu' ending (archaic)">
<!ENTITY v2s-s "Nidan verb (lower class) with `su' ending (archaic)">
<!ENTITY v2z-s "Nidan verb (lower class) with `zu' ending (archaic)">
<!ENTITY v2t-s "Nidan verb (lower class) with `tsu' ending (archaic)">
<!ENTITY v2d-s "Nidan verb (lower class) with `dzu' ending (archaic)">
<!ENTITY v2n-s "Nidan verb (lower class) with `nu' ending (archaic)">
<!ENTITY v2h-s "Nidan verb (lower class) with `hu/fu' ending (archaic)">
<!ENTITY v2b-s "Nidan verb (lower class) with `bu' ending (archaic)">
<!ENTITY v2m-s "Nidan verb (lower class) with `mu' ending (archaic)">
<!ENTITY v2y-s "Nidan verb (lower class) with `yu' ending (archaic)">
<!ENTITY v2r-s "Nidan verb (lower class) with `ru' ending (archaic)">
<!ENTITY v2w-s "Nidan verb (lower class) with `u' ending and `we' conjugation (archaic)">
<!ENTITY archit "architecture term">
<!ENTITY astron "astronomy, etc. term">
<!ENTITY baseb "baseball term">
<!ENTITY biol "biology term">
<!ENTITY bot "botany term">
<!ENTITY bus "business term">
<!ENTITY econ "economics term">
<!ENTITY engr "engineering term">
<!ENTITY finc "finance term">
<!ENTITY geol "geology, etc. term">
<!ENTITY law "law, etc. term">
<!ENTITY mahj "mahjong term">
<!ENTITY med "medicine, etc. term">
<!ENTITY music "music term">
<!ENTITY Shinto "Shinto term">
<!ENTITY shogi "shogi term">
<!ENTITY sports "sports term">
<!ENTITY sumo "sumo term">
<!ENTITY zool "zoology term">
<!ENTITY joc "jocular, humorous term">
<!ENTITY anat "anatomical term">
]>
<JMdict>
<entry>
<ent_seq>1000000</ent_seq>
<r_ele>
<reb>ヽ</reb>
</r_ele>
<r_ele>
<reb>くりかえし</reb>
</r_ele>
<sense>
<pos>&n;</pos>
<gloss xml:lang="eng">repetition mark in katakana</gloss>
</sense>
</entry>
<entry>
<ent_seq>1000010</ent_seq>
<r_ele>
<reb>ヾ</reb>
</r_ele>
<r_ele>
<reb>くりかえし</reb>
</r_ele>
<sense>
<pos>&n;</pos>
<gloss xml:lang="eng">voiced repetition mark in katakana</gloss>
</sense>
</entry>
</JMdict>
出错的行对应最后一个 <pos>&n;</pos>。
所以看起来它无法解析所引用的实体(<!ENTITY n "noun (common) (futsuumeishi)">)。
您需要通过设置 XmlReaderSettings.DtdProcessing = DtdProcessing.Parse 来告诉 XmlSerializer(或者更确切地说,底层的 XmlReader)展开 XML 实体引用是安全的,如下所示:
// Built once and reused. A serializer created through the (Type, XmlRootAttribute)
// constructor is not cached by the framework, so creating one per call would generate
// (and never unload) a new dynamic assembly on every invocation.
private static readonly XmlSerializer s_entriesSerializer =
    new XmlSerializer(typeof(entry[]), new XmlRootAttribute("JMdict"));

/// <summary>
/// Reads the XML file at <paramref name="filePath"/> as UTF-8 and deserializes its
/// <c>JMdict</c> root into an array of <see cref="entry"/> objects, expanding the
/// entity references declared in the document's internal DTD.
/// </summary>
/// <param name="filePath">Path of the XML file to read.</param>
/// <returns>The deserialized entries.</returns>
static entry[] DeserializeEntries(string filePath)
{
    var settings = new XmlReaderSettings
    {
        // Allow processing of the DTD so that entity references such as "&n;" are expanded.
        // (On older versions of .NET set ProhibitDtd = false instead.)
        DtdProcessing = DtdProcessing.Parse,
        // For security, cap the total number of characters that entity expansion may
        // produce, to guard against denial-of-service via nested entities.
        MaxCharactersFromEntities = (long)1e7,
        // For security, disable resolution of entities from external documents.
        XmlResolver = null,
    };

    using (var reader = new StreamReader(filePath, Encoding.UTF8))
    using (var xmlReader = XmlReader.Create(reader, settings))
    {
        return (entry[])s_entriesSerializer.Deserialize(xmlReader);
    }
}
备注:
不受信任的 XML 可以利用巧妙构造的 DTD 实体和实体引用引发内存不足异常,从而实施拒绝服务攻击(参见此处的说明)。
将 XmlReaderSettings.MaxCharactersFromEntities 设置为一个合理的值应该可以缓解这种情况。
此处使用的具体数值取自 XElement.Load() 的参考源代码(reference source)。请根据您的需要进行修改。
类似地,设置 XmlReaderSettings.XmlResolver = null
可防止不受信任的 XML 文件对外部资源发起意外请求。
如果您打算使用 XmlRootAttribute 重写(override)来构造 XmlSerializer,则必须静态缓存该序列化程序,以避免严重的内存泄漏(参见此处的说明)。
可运行的 .NET fiddle 示例。
我遇到异常
XmlException: Unexpected node type EntityReference. ReadElementString method can only be called on elements with simple or empty content.
使用这段简单的代码时:
// Opens the file as UTF-8 text and asks XmlSerializer to read the document as an entry[] array.
using (StreamReader reader = new StreamReader(filePath, Encoding.UTF8))
{
// The root element name is overridden to "JMdict" so the array type can be used as the document root.
XmlSerializer serializer = new XmlSerializer(typeof(entry[]), new XmlRootAttribute("JMdict"));
// NOTE(review): this is the call reported to fail with the XmlException quoted above
// once the reader reaches an entity reference such as "&n;".
return (entry[])serializer.Deserialize(reader);
}
我使用 xsd.exe 从 xsd 文件生成了我的 C# 类。(注意:由于正文长度限制,代码已被截断,但我认为解决问题所需的关键部分都在这里):
//------------------------------------------------------------------------------
// <auto-generated>
// This code was generated by a tool.
// Runtime Version:4.0.30319.42000
//
// Changes to this file may cause incorrect behavior and will be lost if
// the code is regenerated.
// </auto-generated>
//------------------------------------------------------------------------------
using System.Xml.Serialization;
//
// This source code was auto-generated by xsd, Version=4.7.2046.0.
//
/// <summary>
/// Document root type bound to the <c>JMdict</c> XML element; holds the dictionary entries.
/// </summary>
[System.CodeDom.Compiler.GeneratedCode("xsd", "4.7.2046.0")]
[System.Serializable]
[System.ComponentModel.DesignerCategory("code")]
[System.Xml.Serialization.XmlRoot("JMdict", IsNullable = false)]
public partial class JMdict
{
    private entry[] entryField;

    /// <summary>Entries read from / written as repeated <c>entry</c> child elements.</summary>
    [System.Xml.Serialization.XmlElement("entry")]
    public entry[] entry
    {
        get { return entryField; }
        set { entryField = value; }
    }
}
/// <summary>
/// A single <c>entry</c> element: a sequence id plus its <c>k_ele</c>, <c>r_ele</c> and <c>sense</c> children.
/// </summary>
[System.CodeDom.Compiler.GeneratedCode("xsd", "4.7.2046.0")]
[System.Serializable]
[System.ComponentModel.DesignerCategory("code")]
public partial class entry
{
    private string ent_seqField;

    /// <summary>Text content of the <c>ent_seq</c> child element.</summary>
    public string ent_seq
    {
        get { return ent_seqField; }
        set { ent_seqField = value; }
    }

    private k_ele[] k_eleField;

    /// <summary>Repeated <c>k_ele</c> child elements.</summary>
    [System.Xml.Serialization.XmlElement("k_ele")]
    public k_ele[] k_ele
    {
        get { return k_eleField; }
        set { k_eleField = value; }
    }

    private r_ele[] r_eleField;

    /// <summary>Repeated <c>r_ele</c> child elements.</summary>
    [System.Xml.Serialization.XmlElement("r_ele")]
    public r_ele[] r_ele
    {
        get { return r_eleField; }
        set { r_eleField = value; }
    }

    private sense[] senseField;

    /// <summary>Repeated <c>sense</c> child elements.</summary>
    [System.Xml.Serialization.XmlElement("sense")]
    public sense[] sense
    {
        get { return senseField; }
        set { senseField = value; }
    }
}
/// <summary>
/// A <c>k_ele</c> element: one <c>keb</c> value with optional repeated
/// <c>ke_inf</c> and <c>ke_pri</c> children.
/// </summary>
[System.CodeDom.Compiler.GeneratedCode("xsd", "4.7.2046.0")]
[System.Serializable]
[System.ComponentModel.DesignerCategory("code")]
public partial class k_ele
{
    private string kebField;

    /// <summary>Text content of the <c>keb</c> child element.</summary>
    public string keb
    {
        get { return kebField; }
        set { kebField = value; }
    }

    private string[] ke_infField;

    /// <summary>Repeated <c>ke_inf</c> child elements.</summary>
    [System.Xml.Serialization.XmlElement("ke_inf")]
    public string[] ke_inf
    {
        get { return ke_infField; }
        set { ke_infField = value; }
    }

    private string[] ke_priField;

    /// <summary>Repeated <c>ke_pri</c> child elements.</summary>
    [System.Xml.Serialization.XmlElement("ke_pri")]
    public string[] ke_pri
    {
        get { return ke_priField; }
        set { ke_priField = value; }
    }
}
/// <summary>
/// An <c>r_ele</c> element: one <c>reb</c> value, an optional <c>re_nokanji</c> value and
/// repeated <c>re_restr</c>, <c>re_inf</c> and <c>re_pri</c> children.
/// </summary>
[System.CodeDom.Compiler.GeneratedCode("xsd", "4.7.2046.0")]
[System.Serializable]
[System.ComponentModel.DesignerCategory("code")]
public partial class r_ele
{
    private string rebField;

    /// <summary>Text content of the <c>reb</c> child element.</summary>
    public string reb
    {
        get { return rebField; }
        set { rebField = value; }
    }

    private string re_nokanjiField;

    /// <summary>Text content of the <c>re_nokanji</c> child element.</summary>
    public string re_nokanji
    {
        get { return re_nokanjiField; }
        set { re_nokanjiField = value; }
    }

    private string[] re_restrField;

    /// <summary>Repeated <c>re_restr</c> child elements.</summary>
    [System.Xml.Serialization.XmlElement("re_restr")]
    public string[] re_restr
    {
        get { return re_restrField; }
        set { re_restrField = value; }
    }

    private string[] re_infField;

    /// <summary>Repeated <c>re_inf</c> child elements.</summary>
    [System.Xml.Serialization.XmlElement("re_inf")]
    public string[] re_inf
    {
        get { return re_infField; }
        set { re_infField = value; }
    }

    private string[] re_priField;

    /// <summary>Repeated <c>re_pri</c> child elements.</summary>
    [System.Xml.Serialization.XmlElement("re_pri")]
    public string[] re_pri
    {
        get { return re_priField; }
        set { re_priField = value; }
    }
}
/// <summary>
/// A <c>sense</c> element. Every member is an array bound to a repeated child element
/// of the same name.
/// </summary>
[System.CodeDom.Compiler.GeneratedCode("xsd", "4.7.2046.0")]
[System.Serializable]
[System.ComponentModel.DesignerCategory("code")]
public partial class sense
{
    private string[] stagkField;

    /// <summary>Repeated <c>stagk</c> child elements.</summary>
    [System.Xml.Serialization.XmlElement("stagk")]
    public string[] stagk
    {
        get { return stagkField; }
        set { stagkField = value; }
    }

    private string[] stagrField;

    /// <summary>Repeated <c>stagr</c> child elements.</summary>
    [System.Xml.Serialization.XmlElement("stagr")]
    public string[] stagr
    {
        get { return stagrField; }
        set { stagrField = value; }
    }

    private string[] posField;

    /// <summary>Repeated <c>pos</c> child elements.</summary>
    [System.Xml.Serialization.XmlElement("pos")]
    public string[] pos
    {
        get { return posField; }
        set { posField = value; }
    }

    private string[] xrefField;

    /// <summary>Repeated <c>xref</c> child elements.</summary>
    [System.Xml.Serialization.XmlElement("xref")]
    public string[] xref
    {
        get { return xrefField; }
        set { xrefField = value; }
    }

    private string[] antField;

    /// <summary>Repeated <c>ant</c> child elements.</summary>
    [System.Xml.Serialization.XmlElement("ant")]
    public string[] ant
    {
        get { return antField; }
        set { antField = value; }
    }

    private string[] fieldField;

    /// <summary>Repeated <c>field</c> child elements.</summary>
    [System.Xml.Serialization.XmlElement("field")]
    public string[] field
    {
        get { return fieldField; }
        set { fieldField = value; }
    }

    private string[] miscField;

    /// <summary>Repeated <c>misc</c> child elements.</summary>
    [System.Xml.Serialization.XmlElement("misc")]
    public string[] misc
    {
        get { return miscField; }
        set { miscField = value; }
    }

    private string[] s_infField;

    /// <summary>Repeated <c>s_inf</c> child elements.</summary>
    [System.Xml.Serialization.XmlElement("s_inf")]
    public string[] s_inf
    {
        get { return s_infField; }
        set { s_infField = value; }
    }

    private lsource[] lsourceField;

    /// <summary>Repeated <c>lsource</c> child elements.</summary>
    [System.Xml.Serialization.XmlElement("lsource")]
    public lsource[] lsource
    {
        get { return lsourceField; }
        set { lsourceField = value; }
    }

    private string[] dialField;

    /// <summary>Repeated <c>dial</c> child elements.</summary>
    [System.Xml.Serialization.XmlElement("dial")]
    public string[] dial
    {
        get { return dialField; }
        set { dialField = value; }
    }

    private gloss[] glossField;

    /// <summary>Repeated <c>gloss</c> child elements.</summary>
    [System.Xml.Serialization.XmlElement("gloss")]
    public gloss[] gloss
    {
        get { return glossField; }
        set { glossField = value; }
    }
}
这是我要反序列化的 xml 文件的开头:
<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE JMdict [
<!ELEMENT JMdict (entry*)>
<!ELEMENT entry (ent_seq,k_ele*,r_ele+,sense+)>
<!ELEMENT ent_seq (#PCDATA)>
<!ELEMENT k_ele (keb,ke_inf*,ke_pri*)>
<!ELEMENT keb (#PCDATA)>
<!ELEMENT ke_inf (#PCDATA)>
<!ELEMENT ke_pri (#PCDATA)>
<!ELEMENT r_ele (reb,re_nokanji?,re_restr*,re_inf*,re_pri*)>
<!ELEMENT reb (#PCDATA)>
<!ELEMENT re_nokanji (#PCDATA)>
<!ELEMENT re_restr (#PCDATA)>
<!ELEMENT re_inf (#PCDATA)>
<!ELEMENT re_pri (#PCDATA)>
<!ELEMENT sense (stagk*,stagr*,pos*,xref*,ant*,field*,misc*,s_inf*,lsource*,dial*,gloss*)>
<!ELEMENT stagk (#PCDATA)>
<!ELEMENT stagr (#PCDATA)>
<!ELEMENT xref (#PCDATA)*>
<!ELEMENT ant (#PCDATA)*>
<!ELEMENT pos (#PCDATA)>
<!ELEMENT field (#PCDATA)>
<!ELEMENT misc (#PCDATA)>
<!ELEMENT lsource (#PCDATA)>
<!ATTLIST lsource xml:lang CDATA "eng">
<!ATTLIST lsource ls_type CDATA #IMPLIED>
<!ATTLIST lsource ls_wasei CDATA #IMPLIED>
<!ELEMENT dial (#PCDATA)>
<!ELEMENT gloss (#PCDATA|pri)*>
<!ATTLIST gloss xml:lang CDATA "eng">
<!ATTLIST gloss g_gend CDATA #IMPLIED>
<!ELEMENT pri (#PCDATA)>
<!ELEMENT s_inf (#PCDATA)>
<!ENTITY MA "martial arts term">
<!ENTITY X "rude or X-rated term (not displayed in educational software)">
<!ENTITY abbr "abbreviation">
<!ENTITY adj-i "adjective (keiyoushi)">
<!ENTITY adj-ix "adjective (keiyoushi) - yoi/ii class">
<!ENTITY adj-na "adjectival nouns or quasi-adjectives (keiyodoshi)">
<!ENTITY adj-no "nouns which may take the genitive case particle `no'">
<!ENTITY adj-pn "pre-noun adjectival (rentaishi)">
<!ENTITY adj-t "`taru' adjective">
<!ENTITY adj-f "noun or verb acting prenominally">
<!ENTITY adv "adverb (fukushi)">
<!ENTITY adv-to "adverb taking the `to' particle">
<!ENTITY arch "archaism">
<!ENTITY ateji "ateji (phonetic) reading">
<!ENTITY aux "auxiliary">
<!ENTITY aux-v "auxiliary verb">
<!ENTITY aux-adj "auxiliary adjective">
<!ENTITY Buddh "Buddhist term">
<!ENTITY chem "chemistry term">
<!ENTITY chn "children's language">
<!ENTITY col "colloquialism">
<!ENTITY comp "computer terminology">
<!ENTITY conj "conjunction">
<!ENTITY cop-da "copula">
<!ENTITY ctr "counter">
<!ENTITY derog "derogatory">
<!ENTITY eK "exclusively kanji">
<!ENTITY ek "exclusively kana">
<!ENTITY exp "expressions (phrases, clauses, etc.)">
<!ENTITY fam "familiar language">
<!ENTITY fem "female term or language">
<!ENTITY food "food term">
<!ENTITY geom "geometry term">
<!ENTITY gikun "gikun (meaning as reading) or jukujikun (special kanji reading)">
<!ENTITY hon "honorific or respectful (sonkeigo) language">
<!ENTITY hum "humble (kenjougo) language">
<!ENTITY iK "word containing irregular kanji usage">
<!ENTITY id "idiomatic expression">
<!ENTITY ik "word containing irregular kana usage">
<!ENTITY int "interjection (kandoushi)">
<!ENTITY io "irregular okurigana usage">
<!ENTITY iv "irregular verb">
<!ENTITY ling "linguistics terminology">
<!ENTITY m-sl "manga slang">
<!ENTITY male "male term or language">
<!ENTITY male-sl "male slang">
<!ENTITY math "mathematics">
<!ENTITY mil "military">
<!ENTITY n "noun (common) (futsuumeishi)">
<!ENTITY n-adv "adverbial noun (fukushitekimeishi)">
<!ENTITY n-suf "noun, used as a suffix">
<!ENTITY n-pref "noun, used as a prefix">
<!ENTITY n-t "noun (temporal) (jisoumeishi)">
<!ENTITY num "numeric">
<!ENTITY oK "word containing out-dated kanji">
<!ENTITY obs "obsolete term">
<!ENTITY obsc "obscure term">
<!ENTITY ok "out-dated or obsolete kana usage">
<!ENTITY oik "old or irregular kana form">
<!ENTITY on-mim "onomatopoeic or mimetic word">
<!ENTITY pn "pronoun">
<!ENTITY poet "poetical term">
<!ENTITY pol "polite (teineigo) language">
<!ENTITY pref "prefix">
<!ENTITY proverb "proverb">
<!ENTITY prt "particle">
<!ENTITY physics "physics terminology">
<!ENTITY rare "rare">
<!ENTITY sens "sensitive">
<!ENTITY sl "slang">
<!ENTITY suf "suffix">
<!ENTITY uK "word usually written using kanji alone">
<!ENTITY uk "word usually written using kana alone">
<!ENTITY unc "unclassified">
<!ENTITY yoji "yojijukugo">
<!ENTITY v1 "Ichidan verb">
<!ENTITY v1-s "Ichidan verb - kureru special class">
<!ENTITY v2a-s "Nidan verb with 'u' ending (archaic)">
<!ENTITY v4h "Yodan verb with `hu/fu' ending (archaic)">
<!ENTITY v4r "Yodan verb with `ru' ending (archaic)">
<!ENTITY v5aru "Godan verb - -aru special class">
<!ENTITY v5b "Godan verb with `bu' ending">
<!ENTITY v5g "Godan verb with `gu' ending">
<!ENTITY v5k "Godan verb with `ku' ending">
<!ENTITY v5k-s "Godan verb - Iku/Yuku special class">
<!ENTITY v5m "Godan verb with `mu' ending">
<!ENTITY v5n "Godan verb with `nu' ending">
<!ENTITY v5r "Godan verb with `ru' ending">
<!ENTITY v5r-i "Godan verb with `ru' ending (irregular verb)">
<!ENTITY v5s "Godan verb with `su' ending">
<!ENTITY v5t "Godan verb with `tsu' ending">
<!ENTITY v5u "Godan verb with `u' ending">
<!ENTITY v5u-s "Godan verb with `u' ending (special class)">
<!ENTITY v5uru "Godan verb - Uru old class verb (old form of Eru)">
<!ENTITY vz "Ichidan verb - zuru verb (alternative form of -jiru verbs)">
<!ENTITY vi "intransitive verb">
<!ENTITY vk "Kuru verb - special class">
<!ENTITY vn "irregular nu verb">
<!ENTITY vr "irregular ru verb, plain form ends with -ri">
<!ENTITY vs "noun or participle which takes the aux. verb suru">
<!ENTITY vs-c "su verb - precursor to the modern suru">
<!ENTITY vs-s "suru verb - special class">
<!ENTITY vs-i "suru verb - irregular">
<!ENTITY kyb "Kyoto-ben">
<!ENTITY osb "Osaka-ben">
<!ENTITY ksb "Kansai-ben">
<!ENTITY ktb "Kantou-ben">
<!ENTITY tsb "Tosa-ben">
<!ENTITY thb "Touhoku-ben">
<!ENTITY tsug "Tsugaru-ben">
<!ENTITY kyu "Kyuushuu-ben">
<!ENTITY rkb "Ryuukyuu-ben">
<!ENTITY nab "Nagano-ben">
<!ENTITY hob "Hokkaido-ben">
<!ENTITY vt "transitive verb">
<!ENTITY vulg "vulgar expression or word">
<!ENTITY adj-kari "`kari' adjective (archaic)">
<!ENTITY adj-ku "`ku' adjective (archaic)">
<!ENTITY adj-shiku "`shiku' adjective (archaic)">
<!ENTITY adj-nari "archaic/formal form of na-adjective">
<!ENTITY n-pr "proper noun">
<!ENTITY v-unspec "verb unspecified">
<!ENTITY v4k "Yodan verb with `ku' ending (archaic)">
<!ENTITY v4g "Yodan verb with `gu' ending (archaic)">
<!ENTITY v4s "Yodan verb with `su' ending (archaic)">
<!ENTITY v4t "Yodan verb with `tsu' ending (archaic)">
<!ENTITY v4n "Yodan verb with `nu' ending (archaic)">
<!ENTITY v4b "Yodan verb with `bu' ending (archaic)">
<!ENTITY v4m "Yodan verb with `mu' ending (archaic)">
<!ENTITY v2k-k "Nidan verb (upper class) with `ku' ending (archaic)">
<!ENTITY v2g-k "Nidan verb (upper class) with `gu' ending (archaic)">
<!ENTITY v2t-k "Nidan verb (upper class) with `tsu' ending (archaic)">
<!ENTITY v2d-k "Nidan verb (upper class) with `dzu' ending (archaic)">
<!ENTITY v2h-k "Nidan verb (upper class) with `hu/fu' ending (archaic)">
<!ENTITY v2b-k "Nidan verb (upper class) with `bu' ending (archaic)">
<!ENTITY v2m-k "Nidan verb (upper class) with `mu' ending (archaic)">
<!ENTITY v2y-k "Nidan verb (upper class) with `yu' ending (archaic)">
<!ENTITY v2r-k "Nidan verb (upper class) with `ru' ending (archaic)">
<!ENTITY v2k-s "Nidan verb (lower class) with `ku' ending (archaic)">
<!ENTITY v2g-s "Nidan verb (lower class) with `gu' ending (archaic)">
<!ENTITY v2s-s "Nidan verb (lower class) with `su' ending (archaic)">
<!ENTITY v2z-s "Nidan verb (lower class) with `zu' ending (archaic)">
<!ENTITY v2t-s "Nidan verb (lower class) with `tsu' ending (archaic)">
<!ENTITY v2d-s "Nidan verb (lower class) with `dzu' ending (archaic)">
<!ENTITY v2n-s "Nidan verb (lower class) with `nu' ending (archaic)">
<!ENTITY v2h-s "Nidan verb (lower class) with `hu/fu' ending (archaic)">
<!ENTITY v2b-s "Nidan verb (lower class) with `bu' ending (archaic)">
<!ENTITY v2m-s "Nidan verb (lower class) with `mu' ending (archaic)">
<!ENTITY v2y-s "Nidan verb (lower class) with `yu' ending (archaic)">
<!ENTITY v2r-s "Nidan verb (lower class) with `ru' ending (archaic)">
<!ENTITY v2w-s "Nidan verb (lower class) with `u' ending and `we' conjugation (archaic)">
<!ENTITY archit "architecture term">
<!ENTITY astron "astronomy, etc. term">
<!ENTITY baseb "baseball term">
<!ENTITY biol "biology term">
<!ENTITY bot "botany term">
<!ENTITY bus "business term">
<!ENTITY econ "economics term">
<!ENTITY engr "engineering term">
<!ENTITY finc "finance term">
<!ENTITY geol "geology, etc. term">
<!ENTITY law "law, etc. term">
<!ENTITY mahj "mahjong term">
<!ENTITY med "medicine, etc. term">
<!ENTITY music "music term">
<!ENTITY Shinto "Shinto term">
<!ENTITY shogi "shogi term">
<!ENTITY sports "sports term">
<!ENTITY sumo "sumo term">
<!ENTITY zool "zoology term">
<!ENTITY joc "jocular, humorous term">
<!ENTITY anat "anatomical term">
]>
<JMdict>
<entry>
<ent_seq>1000000</ent_seq>
<r_ele>
<reb>ヽ</reb>
</r_ele>
<r_ele>
<reb>くりかえし</reb>
</r_ele>
<sense>
<pos>&n;</pos>
<gloss xml:lang="eng">repetition mark in katakana</gloss>
</sense>
</entry>
<entry>
<ent_seq>1000010</ent_seq>
<r_ele>
<reb>ヾ</reb>
</r_ele>
<r_ele>
<reb>くりかえし</reb>
</r_ele>
<sense>
<pos>&n;</pos>
<gloss xml:lang="eng">voiced repetition mark in katakana</gloss>
</sense>
</entry>
</JMdict>
出错的行对应最后一个 <pos>&n;</pos>。
所以看起来它无法解析所引用的实体(<!ENTITY n "noun (common) (futsuumeishi)">)。
您需要通过设置 XmlReaderSettings.DtdProcessing = DtdProcessing.Parse 来告诉 XmlSerializer(或者更确切地说,底层的 XmlReader)展开 XML 实体引用是安全的,如下所示:
// Built once and reused. A serializer created through the (Type, XmlRootAttribute)
// constructor is not cached by the framework, so creating one per call would generate
// (and never unload) a new dynamic assembly on every invocation.
private static readonly XmlSerializer s_entriesSerializer =
    new XmlSerializer(typeof(entry[]), new XmlRootAttribute("JMdict"));

/// <summary>
/// Reads the XML file at <paramref name="filePath"/> as UTF-8 and deserializes its
/// <c>JMdict</c> root into an array of <see cref="entry"/> objects, expanding the
/// entity references declared in the document's internal DTD.
/// </summary>
/// <param name="filePath">Path of the XML file to read.</param>
/// <returns>The deserialized entries.</returns>
static entry[] DeserializeEntries(string filePath)
{
    var settings = new XmlReaderSettings
    {
        // Allow processing of the DTD so that entity references such as "&n;" are expanded.
        // (On older versions of .NET set ProhibitDtd = false instead.)
        DtdProcessing = DtdProcessing.Parse,
        // For security, cap the total number of characters that entity expansion may
        // produce, to guard against denial-of-service via nested entities.
        MaxCharactersFromEntities = (long)1e7,
        // For security, disable resolution of entities from external documents.
        XmlResolver = null,
    };

    using (var reader = new StreamReader(filePath, Encoding.UTF8))
    using (var xmlReader = XmlReader.Create(reader, settings))
    {
        return (entry[])s_entriesSerializer.Deserialize(xmlReader);
    }
}
备注:
不受信任的 XML 可以利用巧妙构造的 DTD 实体和实体引用引发内存不足异常,从而实施拒绝服务攻击(参见此处的说明)。将 XmlReaderSettings.MaxCharactersFromEntities 设置为一个合理的值应该可以缓解这种情况。此处使用的具体数值取自 XElement.Load() 的参考源代码(reference source),请根据您的需要进行修改。
类似地,设置 XmlReaderSettings.XmlResolver = null 可防止不受信任的 XML 文件对外部资源发起意外请求。
如果您打算使用 XmlRootAttribute 重写(override)来构造 XmlSerializer,则必须静态缓存该序列化程序,以避免严重的内存泄漏(参见此处的说明)。
可运行的 .NET fiddle 示例。