从字符串中提取可点击的词并包含标点符号
Extract Clickable words from String and Include Punctuation Marks
我有一个句子和句子中的一系列可点击单词。数组不包含标点符号。
这是一句话
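Into the trunk we put two poles and the can of worms and a sack of sandwiches and a thermos of water. “We’re going on a journey,” my father said. “To a secret place. We’ll catch the air! We’ll catch the breeze!”(下面 tokens 中的位置索引对应的是这句英文原文;中文大意:我们在后备箱里放了两根杆子、一罐虫子、一袋三明治和一热水瓶。 “我们要去旅行了,”我父亲说。 “去一个秘密的地方。我们会赶上空气!我们会抓住 breeze!”)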
这是可点击词的结构。它是一个数组,包含每个单词在句子中起始位置和结束位置的索引。该数组不包含句子中的标点符号。
标点符号不可点击。
"tokens": [
{
"position": [
0,
4
],
"value": "into"
},
{
"position": [
5,
8
],
"value": "the"
},
{
"position": [
9,
14
],
"value": "trunk"
},
{
"position": [
15,
17
],
"value": "we"
},
{
"position": [
18,
21
],
"value": "put"
},
{
"position": [
22,
25
],
"value": "two"
},
{
"position": [
26,
31
],
"value": "poles"
},
{
"position": [
32,
35
],
"value": "and"
},
{
"position": [
36,
39
],
"value": "the"
},
{
"position": [
40,
43
],
"value": "can"
},
{
"position": [
44,
46
],
"value": "of"
},
{
"position": [
47,
52
],
"value": "worms"
},
{
"position": [
53,
56
],
"value": "and"
},
{
"position": [
57,
58
],
"value": "a"
},
{
"position": [
59,
63
],
"value": "sack"
},
{
"position": [
64,
66
],
"value": "of"
},
{
"position": [
67,
77
],
"value": "sandwiches"
},
{
"position": [
78,
81
],
"value": "and"
},
{
"position": [
82,
83
],
"value": "a"
},
{
"position": [
84,
91
],
"value": "thermos"
},
{
"position": [
92,
94
],
"value": "of"
},
{
"position": [
95,
100
],
"value": "water"
},
{
"position": [
103,
108
],
"value": "we're"
},
{
"position": [
109,
114
],
"value": "going"
},
{
"position": [
115,
117
],
"value": "on"
},
{
"position": [
118,
119
],
"value": "a"
},
{
"position": [
120,
127
],
"value": "journey"
},
{
"position": [
130,
132
],
"value": "my"
},
{
"position": [
133,
139
],
"value": "father"
},
{
"position": [
140,
144
],
"value": "said"
},
{
"position": [
147,
149
],
"value": "to"
},
{
"position": [
150,
151
],
"value": "a"
},
{
"position": [
152,
158
],
"value": "secret"
},
{
"position": [
159,
164
],
"value": "place"
},
{
"position": [
166,
171
],
"value": "we'll"
},
{
"position": [
172,
177
],
"value": "catch"
},
{
"position": [
178,
181
],
"value": "the"
},
{
"position": [
182,
185
],
"value": "air"
},
{
"position": [
187,
192
],
"value": "we'll"
},
{
"position": [
193,
198
],
"value": "catch"
},
{
"position": [
199,
202
],
"value": "the"
},
{
"position": [
203,
209
],
"value": "breeze"
}
]
},
这是我获取可点击字词的代码
// Builds one string per token: the token's own text from `content`, followed
// by the characters that come right after it. The trailing slice runs from
// `end` to `end + (end - start)`, i.e. it is as long as the word itself.
const getWordsFromTokens = tokens.map(({ position }) => {
  const [wordStart, wordEnd] = position;
  const wordLength = wordEnd - wordStart;
  // Text following the word (spaces, punctuation, and whatever else falls inside the slice).
  const trailingText = content.substring(wordEnd, wordEnd + wordLength);
  console.log(trailingText);
  return content.substring(wordStart, wordEnd) + trailingText;
});
这是我呈现文本的方式
return (
<div>
<p> {
getWordsFromTokens.map((word, index)=>{
return <a href={'/word/' + word} > {word}</a>
})
}
</p>
</div>
)
这是我的问题,当我渲染文本时,它看起来与原始文本不完全一样。我可能做错了什么?
这是最终结果的样子
我们在 tr 后备箱里放了两个电线杆和一罐 w 条虫子和一袋 s 三明治和一个热水瓶和热水瓶水。 “我们要去旅行了,”我父亲说。说。 “去一个秘密的地方。我们'我们会赶上 ai 空气! W 我们会抓住 br breeze!”
这样的解决方案怎么样?我使用 光标 来跟踪句子中的位置。
// Clickable-word tokens. `position` is the [start, end) character range of the
// word inside the sentence text and `value` is the word in lower case.
// Punctuation and whitespace have no entries of their own.
const tokens = [
  { position: [0, 4], value: 'into' },
  { position: [5, 8], value: 'the' },
  { position: [9, 14], value: 'trunk' },
  { position: [15, 17], value: 'we' },
  { position: [18, 21], value: 'put' },
  { position: [22, 25], value: 'two' },
  { position: [26, 31], value: 'poles' },
  { position: [32, 35], value: 'and' },
  { position: [36, 39], value: 'the' },
  { position: [40, 43], value: 'can' },
  { position: [44, 46], value: 'of' },
  { position: [47, 52], value: 'worms' },
  { position: [53, 56], value: 'and' },
  { position: [57, 58], value: 'a' },
  { position: [59, 63], value: 'sack' },
  { position: [64, 66], value: 'of' },
  { position: [67, 77], value: 'sandwiches' },
  { position: [78, 81], value: 'and' },
  { position: [82, 83], value: 'a' },
  { position: [84, 91], value: 'thermos' },
  { position: [92, 94], value: 'of' },
  { position: [95, 100], value: 'water' },
  { position: [103, 108], value: "we're" },
  { position: [109, 114], value: 'going' },
  { position: [115, 117], value: 'on' },
  { position: [118, 119], value: 'a' },
  { position: [120, 127], value: 'journey' },
  { position: [130, 132], value: 'my' },
  { position: [133, 139], value: 'father' },
  { position: [140, 144], value: 'said' },
  { position: [147, 149], value: 'to' },
  { position: [150, 151], value: 'a' },
  { position: [152, 158], value: 'secret' },
  { position: [159, 164], value: 'place' },
  { position: [166, 171], value: "we'll" },
  { position: [172, 177], value: 'catch' },
  { position: [178, 181], value: 'the' },
  { position: [182, 185], value: 'air' },
  { position: [187, 192], value: "we'll" },
  { position: [193, 198], value: 'catch' },
  { position: [199, 202], value: 'the' },
  { position: [203, 209], value: 'breeze' }
];
// The sentence text that the token positions index into.
const content = 'Into the trunk we put two poles and the can of worms and a sack of sandwiches and a thermos of water. “We’re going on a journey,” my father said. “To a secret place. We’ll catch the air! We’ll catch the breeze!"';

// Offset in `content` just past the last token consumed so far.
let cursorPosition = 0;

// Ordered segments of the sentence. For every token two entries are added:
// the text between the previous token and this one ('non-word', may be an
// empty string) and then the token's own text as written in `content` ('word').
const getWordsFromTokens = [];
for (const { position } of tokens) {
  const [from, to] = position;
  const gapBeforeWord = content.substring(cursorPosition, from);
  const wordText = content.substring(from, to);
  getWordsFromTokens.push(
    { type: 'non-word', value: gapBeforeWord },
    { type: 'word', value: wordText }
  );
  // Advance past this word so the next gap starts right after it.
  cursorPosition = to;
}
// Render the segments into #new-sentence: 'word' segments become links,
// everything else is inserted as plain text.
//
// Nodes are created through the DOM API instead of concatenating strings into
// innerHTML. That way a quote, '<' or '&' inside a segment is always treated as
// text (it cannot terminate the href attribute or be parsed as markup), and the
// target element is looked up and updated once rather than being re-serialized
// and re-parsed on every iteration.
const sentenceElement = document.getElementById('new-sentence');
const sentenceFragment = document.createDocumentFragment();
getWordsFromTokens.forEach(item => {
  if (item.type === 'word') {
    const link = document.createElement('a');
    link.setAttribute('href', `/word/${item.value}`);
    link.textContent = item.value;
    sentenceFragment.appendChild(link);
  } else {
    sentenceFragment.appendChild(document.createTextNode(item.value));
  }
});
const endOfSentence = content.substring(cursorPosition); // all characters (if any) after the last token
sentenceFragment.appendChild(document.createTextNode(endOfSentence));
// Single DOM update; existing children of the element are kept, as before.
sentenceElement.appendChild(sentenceFragment);
<p id='new-sentence'></p>
我认为使用 RegExp 会让您的生活更轻松:
const content = `Into the trunk we put two poles and the can of worms and a sack of sandwiches and a thermos of water. "We're going on a journey," my father said. "To a secret place. We'll catch the air! We'll catch the breeze!`;

/**
 * Split text into word chunks and punctuation chunks.
 *
 * A word chunk is a run of word characters and apostrophes. A punctuation
 * chunk is a run of . ; , : - _ ? ! " followed by any whitespace and double
 * quotes directly after it. Whitespace between two words is not returned.
 *
 * The hyphen in the character class is escaped on purpose: written as `:-_`
 * it would be a range from ':' to '_' and would also match '=', '<', '@'
 * and letters, while a literal '-' would never match.
 *
 * @param {string} text - The sentence to split.
 * @returns {string[]} Chunks in order of appearance; empty array when nothing matches.
 */
function splitWordsAndPunctuation(text) {
  // String.prototype.match returns null when there is no match at all.
  return text.match(/[\w']+|[.;,:\-_?!"]+[\s"]*/g) || [];
}

const result = splitWordsAndPunctuation(content);
console.log(result);
// Used to tell punctuation chunks from word chunks when rendering.
const punctuation = /[\.;,:\-_?!"]+/;
function App() {
return (
<div>
{result.map((w) =>
punctuation.test(w) ? w : <a href={`/word/${w}`}>{w + '\n'}</a>
)}
</div>
);
}
ReactDOM.render(<App/>, document.getElementById("root"))
<div id="root"></div>
<script src="https://cdnjs.cloudflare.com/ajax/libs/react/16.6.3/umd/react.production.min.js"></script>
<script src="https://cdnjs.cloudflare.com/ajax/libs/react-dom/16.6.3/umd/react-dom.production.min.js"></script>
我有一个句子和句子中的一系列可点击单词。数组不包含标点符号。
这是一句话
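Into the trunk we put two poles and the can of worms and a sack of sandwiches and a thermos of water. “We’re going on a journey,” my father said. “To a secret place. We’ll catch the air! We’ll catch the breeze!”(下面 tokens 中的位置索引对应的是这句英文原文;中文大意:我们在后备箱里放了两根杆子、一罐虫子、一袋三明治和一热水瓶。 “我们要去旅行了,”我父亲说。 “去一个秘密的地方。我们会赶上空气!我们会抓住 breeze!”)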
这是可点击词的结构。它是一个数组,包含每个单词在句子中起始位置和结束位置的索引。该数组不包含句子中的标点符号。
标点符号不可点击。
"tokens": [
{
"position": [
0,
4
],
"value": "into"
},
{
"position": [
5,
8
],
"value": "the"
},
{
"position": [
9,
14
],
"value": "trunk"
},
{
"position": [
15,
17
],
"value": "we"
},
{
"position": [
18,
21
],
"value": "put"
},
{
"position": [
22,
25
],
"value": "two"
},
{
"position": [
26,
31
],
"value": "poles"
},
{
"position": [
32,
35
],
"value": "and"
},
{
"position": [
36,
39
],
"value": "the"
},
{
"position": [
40,
43
],
"value": "can"
},
{
"position": [
44,
46
],
"value": "of"
},
{
"position": [
47,
52
],
"value": "worms"
},
{
"position": [
53,
56
],
"value": "and"
},
{
"position": [
57,
58
],
"value": "a"
},
{
"position": [
59,
63
],
"value": "sack"
},
{
"position": [
64,
66
],
"value": "of"
},
{
"position": [
67,
77
],
"value": "sandwiches"
},
{
"position": [
78,
81
],
"value": "and"
},
{
"position": [
82,
83
],
"value": "a"
},
{
"position": [
84,
91
],
"value": "thermos"
},
{
"position": [
92,
94
],
"value": "of"
},
{
"position": [
95,
100
],
"value": "water"
},
{
"position": [
103,
108
],
"value": "we're"
},
{
"position": [
109,
114
],
"value": "going"
},
{
"position": [
115,
117
],
"value": "on"
},
{
"position": [
118,
119
],
"value": "a"
},
{
"position": [
120,
127
],
"value": "journey"
},
{
"position": [
130,
132
],
"value": "my"
},
{
"position": [
133,
139
],
"value": "father"
},
{
"position": [
140,
144
],
"value": "said"
},
{
"position": [
147,
149
],
"value": "to"
},
{
"position": [
150,
151
],
"value": "a"
},
{
"position": [
152,
158
],
"value": "secret"
},
{
"position": [
159,
164
],
"value": "place"
},
{
"position": [
166,
171
],
"value": "we'll"
},
{
"position": [
172,
177
],
"value": "catch"
},
{
"position": [
178,
181
],
"value": "the"
},
{
"position": [
182,
185
],
"value": "air"
},
{
"position": [
187,
192
],
"value": "we'll"
},
{
"position": [
193,
198
],
"value": "catch"
},
{
"position": [
199,
202
],
"value": "the"
},
{
"position": [
203,
209
],
"value": "breeze"
}
]
},
这是我获取可点击字词的代码
// Builds one string per token: the token's own text from `content`, followed
// by the characters that come right after it. The trailing slice runs from
// `end` to `end + (end - start)`, i.e. it is as long as the word itself.
const getWordsFromTokens = tokens.map(({ position }) => {
  const [wordStart, wordEnd] = position;
  const wordLength = wordEnd - wordStart;
  // Text following the word (spaces, punctuation, and whatever else falls inside the slice).
  const trailingText = content.substring(wordEnd, wordEnd + wordLength);
  console.log(trailingText);
  return content.substring(wordStart, wordEnd) + trailingText;
});
这是我呈现文本的方式
return (
<div>
<p> {
getWordsFromTokens.map((word, index)=>{
return <a href={'/word/' + word} > {word}</a>
})
}
</p>
</div>
)
这是我的问题,当我渲染文本时,它看起来与原始文本不完全一样。我可能做错了什么?
这是最终结果的样子
我们在 tr 后备箱里放了两个电线杆和一罐 w 条虫子和一袋 s 三明治和一个热水瓶和热水瓶水。 “我们要去旅行了,”我父亲说。说。 “去一个秘密的地方。我们'我们会赶上 ai 空气! W 我们会抓住 br breeze!”
这样的解决方案怎么样?我使用 光标 来跟踪句子中的位置。
// Clickable-word tokens. `position` is the [start, end) character range of the
// word inside the sentence text and `value` is the word in lower case.
// Punctuation and whitespace have no entries of their own.
const tokens = [
  { position: [0, 4], value: 'into' },
  { position: [5, 8], value: 'the' },
  { position: [9, 14], value: 'trunk' },
  { position: [15, 17], value: 'we' },
  { position: [18, 21], value: 'put' },
  { position: [22, 25], value: 'two' },
  { position: [26, 31], value: 'poles' },
  { position: [32, 35], value: 'and' },
  { position: [36, 39], value: 'the' },
  { position: [40, 43], value: 'can' },
  { position: [44, 46], value: 'of' },
  { position: [47, 52], value: 'worms' },
  { position: [53, 56], value: 'and' },
  { position: [57, 58], value: 'a' },
  { position: [59, 63], value: 'sack' },
  { position: [64, 66], value: 'of' },
  { position: [67, 77], value: 'sandwiches' },
  { position: [78, 81], value: 'and' },
  { position: [82, 83], value: 'a' },
  { position: [84, 91], value: 'thermos' },
  { position: [92, 94], value: 'of' },
  { position: [95, 100], value: 'water' },
  { position: [103, 108], value: "we're" },
  { position: [109, 114], value: 'going' },
  { position: [115, 117], value: 'on' },
  { position: [118, 119], value: 'a' },
  { position: [120, 127], value: 'journey' },
  { position: [130, 132], value: 'my' },
  { position: [133, 139], value: 'father' },
  { position: [140, 144], value: 'said' },
  { position: [147, 149], value: 'to' },
  { position: [150, 151], value: 'a' },
  { position: [152, 158], value: 'secret' },
  { position: [159, 164], value: 'place' },
  { position: [166, 171], value: "we'll" },
  { position: [172, 177], value: 'catch' },
  { position: [178, 181], value: 'the' },
  { position: [182, 185], value: 'air' },
  { position: [187, 192], value: "we'll" },
  { position: [193, 198], value: 'catch' },
  { position: [199, 202], value: 'the' },
  { position: [203, 209], value: 'breeze' }
];
// The sentence text that the token positions index into.
const content = 'Into the trunk we put two poles and the can of worms and a sack of sandwiches and a thermos of water. “We’re going on a journey,” my father said. “To a secret place. We’ll catch the air! We’ll catch the breeze!"';

// Offset in `content` just past the last token consumed so far.
let cursorPosition = 0;

// Ordered segments of the sentence. For every token two entries are added:
// the text between the previous token and this one ('non-word', may be an
// empty string) and then the token's own text as written in `content` ('word').
const getWordsFromTokens = [];
for (const { position } of tokens) {
  const [from, to] = position;
  const gapBeforeWord = content.substring(cursorPosition, from);
  const wordText = content.substring(from, to);
  getWordsFromTokens.push(
    { type: 'non-word', value: gapBeforeWord },
    { type: 'word', value: wordText }
  );
  // Advance past this word so the next gap starts right after it.
  cursorPosition = to;
}
// Render the segments into #new-sentence: 'word' segments become links,
// everything else is inserted as plain text.
//
// Nodes are created through the DOM API instead of concatenating strings into
// innerHTML. That way a quote, '<' or '&' inside a segment is always treated as
// text (it cannot terminate the href attribute or be parsed as markup), and the
// target element is looked up and updated once rather than being re-serialized
// and re-parsed on every iteration.
const sentenceElement = document.getElementById('new-sentence');
const sentenceFragment = document.createDocumentFragment();
getWordsFromTokens.forEach(item => {
  if (item.type === 'word') {
    const link = document.createElement('a');
    link.setAttribute('href', `/word/${item.value}`);
    link.textContent = item.value;
    sentenceFragment.appendChild(link);
  } else {
    sentenceFragment.appendChild(document.createTextNode(item.value));
  }
});
const endOfSentence = content.substring(cursorPosition); // all characters (if any) after the last token
sentenceFragment.appendChild(document.createTextNode(endOfSentence));
// Single DOM update; existing children of the element are kept, as before.
sentenceElement.appendChild(sentenceFragment);
<p id='new-sentence'></p>
我认为使用 RegExp 会让您的生活更轻松:
const content = `Into the trunk we put two poles and the can of worms and a sack of sandwiches and a thermos of water. "We're going on a journey," my father said. "To a secret place. We'll catch the air! We'll catch the breeze!`;

/**
 * Split text into word chunks and punctuation chunks.
 *
 * A word chunk is a run of word characters and apostrophes. A punctuation
 * chunk is a run of . ; , : - _ ? ! " followed by any whitespace and double
 * quotes directly after it. Whitespace between two words is not returned.
 *
 * The hyphen in the character class is escaped on purpose: written as `:-_`
 * it would be a range from ':' to '_' and would also match '=', '<', '@'
 * and letters, while a literal '-' would never match.
 *
 * @param {string} text - The sentence to split.
 * @returns {string[]} Chunks in order of appearance; empty array when nothing matches.
 */
function splitWordsAndPunctuation(text) {
  // String.prototype.match returns null when there is no match at all.
  return text.match(/[\w']+|[.;,:\-_?!"]+[\s"]*/g) || [];
}

const result = splitWordsAndPunctuation(content);
console.log(result);
// Used to tell punctuation chunks from word chunks when rendering.
const punctuation = /[\.;,:\-_?!"]+/;
function App() {
return (
<div>
{result.map((w) =>
punctuation.test(w) ? w : <a href={`/word/${w}`}>{w + '\n'}</a>
)}
</div>
);
}
ReactDOM.render(<App/>, document.getElementById("root"))
<div id="root"></div>
<script src="https://cdnjs.cloudflare.com/ajax/libs/react/16.6.3/umd/react.production.min.js"></script>
<script src="https://cdnjs.cloudflare.com/ajax/libs/react-dom/16.6.3/umd/react-dom.production.min.js"></script>