Java - 获取脚本标签内的文本
Java - Obtain text within script tag
如何在此脚本中获取值 https://example1.com/2147414277.mp4?md5=OFvyZ55egHb4A5hUZJvSEQ&time=1580513886&ip=176.9.117.35?
第一个 Link 值 https://example.com/2147414277.mp4?md5=OFvyZ55egHb4A5hUZJvSEQ&time=1580513886 不起作用。
String html ="<script>function getcookie(Name){\n" +
" var search=Name+\"=\";\n" +
" if(document.cookie.length>0){\n" +
" offset=document.cookie.indexOf(search)\n" +
" if(offset!=-1){\n" +
" offset+=search.length\n" +
" end=document.cookie.indexOf(\";\",offset)\n" +
" if(end==-1){end=document.cookie.length}\n" +
" return unescape(document.cookie.substring(offset, en))\n" +
" }\n" +
" }\n" +
"}\n" +
" var player = new Playerjs({\n" +
" \"id\":\"player\",\n" +
" \"poster\":\"https://media.example.com/img/2147414277.jpg\",\n" +
" \"file\":\"[SD (480p)]https://example.com/2147414277.mp4?md5=OFvyZ55egHb4A5hUZJvSEQ&time=1580513886 or https://example1.com/2147414277.mp4?md5=OFvyZ55egHb4A5hUZJvSEQ&time=1580513886&ip=176.9.117.35,[HD (720р)]https://example2.com/720/2147414277.mp4?md5=GXD4cKVnM5RVY363Uxn9ww&time=1580513886 or https://exampl3.com/720/2147414277.mp4?md5=GXD4cKVnM5RVY363Uxn9ww&time=1580513886&ip=190.9.117.35\",\n" +
" \"default_quality\":\"SD (480p)\"\n" +
" });\n" +
" function PlayerjsEvents(event,id,data){\n" +
" if(event==\"start\"){\n" +
" var xhttp = new XMLHttpRequest();\n" +
" xhttp.open(\"GET\", \"stat.php?id=2147414277\", true);\n" +
" xhttp.send();\n" +
" }\n" +
" if(event==\"end\"){\n" +
" \n" +
" }\n" +
" }</script>";
Pattern p = Pattern.compile("file\"",Pattern.DOTALL);
String url = "";
for (Element element : script) {
Matcher m = p.matcher(element.data());
if (m.find()){
url = m.group(1);
}
}
System.out.println(url);
我不太明白 Java 正则表达式模式是如何工作的。我多次试图找到这个 URL,但都失败了。
如果有人愿意为此提供帮助,或者至少给我 link 新手 java 正则表达式指南,我将不胜感激。谢谢
在我看来,您正在尝试使用 java 从 JS 代码中提取 URL。
网上有许多正则表达式游乐场,您可以尝试,例如:
- http://buildregex.com
- https://regex101.com
- 当然还有Google
搜索。
针对您的具体情况:
import java.util.*;
import java.util.regex.*;
import java.net.*;
public class HelloWorld{
public static void main(String []args){
System.out.println("Hello World");
String jsCode ="<script>function getcookie(Name){\n" +
" var search=Name+\"=\";\n" +
" if(document.cookie.length>0){\n" +
" offset=document.cookie.indexOf(search)\n" +
" if(offset!=-1){\n" +
" offset+=search.length\n" +
" end=document.cookie.indexOf(\";\",offset)\n" +
" if(end==-1){end=document.cookie.length}\n" +
" return unescape(document.cookie.substring(offset, en))\n" +
" }\n" +
" }\n" +
"}\n" +
" var player = new Playerjs({\n" +
" \"id\":\"player\",\n" +
" \"poster\":\"https://media.example.com/img/2147414277.jpg\",\n" +
" \"file\":\"[SD (480p)]https://example.com/2147414277.mp4?md5=OFvyZ55egHb4A5hUZJvSEQ&time=1580513886 or https://example1.com/2147414277.mp4?md5=OFvyZ55egHb4A5hUZJvSEQ&time=1580513886&ip=176.9.117.35,[HD (720р)]https://example2.com/720/2147414277.mp4?md5=GXD4cKVnM5RVY363Uxn9ww&time=1580513886 or https://exampl3.com/720/2147414277.mp4?md5=GXD4cKVnM5RVY363Uxn9ww&time=1580513886&ip=190.9.117.35\",\n" +
" \"default_quality\":\"SD (480p)\"\n" +
" });\n" +
" function PlayerjsEvents(event,id,data){\n" +
" if(event==\"start\"){\n" +
" var xhttp = new XMLHttpRequest();\n" +
" xhttp.open(\"GET\", \"stat.php?id=2147414277\", true);\n" +
" xhttp.send();\n" +
" }\n" +
" if(event==\"end\"){\n" +
" \n" +
" }\n" +
" }</script>";
List<String> urls = new ArrayList<String>();
String myUrlPattern = "((https?|file):((//)|(\\))+[\w\d:#@%/;$()~_?\+-=\\\.&]*)";
Pattern p = Pattern.compile(myUrlPattern);
Matcher m = p.matcher(jsCode);
while (m.find()) {
urls.add(m.group());
}
for(String s: urls)
System.out.println(s);
}
}
给出:
- https://media.example.com/img/2147414277.jpg
- https://example.com/2147414277.mp4?md5=OFvyZ55egHb4A5hUZJvSEQ&time=1580513886
- https://example1.com/2147414277.mp4?md5=OFvyZ55egHb4A5hUZJvSEQ&time=1580513886&ip=176.9.117.35,
- https://example2.com/720/2147414277.mp4?md5=GXD4cKVnM5RVY363Uxn9ww&time=1580513886
- https://exampl3.com/720/2147414277.mp4?md5=GXD4cKVnM5RVY363Uxn9ww&time=1580513886&ip=190.9.117.35
希望对您有所帮助!
如何在此脚本中获取值 https://example1.com/2147414277.mp4?md5=OFvyZ55egHb4A5hUZJvSEQ&time=1580513886&ip=176.9.117.35? 第一个 Link 值 https://example.com/2147414277.mp4?md5=OFvyZ55egHb4A5hUZJvSEQ&time=1580513886 不起作用。
String html ="<script>function getcookie(Name){\n" +
" var search=Name+\"=\";\n" +
" if(document.cookie.length>0){\n" +
" offset=document.cookie.indexOf(search)\n" +
" if(offset!=-1){\n" +
" offset+=search.length\n" +
" end=document.cookie.indexOf(\";\",offset)\n" +
" if(end==-1){end=document.cookie.length}\n" +
" return unescape(document.cookie.substring(offset, en))\n" +
" }\n" +
" }\n" +
"}\n" +
" var player = new Playerjs({\n" +
" \"id\":\"player\",\n" +
" \"poster\":\"https://media.example.com/img/2147414277.jpg\",\n" +
" \"file\":\"[SD (480p)]https://example.com/2147414277.mp4?md5=OFvyZ55egHb4A5hUZJvSEQ&time=1580513886 or https://example1.com/2147414277.mp4?md5=OFvyZ55egHb4A5hUZJvSEQ&time=1580513886&ip=176.9.117.35,[HD (720р)]https://example2.com/720/2147414277.mp4?md5=GXD4cKVnM5RVY363Uxn9ww&time=1580513886 or https://exampl3.com/720/2147414277.mp4?md5=GXD4cKVnM5RVY363Uxn9ww&time=1580513886&ip=190.9.117.35\",\n" +
" \"default_quality\":\"SD (480p)\"\n" +
" });\n" +
" function PlayerjsEvents(event,id,data){\n" +
" if(event==\"start\"){\n" +
" var xhttp = new XMLHttpRequest();\n" +
" xhttp.open(\"GET\", \"stat.php?id=2147414277\", true);\n" +
" xhttp.send();\n" +
" }\n" +
" if(event==\"end\"){\n" +
" \n" +
" }\n" +
" }</script>";
Pattern p = Pattern.compile("file\"",Pattern.DOTALL);
String url = "";
for (Element element : script) {
Matcher m = p.matcher(element.data());
if (m.find()){
url = m.group(1);
}
}
System.out.println(url);
我不太明白 Java 正则表达式模式是如何工作的。我多次试图找到这个 URL,但都失败了。 如果有人愿意为此提供帮助,或者至少给我 link 新手 java 正则表达式指南,我将不胜感激。谢谢
在我看来,您正在尝试使用 java 从 JS 代码中提取 URL。 网上有许多正则表达式游乐场,您可以尝试,例如:
- http://buildregex.com
- https://regex101.com
- 当然还有Google 搜索。
针对您的具体情况:
import java.util.*;
import java.util.regex.*;
import java.net.*;
public class HelloWorld{
public static void main(String []args){
System.out.println("Hello World");
String jsCode ="<script>function getcookie(Name){\n" +
" var search=Name+\"=\";\n" +
" if(document.cookie.length>0){\n" +
" offset=document.cookie.indexOf(search)\n" +
" if(offset!=-1){\n" +
" offset+=search.length\n" +
" end=document.cookie.indexOf(\";\",offset)\n" +
" if(end==-1){end=document.cookie.length}\n" +
" return unescape(document.cookie.substring(offset, en))\n" +
" }\n" +
" }\n" +
"}\n" +
" var player = new Playerjs({\n" +
" \"id\":\"player\",\n" +
" \"poster\":\"https://media.example.com/img/2147414277.jpg\",\n" +
" \"file\":\"[SD (480p)]https://example.com/2147414277.mp4?md5=OFvyZ55egHb4A5hUZJvSEQ&time=1580513886 or https://example1.com/2147414277.mp4?md5=OFvyZ55egHb4A5hUZJvSEQ&time=1580513886&ip=176.9.117.35,[HD (720р)]https://example2.com/720/2147414277.mp4?md5=GXD4cKVnM5RVY363Uxn9ww&time=1580513886 or https://exampl3.com/720/2147414277.mp4?md5=GXD4cKVnM5RVY363Uxn9ww&time=1580513886&ip=190.9.117.35\",\n" +
" \"default_quality\":\"SD (480p)\"\n" +
" });\n" +
" function PlayerjsEvents(event,id,data){\n" +
" if(event==\"start\"){\n" +
" var xhttp = new XMLHttpRequest();\n" +
" xhttp.open(\"GET\", \"stat.php?id=2147414277\", true);\n" +
" xhttp.send();\n" +
" }\n" +
" if(event==\"end\"){\n" +
" \n" +
" }\n" +
" }</script>";
List<String> urls = new ArrayList<String>();
String myUrlPattern = "((https?|file):((//)|(\\))+[\w\d:#@%/;$()~_?\+-=\\\.&]*)";
Pattern p = Pattern.compile(myUrlPattern);
Matcher m = p.matcher(jsCode);
while (m.find()) {
urls.add(m.group());
}
for(String s: urls)
System.out.println(s);
}
}
给出:
- https://media.example.com/img/2147414277.jpg
- https://example.com/2147414277.mp4?md5=OFvyZ55egHb4A5hUZJvSEQ&time=1580513886
- https://example1.com/2147414277.mp4?md5=OFvyZ55egHb4A5hUZJvSEQ&time=1580513886&ip=176.9.117.35,
- https://example2.com/720/2147414277.mp4?md5=GXD4cKVnM5RVY363Uxn9ww&time=1580513886
- https://exampl3.com/720/2147414277.mp4?md5=GXD4cKVnM5RVY363Uxn9ww&time=1580513886&ip=190.9.117.35
希望对您有所帮助!