2024/12/28:JSP解析追踪尝试

试着追踪下tomcat解析jsp的流程,看看jsp最终怎么解析为java文件

环境:从github直接克隆main分支,commit更新至fdec436;windows11,java8u65

JspServlet.service

jsp在tomcat中默认使用这个类进行解析

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
public void service (HttpServletRequest request, HttpServletResponse response)
throws ServletException, IOException {

String jspUri = jspFile;

if (jspUri == null) {
... 获取jspuri
}

...

try {
boolean precompile = preCompile(request);//检查是否预编译
serviceJspFile(request, response, jspUri, precompile);//下一步处理
} catch (RuntimeException | IOException | ServletException e) {
throw e;
} catch (Throwable e) {
ExceptionUtils.handleThrowable(e);
throw new ServletException(e);
}

}

JspServlet.serviceJspFile

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
private void serviceJspFile(HttpServletRequest request,
HttpServletResponse response, String jspUri,
boolean precompile)
throws ServletException, IOException {

JspServletWrapper wrapper = rctxt.getWrapper(jspUri);
if (wrapper == null) {//判断是否已加载,没有则封装wrapper
synchronized(this) {
wrapper = rctxt.getWrapper(jspUri);
if (wrapper == null) {
// 避免重复加载
if (null == context.getResource(jspUri)) {
handleMissingResource(request, response, jspUri);
return;
}
wrapper = new JspServletWrapper(config, options, jspUri,
rctxt);
rctxt.addWrapper(jspUri,wrapper);
}
}
}

try {
wrapper.service(request, response, precompile);
} catch (FileNotFoundException fnfe) {
handleMissingResource(request, response, jspUri);
}

}

JspServletWrapper.service

这个函数主要功能:1.编译 2.加载编译后的class文件 3.处理加载jsp的数量限制 4.处理请求

对编译部分的处理如下:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
if (options.getDevelopment() || mustCompile) {
synchronized (this) {
if (options.getDevelopment() || mustCompile) {
// The following sets reload to true, if necessary
ctxt.compile();
mustCompile = false;
}
}
} else {
if (compileException != null) {
// Throw cached compilation exception
throw compileException;
}
}

其中,ctxt的值在JspServletWrapper初始化时被赋予:

1
2
3
ctxt = new JspCompilationContext(jspUri, tagInfo, options,
servletContext, this, rctxt,
tagJar);

JspCompilationContext.compile

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
public void compile() throws JasperException, FileNotFoundException {
createCompiler();//创建编译类
if (jspCompiler.isOutDated()) {//这个判断会获得jsp时间戳和对应java或class时间戳判断是否需要重新编译
if (isRemoved()) {
throw new FileNotFoundException(jspUri);
}
try {
jspCompiler.removeGeneratedFiles();
jspLoader = null;
jspCompiler.compile();//执行编译
jsw.setReload(true);
jsw.setCompilationException(null);
} catch (JasperException ex) {
// Cache compilation exception
jsw.setCompilationException(ex);
if (options.getDevelopment() && options.getRecompileOnFail()) {
// Force a recompilation attempt on next access
jsw.setLastModificationTest(-1);
}
throw ex;
} catch (FileNotFoundException fnfe) {
// Re-throw to let caller handle this - will result in a 404
throw fnfe;
} catch (Exception ex) {
JasperException je = new JasperException(
Localizer.getMessage("jsp.error.unable.compile"),
ex);
// Cache compilation exception
jsw.setCompilationException(je);
throw je;
}
}
}

createCompiler:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
public Compiler createCompiler() {//默认使用org.apache.jasper.compiler.JDTCompiler
if (jspCompiler != null ) {
return jspCompiler;
}
jspCompiler = null;
if (options.getCompilerClassName() != null) {
jspCompiler = createCompiler(options.getCompilerClassName());//获取类名进行类加载
} else {
if (options.getCompiler() == null) {
jspCompiler = createCompiler("org.apache.jasper.compiler.JDTCompiler");
if (jspCompiler == null) {
jspCompiler = createCompiler("org.apache.jasper.compiler.AntCompiler");
}
} else {
jspCompiler = createCompiler("org.apache.jasper.compiler.AntCompiler");
if (jspCompiler == null) {
jspCompiler = createCompiler("org.apache.jasper.compiler.JDTCompiler");
}
}
}
if (jspCompiler == null) {
throw new IllegalStateException(Localizer.getMessage("jsp.error.compiler.config",
options.getCompilerClassName(), options.getCompiler()));
}
jspCompiler.init(this, jsw);
return jspCompiler;
}

AntCompiler和JDTCompiler均为Compiler子类

(JDT)Compiler.compile

JDTCompiler未重写Compiler的compile函数

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
public void compile(boolean compileClass, boolean jspcMode)
throws FileNotFoundException, JasperException, Exception {
if (errDispatcher == null) {
this.errDispatcher = new ErrorDispatcher(jspcMode);
}

try {
final Long jspLastModified = ctxt.getLastModified(ctxt.getJspFile());
Map<String,SmapStratum> smaps = generateJava();//生成java文件

File javaFile = new File(ctxt.getServletJavaFileName());
if (!javaFile.setLastModified(jspLastModified.longValue())) {
throw new JasperException(Localizer.getMessage("jsp.error.setLastModified", javaFile));
}
if (compileClass) {//判断是否生成class文件
generateClass(smaps);
//生成class文件,在JDTCompiler中重写
...
//获得文件路径设置了时间戳,与上面对java的处理类似
}
} finally {
...
//垃圾回收
}
}

(JDT)Compiler.generateJava

同样的,generateJava函数在JDTCompiler里也并未重写

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
...
// Parse the file
ParserController parserCtl = new ParserController(ctxt, this);

// Pass 1 - the directives
Node.Nodes directives = parserCtl.parseDirectives(ctxt.getJspFile());
//在parseDirectives设置directiveOnly为true并从JspCompilationContext获取isTagFile的值
Validator.validateDirectives(this, directives);

// Pass 2 - the whole translation unit
pageNodes = parserCtl.parse(ctxt.getJspFile());
//parse设置directiveOnly为false,从JspCompilationContext获取isTagFile的值
//跟第一步一样使用ParserController.doParse处理

// Leave this until now since it can only be set once - bug 49726
if (pageInfo.getContentType() == null && jspProperty.getDefaultContentType() != null) {
pageInfo.setContentType(jspProperty.getDefaultContentType());
}

//以上代码解析jsp文件,将标签以节点形式保存

if (ctxt.isPrototypeMode()) {
// generate prototype .java file for the tag file
try (ServletWriter writer = setupContextWriter(javaFileName)) {
Generator.generate(writer, this, pageNodes);
return null;
}
}


Validator.validateExDirectives(this, pageNodes);

if (log.isDebugEnabled()) {
t2 = System.currentTimeMillis();
}

// Collect page info
Collector.collect(this, pageNodes);

tfp = new TagFileProcessor();
tfp.loadTagFiles(this, pageNodes);

if (log.isDebugEnabled()) {
t3 = System.currentTimeMillis();
}

// Determine which custom tag needs to declare which scripting vars
ScriptingVariabler.set(pageNodes, errDispatcher);

// Optimizations by Tag Plugins
TagPluginManager tagPluginManager = options.getTagPluginManager();
tagPluginManager.apply(pageNodes, errDispatcher, pageInfo);

// Optimization: concatenate contiguous template texts.
TextOptimizer.concatenate(this, pageNodes);

// Generate static function mapper codes.
ELFunctionMapper.map(pageNodes);

// generate servlet .java file
try (ServletWriter writer = setupContextWriter(javaFileName)) {
Generator.generate(writer, this, pageNodes);
}
//生成java文件,取出节点的内容并拼接为java文件

初步解析

parserCtl.parseDirectives和parserCtl.parse最终处理都在ParserController.doParse里

不过第一步解析只解析jsp指令,第二步解析整个文件

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
private Node.Nodes doParse(String inFileName, Node parent, Jar jar)
throws FileNotFoundException, JasperException, IOException {

Node.Nodes parsedPage = null;
isEncodingSpecifiedInProlog = false;
isBomPresent = false;
isDefaultPageEncoding = false;

boolean processingTagInJar = jar != null && baseDirStack.peekFirst() != null &&
baseDirStack.peekFirst().startsWith(TAGS_IN_JAR_LOCATION);
String absFileName = null;
try {
absFileName = resolveFileName(inFileName);//对文件路径标准化处理
} catch (URISyntaxException e) {
err.jspError("jsp.error.invalid.includeInTagFileJar", inFileName, jar.getJarFileURL().toString());
}
if (processingTagInJar && !absFileName.startsWith(TAGS_IN_JAR_LOCATION)) {
//检测jar包特征
err.jspError("jsp.error.invalid.includeInTagFileJar", inFileName, jar.getJarFileURL().toString());
}
String jspConfigPageEnc = getJspConfigPageEncoding(absFileName);
//从web.xml获取指定url的<page-encoding>元素的值

determineSyntaxAndEncoding(absFileName, jar, jspConfigPageEnc);
//判断文件是否以xml解析 及文件编码 详细见下

if (parent != null) {
//添加依赖资源
if (jar == null) {
compiler.getPageInfo().addDependant(absFileName,
ctxt.getLastModified(absFileName));
} else {
String entry = absFileName.substring(1);
compiler.getPageInfo().addDependant(jar.getURL(entry),
Long.valueOf(jar.getLastModified(entry)));

}
}

if ((isXml && isEncodingSpecifiedInProlog) || isBomPresent) {
//再次确认编码是否正确,避免从配置文件读取的编码与识别出来的编码不同
if (jspConfigPageEnc != null && !jspConfigPageEnc.equals(sourceEnc)
&& (!jspConfigPageEnc.startsWith("UTF-16")
|| !sourceEnc.startsWith("UTF-16"))) {
err.jspError("jsp.error.prolog_config_encoding_mismatch",
sourceEnc, jspConfigPageEnc);
}
}
//对不同语法的jsp进行解析
//在下面补充
if (isXml) {

parsedPage = JspDocumentParser.parse(this, absFileName, jar, parent,
isTagFile, directiveOnly, sourceEnc, jspConfigPageEnc,
isEncodingSpecifiedInProlog, isBomPresent);
} else {
try (InputStreamReader inStreamReader = JspUtil.getReader(
absFileName, sourceEnc, jar, ctxt, err, skip)) {
JspReader jspReader = new JspReader(ctxt, absFileName,
inStreamReader, err);
parsedPage = Parser.parse(this, jspReader, parent, isTagFile,
directiveOnly, jar, sourceEnc, jspConfigPageEnc,
isDefaultPageEncoding, isBomPresent);
}
}

baseDirStack.remove();

return parsedPage;
}

编码处理

determineSyntaxAndEncoding对是否为xml的解析如下:

1
2
3
4
5
6
7
8
9
10
11
12
		JspConfig jspConfig = ctxt.getOptions().getJspConfig();
JspConfig.JspProperty jspProperty = jspConfig.findJspProperty(
absFileName);
if (jspProperty.isXml() != null) {
// If <is-xml> is specified in a <jsp-property-group>, it is used.
isXml = JspUtil.booleanValue(jspProperty.isXml());
isExternal = true;
} else if (absFileName.endsWith(".jspx") || absFileName.endsWith(".tagx")) {
isXml = true;
isExternal = true;
}
//优先从web.xml中读取,如果没有则以文件扩展名判断

<jsp-property-group>标签内设置了<is-xml>的值为false时,判断如下:

1
2
3
4
5
6
7
8
if (isExternal && !isXml) {

sourceEnc = jspConfigPageEnc;
if (sourceEnc != null) {
return;
}
sourceEnc = "ISO-8859-1";
} else ...

此时若配置文件没对文件编码设置则使用ISO-8859-1

而当文件是xml格式或尚无法确定语法时(即不满足"isExternal为true且isXml为false"这一条件),会进入else分支,用如下方式检测编码:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
EncodingDetector encodingDetector;
try (BufferedInputStream bis = JspUtil.getInputStream(absFileName, jar, ctxt)) {
encodingDetector = new EncodingDetector(bis);
}

sourceEnc = encodingDetector.getEncoding();
isEncodingSpecifiedInProlog = encodingDetector.isEncodingSpecifiedInProlog();
//通过BOM或prolog中指定的encoding属性确认编码
isBomPresent = (encodingDetector.getSkip() > 0);//判断是否存在BOM头
skip = encodingDetector.getSkip();

if (!isXml && sourceEnc.equals("UTF-8")) {
sourceEnc = "ISO-8859-1";
revert = true;
}

当文件此时尚未被判定为xml且检测到的编码为UTF-8时,先暂时改为ISO-8859-1,并将revert置为true,以便后面确认后恢复为UTF-8

最后,对于尚未通过配置或后缀确定语法的文件(isExternal为false),会根据文件内容再次判断:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
JspReader jspReader = null;
try {
jspReader = new JspReader(ctxt, absFileName, sourceEnc, jar, err);
} catch (FileNotFoundException ex) {
throw new JasperException(ex);
}
Mark startMark = jspReader.mark();
if (!isExternal) {
jspReader.reset(startMark);
if (hasJspRoot(jspReader)) {//hasJspRoot会直接获取root标签判断是否为xml
//检测xmlns:
if (revert) {//这个判断说明之前对是否为xml判断错误,且编码被错误修改
sourceEnc = "UTF-8";
}
isXml = true;
return;
} else {
if (revert && isBomPresent) {//由BOM头判断文件本身为UTF8,在此恢复UTF8编码
sourceEnc = "UTF-8";
}
isXml = false;
}
}

此时,任何存在xml格式的可能都已经排除,可以判断为普通jsp文件,此时仍然无法判断文件编码的jsp直接设置为ISO-8859-1编码

可以看到,关于是否为xml语法的判断优先级:文件配置>文件后缀>文件内容

对于文件编码:配置文件的优先级最高;xml文件和存在BOM头的文件默认UTF-8;没有BOM头且不是xml的文件默认ISO-8859-1

xml解析

针对xml语法使用JspDocumentParser.parse解析

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
JspDocumentParser jspDocParser = new JspDocumentParser(pc, path, isTagFile, directivesOnly);
Node.Nodes pageNodes = null;

try {

Node.Root dummyRoot = new Node.Root(null, parent, true,pc.getJspCompilationContext().getOptions().getTempVariableNamePrefix());
dummyRoot.setPageEncoding(pageEnc);
dummyRoot.setJspConfigPageEncoding(jspConfigPageEnc);
dummyRoot.setIsEncodingSpecifiedInProlog(isEncodingSpecifiedInProlog);
dummyRoot.setIsBomPresent(isBomPresent);
jspDocParser.current = dummyRoot;
if (parent == null) {
jspDocParser.addInclude(dummyRoot, jspDocParser.pageInfo.getIncludePrelude());
//从parserCtl.parseDirectives和parserCtl.parse进入时默认为null
//根据注释,会实现web.xml中jsp-config元素的include-prelude和include-coda功能
} else {
jspDocParser.isTop = false;
}

jspDocParser.isValidating = false;

SAXParser saxParser = getSAXParser(false, jspDocParser);
InputSource source = JspUtil.getInputSource(path, jar, jspDocParser.ctxt);
try {
saxParser.parse(source, jspDocParser);
//获得一个实现XMLReader接口的类进行解析
} catch (EnableDTDValidationException e) {
saxParser = getSAXParser(true, jspDocParser);
jspDocParser.isValidating = true;
try {
source.getByteStream().close();
} catch (IOException e2) {

}
source = JspUtil.getInputSource(path, jar, jspDocParser.ctxt);
saxParser.parse(source, jspDocParser);
} finally {
try {
source.getByteStream().close();
} catch (IOException e) {

}
}

if (parent == null) {
jspDocParser.addInclude(dummyRoot, jspDocParser.pageInfo.getIncludeCoda());
}

pageNodes = new Node.Nodes(dummyRoot);

} catch (IOException ioe) {
jspDocParser.err.jspError(ioe, "jsp.error.data.file.read", path);
} catch (SAXParseException e) {
jspDocParser.err.jspError(new Mark(jspDocParser.ctxt, path, e.getLineNumber(), e.getColumnNumber()), e,
e.getMessage());
} catch (Exception e) {
jspDocParser.err.jspError(e, "jsp.error.data.file.processing", path);
}

return pageNodes;

JSP解析

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
public static Node.Nodes parse(ParserController pc, JspReader reader,
Node parent, boolean isTagFile, boolean directivesOnly,
Jar jar, String pageEnc, String jspConfigPageEnc,
boolean isDefaultPageEncoding, boolean isBomPresent)
throws JasperException {

Parser parser = new Parser(pc, reader, isTagFile, directivesOnly, jar);

Node.Root root = new Node.Root(reader.mark(), parent, false,
pc.getJspCompilationContext().getOptions().getTempVariableNamePrefix());
root.setPageEncoding(pageEnc);
root.setJspConfigPageEncoding(jspConfigPageEnc);
root.setIsDefaultPageEncoding(isDefaultPageEncoding);
root.setIsBomPresent(isBomPresent);

PageInfo pageInfo = pc.getCompiler().getPageInfo();
if (parent == null && !isTagFile) {
parser.addInclude(root, pageInfo.getIncludePrelude());
}
if (directivesOnly) {//根据是否只解析指令(directivesOnly)存在不同的处理
parser.parseFileDirectives(root);
} else {
while (reader.hasMoreInput()) {
parser.parseElements(root);
}
}
if (parent == null && !isTagFile) {
parser.addInclude(root, pageInfo.getIncludeCoda());
}

Node.Nodes page = new Node.Nodes(root);
return page;
}

parseFileDirectives

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
private void parseFileDirectives(Node parent) throws JasperException {
reader.skipUntil("<");
while (reader.hasMoreInput()) {
start = reader.mark();
if (reader.matches("%--")) {
// Comment
reader.skipUntil("--%>");
} else if (reader.matches("%@")) {
parseDirective(parent);
} else if (reader.matches("jsp:directive.")) {
parseXMLDirective(parent);
} else if (reader.matches("%!")) {
// Declaration
reader.skipUntil("%>");
} else if (reader.matches("%=")) {
// Expression
reader.skipUntil("%>");
} else if (reader.matches("%")) {
// Scriptlet
reader.skipUntil("%>");
}
reader.skipUntil("<");
}
}

parseElements

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
private void parseElements(Node parent) throws JasperException {
if (scriptlessCount > 0) {
// vc: ScriptlessBody
// We must follow the ScriptlessBody production if one of
// our parents is ScriptlessBody.
parseElementsScriptless(parent);
return;
}

start = reader.mark();
if (reader.matches("<%--")) {
parseComment(parent);
} else if (reader.matches("<%@")) {
parseDirective(parent);
} else if (reader.matches("<jsp:directive.")) {
parseXMLDirective(parent);
} else if (reader.matches("<%!")) {
parseDeclaration(parent);
} else if (reader.matches("<jsp:declaration")) {
parseXMLDeclaration(parent);
} else if (reader.matches("<%=")) {
parseExpression(parent);
} else if (reader.matches("<jsp:expression")) {
parseXMLExpression(parent);
} else if (reader.matches("<%")) {
parseScriptlet(parent);
} else if (reader.matches("<jsp:scriptlet")) {
parseXMLScriptlet(parent);
} else if (reader.matches("<jsp:text")) {
parseXMLTemplateText(parent);
} else if (!pageInfo.isELIgnored() && reader.matches("${")) {
parseELExpression(parent, '$');
} else if (!pageInfo.isELIgnored()
&& !pageInfo.isDeferredSyntaxAllowedAsLiteral()
&& reader.matches("#{")) {
parseELExpression(parent, '#');
} else if (reader.matches("<jsp:")) {
parseStandardAction(parent);
} else if (!parseCustomTag(parent)) {
checkUnbalancedEndTag();
parseTemplateText(parent);
}
}

这之后就是对各类标签内容的具体处理了

最终在Generator.generate拼接输出为java代码