Lines 65-70
Link Here
|
65 |
public class PhpEmbeddingProvider extends EmbeddingProvider { |
65 |
public class PhpEmbeddingProvider extends EmbeddingProvider { |
66 |
|
66 |
|
67 |
public static final String GENERATED_CODE = "@@@"; //NOI18N |
67 |
public static final String GENERATED_CODE = "@@@"; //NOI18N |
|
|
68 |
private static final int MAX_EMBEDDING_LENGTH = 5000000; //cca 5M |
69 |
private static final Logger LOGGER = Logger.getLogger(PhpEmbeddingProvider.class.getName()); |
70 |
private static final String HTML_MIME_TYPE = "text/html"; //NOI18N |
68 |
|
71 |
|
69 |
@Override |
72 |
@Override |
70 |
public List<Embedding> getEmbeddings(Snapshot snapshot) { |
73 |
public List<Embedding> getEmbeddings(Snapshot snapshot) { |
Lines 100-108
Link Here
|
100 |
} else { |
103 |
} else { |
101 |
if (from >= 0) { |
104 |
if (from >= 0) { |
102 |
//lets suppose the text is always html :-( |
105 |
//lets suppose the text is always html :-( |
103 |
embeddings.add(snapshot.create(from, len, "text/html")); //NOI18N |
106 |
createHtmlEmbedding(embeddings, snapshot, from, len); |
104 |
//add only one virtual generated token for a sequence of PHP tokens |
107 |
//add only one virtual generated token for a sequence of PHP tokens |
105 |
embeddings.add(snapshot.create(GENERATED_CODE, "text/html")); |
108 |
embeddings.add(snapshot.create(GENERATED_CODE, HTML_MIME_TYPE)); |
106 |
} |
109 |
} |
107 |
|
110 |
|
108 |
from = -1; |
111 |
from = -1; |
Lines 111-129
Link Here
|
111 |
} |
114 |
} |
112 |
|
115 |
|
113 |
if (from >= 0) { |
116 |
if (from >= 0) { |
114 |
embeddings.add(snapshot.create(from, len, "text/html")); //NOI18N |
117 |
createHtmlEmbedding(embeddings, snapshot, from, len); |
115 |
} |
118 |
} |
116 |
|
119 |
|
117 |
if (embeddings.isEmpty()) { |
120 |
if (embeddings.isEmpty()) { |
118 |
//always embed html even if there isn't any |
121 |
//always embed html even if there isn't any |
119 |
//this causes the parsing api to run tasks registered to text/html |
122 |
//this causes the parsing api to run tasks registered to text/html |
120 |
//even if there isn't any html content |
123 |
//even if there isn't any html content |
121 |
return Collections.singletonList(snapshot.create("", "text/html")); |
124 |
return Collections.singletonList(snapshot.create("", HTML_MIME_TYPE)); |
122 |
} else { |
125 |
} else { |
123 |
return Collections.singletonList(Embedding.create(embeddings)); |
126 |
return Collections.singletonList(Embedding.create(embeddings)); |
124 |
} |
127 |
} |
125 |
} |
128 |
} |
126 |
|
129 |
|
|
|
130 |
private static void createHtmlEmbedding(List<Embedding> embeddings, Snapshot snapshot, int from, int length) { |
131 |
assert embeddings != null; |
132 |
assert snapshot != null; |
133 |
if (length <= MAX_EMBEDDING_LENGTH) { |
134 |
embeddings.add(snapshot.create(from, length, HTML_MIME_TYPE)); //NOI18N |
135 |
} else { |
136 |
LOGGER.log(Level.FINE, "HTML embedding wasn''t created - from: {0}, length: {1}", new Object[] {from, length}); |
137 |
} |
138 |
} |
139 |
|
127 |
@Override |
140 |
@Override |
128 |
public int getPriority() { |
141 |
public int getPriority() { |
129 |
return 110; |
142 |
return 110; |