Trying out model
This commit is contained in:
14
src/main/java/com/rak/model/config/TestConverter.java
Normal file
14
src/main/java/com/rak/model/config/TestConverter.java
Normal file
@@ -0,0 +1,14 @@
|
||||
package com.rak.model.config;
|
||||
|
||||
import org.eclipse.microprofile.config.spi.Converter;
|
||||
|
||||
public class TestConverter {
|
||||
|
||||
public static class ActualConverter implements Converter<Attribute> {
|
||||
@Override
|
||||
public Attribute convert(final String value) throws IllegalArgumentException, NullPointerException {
|
||||
return null;
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
@@ -13,7 +13,7 @@ interface SourcesConfiguration {
|
||||
fun id(): String
|
||||
fun name(): String
|
||||
fun domain(): String
|
||||
fun urlPatterns(): Optional<MutableList<String>>
|
||||
fun urlPatterns(): Optional<MutableSet<String>>
|
||||
fun selectors(): Selectors
|
||||
|
||||
interface Selectors {
|
||||
|
||||
@@ -31,7 +31,7 @@ class ExampleResource(
|
||||
provider: String,
|
||||
@RestQuery
|
||||
path: String
|
||||
): String {
|
||||
): Map<String, String> {
|
||||
val sources = sourcesConfiguration
|
||||
.sources()
|
||||
.filter {
|
||||
@@ -44,13 +44,19 @@ class ExampleResource(
|
||||
.trim()
|
||||
.replace(" ", "_")
|
||||
|
||||
Log.info(newPath)
|
||||
|
||||
val doc: Document = Jsoup.connect("https://${source.domain()}/$newPath").get()
|
||||
|
||||
val regionalSetSelector = source.selectors().regionalSet().get()
|
||||
val regionalSetRoot = doc.selectFirst(regionalSetSelector.root().get())!!
|
||||
|
||||
return scrapeService.extractTextFromRootBySteps(regionalSetRoot, source.selectors().regionalSet().get().id().steps()) ?: "whoomp whoomp"
|
||||
val setId: String? = scrapeService.extractTextFromRootBySteps(regionalSetRoot, source.selectors().regionalSet().get().id().steps())
|
||||
val setLanguage: String? = scrapeService.extractTextFromRootBySteps(regionalSetRoot, source.selectors().regionalSet().get().language().steps())
|
||||
val setKey: String? = scrapeService.extractTextFromRootBySteps(regionalSetRoot, source.selectors().regionalSet().get().regionKey().steps())
|
||||
|
||||
return mapOf(
|
||||
Pair("id", setId ?: "N/A"),
|
||||
Pair("language", setLanguage ?: "N/A"),
|
||||
Pair("key", setKey ?: "N/A"),
|
||||
)
|
||||
}
|
||||
}
|
||||
7
src/main/kotlin/com/rak/model/XPathTarget.kt
Normal file
7
src/main/kotlin/com/rak/model/XPathTarget.kt
Normal file
@@ -0,0 +1,7 @@
|
||||
package com.rak.model
|
||||
|
||||
/**
 * Kind of node an XPath expression is classified as targeting.
 */
enum class XPathTarget {
    /** The expression targets a text node. */
    TEXT,

    /** The expression targets an attribute. */
    ATTRIBUTE,

    /** The expression targets an element. */
    ELEMENT,
}
|
||||
5
src/main/kotlin/com/rak/model/config/Attribute.kt
Normal file
5
src/main/kotlin/com/rak/model/config/Attribute.kt
Normal file
@@ -0,0 +1,5 @@
|
||||
package com.rak.model.config
|
||||
|
||||
/**
 * A configured attribute, described by the steps attached to it.
 *
 * @property steps the [Step] entries belonging to this attribute.
 */
data class Attribute(
    val steps: Set<Step>,
)
|
||||
8
src/main/kotlin/com/rak/model/config/CardConfigModel.kt
Normal file
8
src/main/kotlin/com/rak/model/config/CardConfigModel.kt
Normal file
@@ -0,0 +1,8 @@
|
||||
package com.rak.model.config
|
||||
|
||||
/**
 * [ConfigModel] carrying the attribute definitions of a card.
 *
 * @property root the root value required by [ConfigModel].
 * @property name attribute definition for the card's name.
 * @property attack attribute definition for the card's attack.
 * @property effect attribute definition for the card's effect.
 */
data class CardConfigModel(
    override val root: String,
    val name: Attribute,
    val attack: Attribute,
    val effect: Attribute,
) : ConfigModel
|
||||
5
src/main/kotlin/com/rak/model/config/ConfigModel.kt
Normal file
5
src/main/kotlin/com/rak/model/config/ConfigModel.kt
Normal file
@@ -0,0 +1,5 @@
|
||||
package com.rak.model.config
|
||||
|
||||
/**
 * Common contract of all configuration models: each one exposes a [root].
 */
interface ConfigModel {
    /** Root value of the model (presumably a selector for its root element; confirm against callers). */
    val root: String
}
|
||||
@@ -0,0 +1,8 @@
|
||||
package com.rak.model.config
|
||||
|
||||
/**
 * [ConfigModel] carrying the attribute definitions of a regional set.
 *
 * @property root the root value required by [ConfigModel].
 * @property id attribute definition for the set's id.
 * @property language attribute definition for the set's language.
 * @property regionKey attribute definition for the set's region key.
 */
data class RegionalSetConfigModel(
    override val root: String,
    val id: Attribute,
    val language: Attribute,
    val regionKey: Attribute,
) : ConfigModel
|
||||
@@ -0,0 +1,8 @@
|
||||
package com.rak.model.config
|
||||
|
||||
/**
 * Immutable model of a single configured source.
 *
 * @property id identifier of the source.
 * @property name name of the source.
 * @property urlPatterns URL patterns configured for the source.
 * @property selectors the [ConfigModel] entries configured for the source.
 */
data class SourceConfiguration(
    val id: String,
    val name: String,
    val urlPatterns: Set<String>,
    val selectors: Set<ConfigModel>,
)
|
||||
11
src/main/kotlin/com/rak/model/config/Step.kt
Normal file
11
src/main/kotlin/com/rak/model/config/Step.kt
Normal file
@@ -0,0 +1,11 @@
|
||||
package com.rak.model.config
|
||||
|
||||
/**
 * A single step: a selector expression together with the language it is written in.
 *
 * @property type the selector language of [value].
 * @property value the selector expression itself.
 */
data class Step(
    val type: Type,
    val value: String,
) {
    /** Selector languages a [Step] can be written in. */
    enum class Type {
        /** The step value is an XPath expression. */
        XPATH,

        /** The step value is a CSS selector. */
        CSS,
    }
}
|
||||
@@ -1,68 +1,40 @@
|
||||
package com.rak.service
|
||||
|
||||
import com.rak.config.SourcesConfiguration
|
||||
import com.rak.model.XPathTarget
|
||||
import com.rak.util.XPathUtil
|
||||
import jakarta.enterprise.context.ApplicationScoped
|
||||
import org.jsoup.nodes.Element
|
||||
import org.jsoup.nodes.TextNode
|
||||
import org.jsoup.select.Evaluator
|
||||
import java.util.concurrent.LinkedBlockingQueue
|
||||
|
||||
@ApplicationScoped
|
||||
class ScrapeService {
|
||||
|
||||
companion object {
|
||||
private val TEXT_NODE_MATCHER: Regex = Regex("^.*text\\(\\)$")
|
||||
|
||||
private fun evaluateXpath(element: Element, xpath: String): Element? {
|
||||
return element.selectXpath(xpath).first()
|
||||
}
|
||||
|
||||
private fun evaluateCssSelector(element: Element, cssSelector: String): Element? {
|
||||
return null
|
||||
}
|
||||
// XPath
|
||||
// - text()
|
||||
// - last step (default to text())
|
||||
// CSS
|
||||
// - last step???
|
||||
private fun untilText(): String? {
|
||||
|
||||
|
||||
return null
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
fun extractTextFromRootBySteps(
|
||||
root: Element,
|
||||
steps: Set<SourcesConfiguration.SourceConfig.Selectors.StepDefinition>
|
||||
): String? {
|
||||
var currentElement: Element? = root.clone()
|
||||
val stepsAsQueue = LinkedBlockingQueue(
|
||||
steps
|
||||
)
|
||||
|
||||
while (stepsAsQueue.isNotEmpty()) {
|
||||
val step = stepsAsQueue.take()
|
||||
val stepTargetsTextNode: Boolean = TEXT_NODE_MATCHER.matches(step.value())
|
||||
var result: String? = null
|
||||
|
||||
for (index in 0 until steps.size) {
|
||||
val currentStep = steps.elementAtOrNull(index) ?: return null
|
||||
if (currentElement == null) {
|
||||
return null
|
||||
throw IllegalStateException()
|
||||
}
|
||||
|
||||
currentElement = if (step.type() == "xpath") {
|
||||
if (stepTargetsTextNode) {
|
||||
return currentElement.selectXpath(step.value(), TextNode::class.java).first().text()
|
||||
}
|
||||
else {
|
||||
currentElement.selectXpath(step.value()).first()
|
||||
}
|
||||
if (index == steps.size - 1) {
|
||||
result = XPathUtil.extractResult(currentElement, currentStep.value())
|
||||
}
|
||||
else {
|
||||
currentElement.selectFirst(step.value())
|
||||
currentElement = XPathUtil.getNextElement(currentElement, currentStep.value())
|
||||
}
|
||||
}
|
||||
|
||||
return null
|
||||
|
||||
return result
|
||||
}
|
||||
|
||||
}
|
||||
31
src/main/kotlin/com/rak/service/SourceService.kt
Normal file
31
src/main/kotlin/com/rak/service/SourceService.kt
Normal file
@@ -0,0 +1,31 @@
|
||||
package com.rak.service
|
||||
|
||||
import com.rak.config.SourcesConfiguration
|
||||
import com.rak.model.config.SourceConfiguration
|
||||
import jakarta.annotation.PostConstruct
|
||||
import jakarta.enterprise.context.ApplicationScoped
|
||||
|
||||
/**
 * Application-scoped service that turns the entries of [SourcesConfiguration]
 * into [SourceConfiguration] model objects after construction.
 */
@ApplicationScoped
class SourceService(
    private val sourcesConfiguration: SourcesConfiguration
) {

    /** Models built by [init]; one per configured source. */
    private val sources: MutableSet<SourceConfiguration> = mutableSetOf()

    /**
     * Builds a [SourceConfiguration] for every configured source and stores it.
     *
     * A source without URL patterns gets an empty set; selectors are always
     * left empty here.
     */
    @PostConstruct
    fun init() {
        val configuredSources = sourcesConfiguration.sources()

        configuredSources.forEach { entry ->
            sources.add(
                SourceConfiguration(
                    id = entry.id(),
                    name = entry.name(),
                    urlPatterns = entry.urlPatterns().orElse(mutableSetOf()),
                    selectors = setOf(),
                )
            )
        }
    }
}
|
||||
54
src/main/kotlin/com/rak/util/XPathUtil.kt
Normal file
54
src/main/kotlin/com/rak/util/XPathUtil.kt
Normal file
@@ -0,0 +1,54 @@
|
||||
package com.rak.util
|
||||
|
||||
import com.rak.model.XPathTarget
|
||||
import org.jsoup.nodes.Element
|
||||
import org.jsoup.nodes.TextNode
|
||||
|
||||
/**
 * Static helpers for evaluating XPath steps against jsoup elements.
 *
 * Not instantiable; all functionality lives in the companion object.
 */
class XPathUtil private constructor() {

    companion object {
        /** Matches any path that ends in `text()`. */
        private val TEXT_NODE_MATCHER: Regex = Regex("^.*text\\(\\)$")

        /** Matches paths such as `//li/abbr/@title`; group 1 captures the attribute name. */
        private val ATTRIBUTE_MATCHER: Regex = Regex("^//[/a-z]*@([a-z]*)$")

        /**
         * Reads the attribute named at the end of [xpath] directly from [root].
         *
         * Only the attribute name is taken from the path; the element part of the
         * path is not evaluated.
         *
         * @return the attribute value, or `null` when [xpath] is not an attribute
         * path or the value is blank.
         */
        private fun extractTextFromAttribute(root: Element, xpath: String): String? {
            // groupValues must be reached through a safe call as well: indexing the
            // nullable result of `?.groupValues` directly does not compile.
            val attributeName = ATTRIBUTE_MATCHER.matchEntire(xpath)?.groupValues?.get(1) ?: return null

            return root.attr(attributeName).ifBlank { null }
        }

        /**
         * Evaluates [xpath] against [root] for text nodes.
         *
         * @return the text of the first matching text node, or `null` when nothing matches.
         */
        private fun extractTextFromNode(root: Element, xpath: String): String? =
            root
                .selectXpath(xpath, TextNode::class.java)
                .firstOrNull()
                ?.text()

        /**
         * Evaluates [path] against [root].
         *
         * @return the first matching element, or `null` when nothing matches.
         */
        fun getNextElement(root: Element, path: String): Element? =
            root.selectXpath(path).firstOrNull()

        /**
         * Extracts a string from [root] according to what [path] targets.
         *
         * @return the text-node text for [XPathTarget.TEXT], the attribute value for
         * [XPathTarget.ATTRIBUTE], and `null` for [XPathTarget.ELEMENT] or when
         * nothing could be extracted.
         */
        fun extractResult(root: Element, path: String): String? =
            when (getXPathTargetFromPath(path)) {
                XPathTarget.TEXT -> extractTextFromNode(root, path)
                XPathTarget.ATTRIBUTE -> extractTextFromAttribute(root, path)
                // Element paths carry no textual result.
                XPathTarget.ELEMENT -> null
            }

        /**
         * Classifies [path]: text paths are checked first, then attribute paths;
         * everything else is treated as an element path.
         */
        fun getXPathTargetFromPath(path: String): XPathTarget =
            when {
                TEXT_NODE_MATCHER.matches(path) -> XPathTarget.TEXT
                ATTRIBUTE_MATCHER.matches(path) -> XPathTarget.ATTRIBUTE
                else -> XPathTarget.ELEMENT
            }
    }
}
|
||||
@@ -0,0 +1 @@
|
||||
com.rak.model.config.TestConverter$ActualConverter
|
||||
@@ -1 +0,0 @@
|
||||
quarkus.config.locations=sources.yml
|
||||
@@ -33,10 +33,12 @@ scraper:
|
||||
language:
|
||||
steps:
|
||||
- type: "xpath"
|
||||
value: "//li/abbr/@title"
|
||||
value: "//li/abbr"
|
||||
- type: "xpath"
|
||||
value: "//abbr/@title"
|
||||
region-key:
|
||||
steps:
|
||||
- type: "xpath"
|
||||
value: "//li/abbr/text()"
|
||||
value: "//li/abbr/text()"
|
||||
testing:
|
||||
waaa: test
|
||||
Reference in New Issue
Block a user